mirror of
https://github.com/kennethreitz/requests.git
synced 2026-06-05 22:50:18 +00:00
delete packages
This commit is contained in:
@@ -1,13 +0,0 @@
|
||||
If you are planning to submit a pull request to requests with any changes in
|
||||
this library do not go any further. These are independent libraries which we
|
||||
vendor into requests. Any changes necessary to these libraries must be made in
|
||||
them and submitted as separate pull requests to those libraries.
|
||||
|
||||
urllib3 pull requests go here: https://github.com/shazow/urllib3
|
||||
|
||||
chardet pull requests go here: https://github.com/chardet/chardet
|
||||
|
||||
idna pull requests go here: https://github.com/kjd/idna
|
||||
|
||||
See https://github.com/kennethreitz/requests/pull/1812#issuecomment-30854316
|
||||
for the reasoning behind this.
|
||||
@@ -1,36 +0,0 @@
|
||||
'''
|
||||
Debian and other distributions "unbundle" requests' vendored dependencies, and
|
||||
rewrite all imports to use the global versions of ``urllib3`` and ``chardet``.
|
||||
The problem with this is that not only requests itself imports those
|
||||
dependencies, but third-party code outside of the distros' control too.
|
||||
|
||||
In reaction to these problems, the distro maintainers replaced
|
||||
``requests.packages`` with a magical "stub module" that imports the correct
|
||||
modules. The implementations were varying in quality and all had severe
|
||||
problems. For example, a symlink (or hardlink) that links the correct modules
|
||||
into place introduces problems regarding object identity, since you now have
|
||||
two modules in `sys.modules` with the same API, but different identities::
|
||||
|
||||
requests.packages.urllib3 is not urllib3
|
||||
|
||||
With version ``2.5.2``, requests started to maintain its own stub, so that
|
||||
distro-specific breakage would be reduced to a minimum, even though the whole
|
||||
issue is not requests' fault in the first place. See
|
||||
https://github.com/kennethreitz/requests/pull/2375 for the corresponding pull
|
||||
request.
|
||||
'''
|
||||
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
try:
|
||||
from . import urllib3
|
||||
except ImportError:
|
||||
import urllib3
|
||||
sys.modules['%s.urllib3' % __name__] = urllib3
|
||||
|
||||
try:
|
||||
from . import chardet
|
||||
except ImportError:
|
||||
import chardet
|
||||
sys.modules['%s.chardet' % __name__] = chardet
|
||||
@@ -1,39 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
from .compat import PY2, PY3
|
||||
from .universaldetector import UniversalDetector
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
def detect(byte_str):
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{0}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
@@ -1,386 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Big5 frequency table
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http://www.edu.tw:81/mandr/>
|
||||
#
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
#Char to FreqOrder table
|
||||
BIG5_TABLE_SIZE = 5376
|
||||
|
||||
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
|
||||
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
|
||||
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
|
||||
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
|
||||
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
|
||||
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
|
||||
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
|
||||
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
|
||||
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
|
||||
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
|
||||
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
|
||||
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
|
||||
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
|
||||
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
|
||||
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
|
||||
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
|
||||
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
|
||||
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
|
||||
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
|
||||
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
|
||||
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
|
||||
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
|
||||
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
|
||||
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
|
||||
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
|
||||
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
|
||||
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
|
||||
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
|
||||
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
|
||||
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
|
||||
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
|
||||
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
|
||||
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
|
||||
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
|
||||
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
|
||||
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
|
||||
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
|
||||
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
|
||||
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
|
||||
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
|
||||
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
|
||||
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
|
||||
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
|
||||
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
|
||||
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
|
||||
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
|
||||
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
|
||||
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
|
||||
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
|
||||
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
|
||||
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
|
||||
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
|
||||
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
|
||||
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
|
||||
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
|
||||
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
|
||||
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
|
||||
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
|
||||
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
|
||||
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
|
||||
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
|
||||
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
|
||||
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
|
||||
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
|
||||
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
|
||||
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
|
||||
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
|
||||
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
|
||||
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
|
||||
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
|
||||
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
|
||||
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
|
||||
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
|
||||
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
|
||||
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
|
||||
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
|
||||
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
|
||||
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
|
||||
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
|
||||
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
|
||||
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
|
||||
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
|
||||
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
|
||||
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
|
||||
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
|
||||
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
|
||||
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
|
||||
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
|
||||
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
|
||||
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
|
||||
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
|
||||
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
|
||||
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
|
||||
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
|
||||
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
|
||||
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
|
||||
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
|
||||
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
|
||||
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
|
||||
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
|
||||
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
|
||||
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
|
||||
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
|
||||
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
|
||||
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
|
||||
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
|
||||
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
|
||||
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
|
||||
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
|
||||
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
|
||||
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
|
||||
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
|
||||
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
|
||||
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
|
||||
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
|
||||
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
|
||||
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
|
||||
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
|
||||
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
|
||||
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
|
||||
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
|
||||
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
|
||||
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
|
||||
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
|
||||
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
|
||||
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
|
||||
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
|
||||
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
|
||||
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
|
||||
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
|
||||
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
|
||||
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
|
||||
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
|
||||
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
|
||||
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
|
||||
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
|
||||
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
|
||||
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
|
||||
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
|
||||
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
|
||||
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
|
||||
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
|
||||
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
|
||||
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
|
||||
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
|
||||
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
|
||||
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
|
||||
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
|
||||
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
|
||||
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
|
||||
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
|
||||
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
|
||||
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
|
||||
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
|
||||
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
|
||||
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
|
||||
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
|
||||
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
|
||||
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
|
||||
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
|
||||
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
|
||||
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
|
||||
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
|
||||
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
|
||||
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
|
||||
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
|
||||
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
|
||||
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
|
||||
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
|
||||
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
|
||||
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
|
||||
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
|
||||
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
|
||||
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
|
||||
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
|
||||
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
|
||||
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
|
||||
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
|
||||
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
|
||||
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
|
||||
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
|
||||
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
|
||||
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
|
||||
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
|
||||
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
|
||||
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
|
||||
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
|
||||
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
|
||||
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
|
||||
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
|
||||
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
|
||||
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
|
||||
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
|
||||
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
|
||||
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
|
||||
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
|
||||
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
|
||||
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
|
||||
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
|
||||
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
|
||||
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
|
||||
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
|
||||
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
|
||||
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
|
||||
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
|
||||
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
|
||||
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
|
||||
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
|
||||
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
|
||||
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
|
||||
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
|
||||
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
|
||||
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
|
||||
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
|
||||
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
|
||||
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
|
||||
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
|
||||
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
|
||||
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
|
||||
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
|
||||
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
|
||||
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
|
||||
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
|
||||
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
|
||||
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
|
||||
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
|
||||
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
|
||||
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
|
||||
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
|
||||
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
|
||||
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
|
||||
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
|
||||
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
|
||||
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
|
||||
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
|
||||
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
|
||||
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
|
||||
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
|
||||
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
|
||||
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
|
||||
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
|
||||
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
|
||||
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
|
||||
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
|
||||
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
|
||||
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
|
||||
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
|
||||
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
|
||||
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
|
||||
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
|
||||
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
|
||||
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
|
||||
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
|
||||
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
|
||||
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
|
||||
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
|
||||
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
|
||||
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
|
||||
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
|
||||
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
|
||||
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
|
||||
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
|
||||
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
|
||||
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
|
||||
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
|
||||
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
|
||||
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
|
||||
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
|
||||
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
|
||||
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
|
||||
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
|
||||
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
|
||||
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
|
||||
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
|
||||
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
|
||||
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
|
||||
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
|
||||
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
|
||||
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
|
||||
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
|
||||
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
|
||||
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
|
||||
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
|
||||
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
|
||||
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
|
||||
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
|
||||
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
|
||||
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
|
||||
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
|
||||
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
|
||||
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
|
||||
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
|
||||
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
|
||||
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
|
||||
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
|
||||
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
|
||||
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
|
||||
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
|
||||
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
|
||||
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
|
||||
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
|
||||
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
|
||||
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
|
||||
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
|
||||
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
|
||||
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
|
||||
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
|
||||
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
|
||||
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
|
||||
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
|
||||
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
|
||||
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
|
||||
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
|
||||
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
|
||||
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
|
||||
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
|
||||
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
|
||||
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
|
||||
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
|
||||
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
|
||||
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
|
||||
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
|
||||
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
|
||||
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
|
||||
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
|
||||
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
|
||||
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
|
||||
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
|
||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||
)
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import Big5DistributionAnalysis
|
||||
from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(Big5Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "Big5"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
@@ -1,233 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
|
||||
|
||||
class CharDistributionAnalysis(object):
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
def __init__(self):
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._char_to_freq_order = None
|
||||
self._table_size = None # Size of above table
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self.typical_distribution_ratio = None
|
||||
self._done = None
|
||||
self._total_chars = None
|
||||
self._freq_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""reset analyser, clear any state"""
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
self._total_chars = 0 # Total characters encountered
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._freq_chars = 0
|
||||
|
||||
def feed(self, char, char_len):
|
||||
"""feed a character with known length"""
|
||||
if char_len == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
order = self.get_order(char)
|
||||
else:
|
||||
order = -1
|
||||
if order >= 0:
|
||||
self._total_chars += 1
|
||||
# order is valid
|
||||
if order < self._table_size:
|
||||
if 512 > self._char_to_freq_order[order]:
|
||||
self._freq_chars += 1
|
||||
|
||||
def get_confidence(self):
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
|
||||
return self.SURE_NO
|
||||
|
||||
if self._total_chars != self._freq_chars:
|
||||
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
|
||||
* self.typical_distribution_ratio))
|
||||
if r < self.SURE_YES:
|
||||
return r
|
||||
|
||||
# normalize confidence (we don't want to be 100% sure)
|
||||
return self.SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
# table.
|
||||
return -1
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCTWDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCTW_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xC4:
|
||||
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCKRDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xB0:
|
||||
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(GB2312DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = GB2312_TABLE_SIZE
|
||||
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(Big5DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = BIG5_TABLE_SIZE
|
||||
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if first_char >= 0xA4:
|
||||
if second_char >= 0xA1:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||
else:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0x40
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||
order = 188 * (first_char - 0x81)
|
||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||
order = 188 * (first_char - 0xE0 + 31)
|
||||
else:
|
||||
return -1
|
||||
order = order + second_char - 0x40
|
||||
if second_char > 0x7F:
|
||||
order = -1
|
||||
return order
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCJPDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
char = byte_str[0]
|
||||
if char >= 0xA0:
|
||||
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
|
||||
else:
|
||||
return -1
|
||||
@@ -1,106 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self._active_num = 0
|
||||
self.probers = []
|
||||
self._best_guess_prober = None
|
||||
|
||||
def reset(self):
|
||||
super(CharSetGroupProber, self).reset()
|
||||
self._active_num = 0
|
||||
for prober in self.probers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._active_num += 1
|
||||
self._best_guess_prober = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
state = prober.feed(byte_str)
|
||||
if not state:
|
||||
continue
|
||||
if state == ProbingState.FOUND_IT:
|
||||
self._best_guess_prober = prober
|
||||
return self.state
|
||||
elif state == ProbingState.NOT_ME:
|
||||
prober.active = False
|
||||
self._active_num -= 1
|
||||
if self._active_num <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
state = self.state
|
||||
if state == ProbingState.FOUND_IT:
|
||||
return 0.99
|
||||
elif state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
best_conf = 0.0
|
||||
self._best_guess_prober = None
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
self.logger.debug('%s not active', prober.charset_name)
|
||||
continue
|
||||
conf = prober.get_confidence()
|
||||
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
|
||||
if best_conf < conf:
|
||||
best_conf = conf
|
||||
self._best_guess_prober = prober
|
||||
if not self._best_guess_prober:
|
||||
return 0.0
|
||||
return best_conf
|
||||
@@ -1,145 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import ProbingState
|
||||
|
||||
|
||||
class CharSetProber(object):
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
self._state = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def reset(self):
|
||||
self._state = ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return None
|
||||
|
||||
def feed(self, buf):
|
||||
pass
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
return 0.0
|
||||
|
||||
@staticmethod
|
||||
def filter_high_byte_only(buf):
|
||||
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
|
||||
return buf
|
||||
|
||||
@staticmethod
|
||||
def filter_international_words(buf):
|
||||
"""
|
||||
We define three types of bytes:
|
||||
alphabet: english alphabets [a-zA-Z]
|
||||
international: international characters [\x80-\xFF]
|
||||
marker: everything else [^a-zA-Z\x80-\xFF]
|
||||
|
||||
The input buffer can be thought to contain a series of words delimited
|
||||
by markers. This function works to filter all words that contain at
|
||||
least one international character. All contiguous sequences of markers
|
||||
are replaced by a single space ascii character.
|
||||
|
||||
This filter applies to all scripts which do not use English characters.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
|
||||
# This regex expression filters out only words that have at-least one
|
||||
# international character. The word may include one marker character at
|
||||
# the end.
|
||||
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
|
||||
buf)
|
||||
|
||||
for word in words:
|
||||
filtered.extend(word[:-1])
|
||||
|
||||
# If the last character in the word is a marker, replace it with a
|
||||
# space as markers shouldn't affect our analysis (they are used
|
||||
# similarly across all languages and may thus have similar
|
||||
# frequencies).
|
||||
last_char = word[-1:]
|
||||
if not last_char.isalpha() and last_char < b'\x80':
|
||||
last_char = b' '
|
||||
filtered.extend(last_char)
|
||||
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def filter_with_english_letters(buf):
|
||||
"""
|
||||
Returns a copy of ``buf`` that retains only the sequences of English
|
||||
alphabet and high byte characters that are not between <> characters.
|
||||
Also retains English alphabet and high byte characters immediately
|
||||
before occurrences of >.
|
||||
|
||||
This filter can be applied to all scripts which contain both English
|
||||
characters and extended ASCII characters, but is currently only used by
|
||||
``Latin1Prober``.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
in_tag = False
|
||||
prev = 0
|
||||
|
||||
for curr in range(len(buf)):
|
||||
# Slice here to get bytes instead of an int with Python 3
|
||||
buf_char = buf[curr:curr + 1]
|
||||
# Check if we're coming out of or entering an HTML tag
|
||||
if buf_char == b'>':
|
||||
in_tag = False
|
||||
elif buf_char == b'<':
|
||||
in_tag = True
|
||||
|
||||
# If current character is not extended-ASCII and not alphabetic...
|
||||
if buf_char < b'\x80' and not buf_char.isalpha():
|
||||
# ...and we're not in a tag
|
||||
if curr > prev and not in_tag:
|
||||
# Keep everything after last non-extended-ASCII,
|
||||
# non-alphabetic character
|
||||
filtered.extend(buf[prev:curr])
|
||||
# Output a space to delimit stretch we kept
|
||||
filtered.extend(b' ')
|
||||
prev = curr + 1
|
||||
|
||||
# If we're not in a tag...
|
||||
if not in_tag:
|
||||
# Keep everything after last non-extended-ASCII, non-alphabetic
|
||||
# character
|
||||
filtered.extend(buf[prev:])
|
||||
|
||||
return filtered
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Script which takes one or more file paths and reports on their detected
|
||||
encodings
|
||||
|
||||
Example::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
If no paths are provided, it takes its input from stdin.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from chardet import __version__
|
||||
from chardet.compat import PY2
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name='stdin'):
|
||||
"""
|
||||
Return a string describing the probable encoding of a file or
|
||||
list of strings.
|
||||
|
||||
:param lines: The lines to get the encoding of.
|
||||
:type lines: Iterable of bytes
|
||||
:param name: Name of file or collection of lines
|
||||
:type name: str
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
for line in lines:
|
||||
line = bytearray(line)
|
||||
u.feed(line)
|
||||
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
|
||||
if u.done:
|
||||
break
|
||||
u.close()
|
||||
result = u.result
|
||||
if PY2:
|
||||
name = name.decode(sys.getfilesystemencoding(), 'ignore')
|
||||
if result['encoding']:
|
||||
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
|
||||
result['confidence'])
|
||||
else:
|
||||
return '{0}: no result'.format(name)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
"""
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
:param argv: List of arguments, as if specified on the command-line.
|
||||
If None, ``sys.argv[1:]`` is used instead.
|
||||
:type argv: list of str
|
||||
"""
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings")
|
||||
parser.add_argument('input',
|
||||
help='File whose encoding we would like to determine. \
|
||||
(default: stdin)',
|
||||
type=argparse.FileType('rb'), nargs='*',
|
||||
default=[sys.stdin if PY2 else sys.stdin.buffer])
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {0}'.format(__version__))
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
for f in args.input:
|
||||
if f.isatty():
|
||||
print("You are running chardetect interactively. Press " +
|
||||
"CTRL-D twice at the start of a blank line to signal the " +
|
||||
"end of your input. If you want help, run chardetect " +
|
||||
"--help\n", file=sys.stderr)
|
||||
print(description_of(f, f.name))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,88 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
class CodingStateMachine(object):
|
||||
"""
|
||||
A state machine to verify a byte sequence for a particular encoding. For
|
||||
each byte the detector receives, it will feed that byte to every active
|
||||
state machine available, one byte at a time. The state machine changes its
|
||||
state based on its previous state and the byte it receives. There are 3
|
||||
states in a state machine that are of interest to an auto-detector:
|
||||
|
||||
START state: This is the state to start with, or a legal byte sequence
|
||||
(i.e. a valid code point) for character has been identified.
|
||||
|
||||
ME state: This indicates that the state machine identified a byte sequence
|
||||
that is specific to the charset it is designed for and that
|
||||
there is no other possible encoding which can contain this byte
|
||||
sequence. This will to lead to an immediate positive answer for
|
||||
the detector.
|
||||
|
||||
ERROR state: This indicates the state machine identified an illegal byte
|
||||
sequence for that encoding. This will lead to an immediate
|
||||
negative answer for this encoding. Detector will exclude this
|
||||
encoding from consideration from here on.
|
||||
"""
|
||||
def __init__(self, sm):
|
||||
self._model = sm
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = 0
|
||||
self._curr_state = None
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._curr_state = MachineState.START
|
||||
|
||||
def next_state(self, c):
|
||||
# for each byte we get its class
|
||||
# if it is first byte, we also get byte length
|
||||
byte_class = self._model['class_table'][c]
|
||||
if self._curr_state == MachineState.START:
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = self._model['char_len_table'][byte_class]
|
||||
# from byte's class and state_table, we get its next state
|
||||
curr_state = (self._curr_state * self._model['class_factor']
|
||||
+ byte_class)
|
||||
self._curr_state = self._model['state_table'][curr_state]
|
||||
self._curr_byte_pos += 1
|
||||
return self._curr_state
|
||||
|
||||
def get_current_charlen(self):
|
||||
return self._curr_char_len
|
||||
|
||||
def get_coding_state_machine(self):
|
||||
return self._model['name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._model['language']
|
||||
@@ -1,34 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# Contributor(s):
|
||||
# Dan Blanchard
|
||||
# Ian Cordasco
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
PY2 = True
|
||||
PY3 = False
|
||||
base_str = (str, unicode)
|
||||
text_type = unicode
|
||||
else:
|
||||
PY2 = False
|
||||
PY3 = True
|
||||
base_str = (bytes, str)
|
||||
text_type = str
|
||||
@@ -1,49 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(CP949Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
# not different.
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "CP949"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
@@ -1,76 +0,0 @@
|
||||
"""
|
||||
All of the Enums that are used throughout the chardet package.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
|
||||
class InputState(object):
|
||||
"""
|
||||
This enum represents the different states a universal detector can be in.
|
||||
"""
|
||||
PURE_ASCII = 0
|
||||
ESC_ASCII = 1
|
||||
HIGH_BYTE = 2
|
||||
|
||||
|
||||
class LanguageFilter(object):
|
||||
"""
|
||||
This enum represents the different language filters we can apply to a
|
||||
``UniversalDetector``.
|
||||
"""
|
||||
CHINESE_SIMPLIFIED = 0x01
|
||||
CHINESE_TRADITIONAL = 0x02
|
||||
JAPANESE = 0x04
|
||||
KOREAN = 0x08
|
||||
NON_CJK = 0x10
|
||||
ALL = 0x1F
|
||||
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
|
||||
CJK = CHINESE | JAPANESE | KOREAN
|
||||
|
||||
|
||||
class ProbingState(object):
|
||||
"""
|
||||
This enum represents the different states a prober can be in.
|
||||
"""
|
||||
DETECTING = 0
|
||||
FOUND_IT = 1
|
||||
NOT_ME = 2
|
||||
|
||||
|
||||
class MachineState(object):
|
||||
"""
|
||||
This enum represents the different states a state machine can be in.
|
||||
"""
|
||||
START = 0
|
||||
ERROR = 1
|
||||
ITS_ME = 2
|
||||
|
||||
|
||||
class SequenceLikelihood(object):
|
||||
"""
|
||||
This enum represents the likelihood of a character following the previous one.
|
||||
"""
|
||||
NEGATIVE = 0
|
||||
UNLIKELY = 1
|
||||
LIKELY = 2
|
||||
POSITIVE = 3
|
||||
|
||||
@classmethod
|
||||
def get_num_categories(cls):
|
||||
""":returns: The number of likelihood categories in the enum."""
|
||||
return 4
|
||||
|
||||
|
||||
class CharacterCategory(object):
|
||||
"""
|
||||
This enum represents the different categories language models for
|
||||
``SingleByteCharsetProber`` put characters into.
|
||||
|
||||
Anything less than CONTROL is considered a letter.
|
||||
"""
|
||||
UNDEFINED = 255
|
||||
LINE_BREAK = 254
|
||||
SYMBOL = 253
|
||||
DIGIT = 252
|
||||
CONTROL = 251
|
||||
@@ -1,101 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, ProbingState, MachineState
|
||||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||
ISO2022KR_SM_MODEL)
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
|
||||
"""
|
||||
This CharSetProber uses a "code scheme" approach for detecting encodings,
|
||||
whereby easily recognizable escape or shift sequences are relied on to
|
||||
identify these encodings.
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.coding_sm = []
|
||||
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.JAPANESE:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.KOREAN:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||
self.active_sm_count = None
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
self._state = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EscCharSetProber, self).reset()
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm:
|
||||
continue
|
||||
coding_sm.active = True
|
||||
coding_sm.reset()
|
||||
self.active_sm_count = len(self.coding_sm)
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._detected_charset
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._detected_language
|
||||
|
||||
def get_confidence(self):
|
||||
if self._detected_charset:
|
||||
return 0.99
|
||||
else:
|
||||
return 0.00
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm or not coding_sm.active:
|
||||
continue
|
||||
coding_state = coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
coding_sm.active = False
|
||||
self.active_sm_count -= 1
|
||||
if self.active_sm_count <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
self._detected_charset = coding_sm.get_coding_state_machine()
|
||||
self._detected_language = coding_sm.language
|
||||
return self.state
|
||||
|
||||
return self.state
|
||||
@@ -1,246 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,4,0,5,2,0, # 78 - 7f
|
||||
1,1,1,1,1,1,1,1, # 80 - 87
|
||||
1,1,1,1,1,1,1,1, # 88 - 8f
|
||||
1,1,1,1,1,1,1,1, # 90 - 97
|
||||
1,1,1,1,1,1,1,1, # 98 - 9f
|
||||
1,1,1,1,1,1,1,1, # a0 - a7
|
||||
1,1,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,1,1,1,1,1,1, # c0 - c7
|
||||
1,1,1,1,1,1,1,1, # c8 - cf
|
||||
1,1,1,1,1,1,1,1, # d0 - d7
|
||||
1,1,1,1,1,1,1,1, # d8 - df
|
||||
1,1,1,1,1,1,1,1, # e0 - e7
|
||||
1,1,1,1,1,1,1,1, # e8 - ef
|
||||
1,1,1,1,1,1,1,1, # f0 - f7
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,4,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,7,0,0,0, # 20 - 27
|
||||
3,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
6,0,4,0,8,0,0,0, # 40 - 47
|
||||
0,9,5,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,3,0,0,0, # 20 - 27
|
||||
0,4,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,5,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState, MachineState
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCJPProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||
self.context_analyzer = EUCJPContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EUCJPProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-JP"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char, char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
@@ -1,195 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
|
||||
# 128 --> 0.79
|
||||
# 256 --> 0.92
|
||||
# 512 --> 0.986
|
||||
# 1024 --> 0.99944
|
||||
# 2048 --> 0.99999
|
||||
#
|
||||
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
|
||||
# Random Distribution Ration = 512 / (2350-512) = 0.279.
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||
|
||||
EUCKR_TABLE_SIZE = 2352
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
|
||||
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
|
||||
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
|
||||
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
|
||||
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
|
||||
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
|
||||
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
|
||||
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
|
||||
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
|
||||
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
|
||||
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
|
||||
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
|
||||
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
|
||||
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
|
||||
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
|
||||
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
|
||||
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
|
||||
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
|
||||
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
|
||||
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
|
||||
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
|
||||
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
|
||||
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
|
||||
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
|
||||
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
|
||||
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
|
||||
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
|
||||
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
|
||||
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
|
||||
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
|
||||
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
|
||||
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
|
||||
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
|
||||
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
|
||||
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
|
||||
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
|
||||
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
|
||||
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
|
||||
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
|
||||
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
|
||||
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
|
||||
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
|
||||
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
|
||||
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
|
||||
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
|
||||
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
|
||||
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
|
||||
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
|
||||
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
|
||||
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
|
||||
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
|
||||
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
|
||||
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
|
||||
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
|
||||
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
|
||||
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
|
||||
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
|
||||
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
|
||||
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
|
||||
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
|
||||
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
|
||||
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
|
||||
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
|
||||
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
|
||||
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
|
||||
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
|
||||
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
|
||||
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
|
||||
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
|
||||
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
|
||||
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
|
||||
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
|
||||
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
|
||||
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
|
||||
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
|
||||
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
|
||||
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
|
||||
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
|
||||
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
|
||||
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
|
||||
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
|
||||
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
|
||||
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
|
||||
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
|
||||
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
|
||||
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
|
||||
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
|
||||
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
|
||||
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
|
||||
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
|
||||
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
|
||||
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
|
||||
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
|
||||
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
|
||||
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
|
||||
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
|
||||
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
|
||||
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
|
||||
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
|
||||
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
|
||||
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
|
||||
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
|
||||
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
|
||||
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
|
||||
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
|
||||
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
|
||||
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
|
||||
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
|
||||
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
|
||||
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
|
||||
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
|
||||
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
|
||||
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
|
||||
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
|
||||
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
|
||||
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
|
||||
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
|
||||
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
|
||||
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
|
||||
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
|
||||
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
|
||||
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
|
||||
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
|
||||
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
|
||||
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
|
||||
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
|
||||
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
|
||||
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
|
||||
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
|
||||
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
|
||||
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
|
||||
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
|
||||
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
|
||||
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
|
||||
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
|
||||
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
|
||||
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
|
||||
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
|
||||
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
|
||||
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
|
||||
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
|
||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||
)
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCKRProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-KR"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
@@ -1,387 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# EUCTW frequency table
|
||||
# Converted from big5 work
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http:#www.edu.tw:81/mandr/>
|
||||
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCTWDistributionAnalysis
|
||||
from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCTWProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-TW"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Taiwan"
|
||||
@@ -1,283 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# GB2312 most frequently used character table
|
||||
#
|
||||
# Char to FreqOrder table , from hz6763
|
||||
|
||||
# 512 --> 0.79 -- 0.79
|
||||
# 1024 --> 0.92 -- 0.13
|
||||
# 2048 --> 0.98 -- 0.06
|
||||
# 6768 --> 1.00 -- 0.02
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
|
||||
# Random Distribution Ration = 512 / (3755 - 512) = 0.157
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
|
||||
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||
|
||||
GB2312_TABLE_SIZE = 3760
|
||||
|
||||
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
|
||||
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
|
||||
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
|
||||
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
|
||||
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
|
||||
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
|
||||
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
|
||||
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
|
||||
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
|
||||
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
|
||||
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
|
||||
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
|
||||
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
|
||||
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
|
||||
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
|
||||
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
|
||||
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
|
||||
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
|
||||
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
|
||||
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
|
||||
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
|
||||
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
|
||||
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
|
||||
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
|
||||
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
|
||||
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
|
||||
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
|
||||
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
|
||||
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
|
||||
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
|
||||
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648,
|
||||
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992,
|
||||
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233,
|
||||
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157,
|
||||
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807,
|
||||
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094,
|
||||
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258,
|
||||
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478,
|
||||
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152,
|
||||
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909,
|
||||
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272,
|
||||
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221,
|
||||
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252,
|
||||
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301,
|
||||
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254,
|
||||
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070,
|
||||
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461,
|
||||
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360,
|
||||
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124,
|
||||
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535,
|
||||
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243,
|
||||
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713,
|
||||
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071,
|
||||
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442,
|
||||
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946,
|
||||
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257,
|
||||
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180,
|
||||
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427,
|
||||
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781,
|
||||
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724,
|
||||
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937,
|
||||
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943,
|
||||
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789,
|
||||
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552,
|
||||
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246,
|
||||
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451,
|
||||
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310,
|
||||
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860,
|
||||
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297,
|
||||
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780,
|
||||
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745,
|
||||
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936,
|
||||
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032,
|
||||
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657,
|
||||
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414,
|
||||
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976,
|
||||
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436,
|
||||
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254,
|
||||
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536,
|
||||
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238,
|
||||
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059,
|
||||
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741,
|
||||
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447,
|
||||
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601,
|
||||
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269,
|
||||
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894,
|
||||
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173,
|
||||
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994,
|
||||
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956,
|
||||
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437,
|
||||
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154,
|
||||
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240,
|
||||
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143,
|
||||
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634,
|
||||
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472,
|
||||
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541,
|
||||
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143,
|
||||
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312,
|
||||
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414,
|
||||
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754,
|
||||
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424,
|
||||
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302,
|
||||
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739,
|
||||
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004,
|
||||
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484,
|
||||
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739,
|
||||
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535,
|
||||
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641,
|
||||
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307,
|
||||
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573,
|
||||
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533,
|
||||
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965,
|
||||
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99,
|
||||
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280,
|
||||
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505,
|
||||
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012,
|
||||
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039,
|
||||
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982,
|
||||
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530,
|
||||
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392,
|
||||
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656,
|
||||
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220,
|
||||
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766,
|
||||
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535,
|
||||
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728,
|
||||
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338,
|
||||
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627,
|
||||
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885,
|
||||
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411,
|
||||
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671,
|
||||
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162,
|
||||
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774,
|
||||
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524,
|
||||
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346,
|
||||
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040,
|
||||
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188,
|
||||
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280,
|
||||
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131,
|
||||
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947,
|
||||
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970,
|
||||
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814,
|
||||
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557,
|
||||
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997,
|
||||
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972,
|
||||
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369,
|
||||
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376,
|
||||
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480,
|
||||
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610,
|
||||
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128,
|
||||
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769,
|
||||
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207,
|
||||
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392,
|
||||
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623,
|
||||
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782,
|
||||
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650,
|
||||
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478,
|
||||
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773,
|
||||
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007,
|
||||
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323,
|
||||
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598,
|
||||
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961,
|
||||
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302,
|
||||
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409,
|
||||
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683,
|
||||
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191,
|
||||
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616,
|
||||
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302,
|
||||
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774,
|
||||
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147,
|
||||
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731,
|
||||
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464,
|
||||
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377,
|
||||
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315,
|
||||
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557,
|
||||
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903,
|
||||
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060,
|
||||
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261,
|
||||
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092,
|
||||
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810,
|
||||
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708,
|
||||
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658,
|
||||
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871,
|
||||
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503,
|
||||
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229,
|
||||
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112,
|
||||
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504,
|
||||
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389,
|
||||
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27,
|
||||
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542,
|
||||
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861,
|
||||
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845,
|
||||
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700,
|
||||
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469,
|
||||
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582,
|
||||
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999,
|
||||
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274,
|
||||
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020,
|
||||
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601,
|
||||
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628,
|
||||
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31,
|
||||
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668,
|
||||
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778,
|
||||
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169,
|
||||
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667,
|
||||
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881,
|
||||
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276,
|
||||
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320,
|
||||
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751,
|
||||
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432,
|
||||
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772,
|
||||
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843,
|
||||
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116,
|
||||
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904,
|
||||
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652,
|
||||
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664,
|
||||
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770,
|
||||
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283,
|
||||
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626,
|
||||
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713,
|
||||
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333,
|
||||
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062,
|
||||
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555,
|
||||
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014,
|
||||
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510,
|
||||
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015,
|
||||
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459,
|
||||
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390,
|
||||
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238,
|
||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||
)
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import GB2312DistributionAnalysis
|
||||
from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(GB2312Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "GB2312"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
@@ -1,292 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Shy Shalom
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
|
||||
### General ideas of the Hebrew charset recognition ###
|
||||
#
|
||||
# Four main charsets exist in Hebrew:
|
||||
# "ISO-8859-8" - Visual Hebrew
|
||||
# "windows-1255" - Logical Hebrew
|
||||
# "ISO-8859-8-I" - Logical Hebrew
|
||||
# "x-mac-hebrew" - ?? Logical Hebrew ??
|
||||
#
|
||||
# Both "ISO" charsets use a completely identical set of code points, whereas
|
||||
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
|
||||
# these code points. windows-1255 defines additional characters in the range
|
||||
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
|
||||
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
|
||||
# x-mac-hebrew defines similar additional code points but with a different
|
||||
# mapping.
|
||||
#
|
||||
# As far as an average Hebrew text with no diacritics is concerned, all four
|
||||
# charsets are identical with respect to code points. Meaning that for the
|
||||
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
|
||||
# (including final letters).
|
||||
#
|
||||
# The dominant difference between these charsets is their directionality.
|
||||
# "Visual" directionality means that the text is ordered as if the renderer is
|
||||
# not aware of a BIDI rendering algorithm. The renderer sees the text and
|
||||
# draws it from left to right. The text itself when ordered naturally is read
|
||||
# backwards. A buffer of Visual Hebrew generally looks like so:
|
||||
# "[last word of first line spelled backwards] [whole line ordered backwards
|
||||
# and spelled backwards] [first word of first line spelled backwards]
|
||||
# [end of line] [last word of second line] ... etc' "
|
||||
# adding punctuation marks, numbers and English text to visual text is
|
||||
# naturally also "visual" and from left to right.
|
||||
#
|
||||
# "Logical" directionality means the text is ordered "naturally" according to
|
||||
# the order it is read. It is the responsibility of the renderer to display
|
||||
# the text from right to left. A BIDI algorithm is used to place general
|
||||
# punctuation marks, numbers and English text in the text.
|
||||
#
|
||||
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
|
||||
# what little evidence I could find, it seems that its general directionality
|
||||
# is Logical.
|
||||
#
|
||||
# To sum up all of the above, the Hebrew probing mechanism knows about two
|
||||
# charsets:
|
||||
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
|
||||
# backwards while line order is natural. For charset recognition purposes
|
||||
# the line order is unimportant (In fact, for this implementation, even
|
||||
# word order is unimportant).
|
||||
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
|
||||
#
|
||||
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
|
||||
# specifically identified.
|
||||
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
|
||||
# that contain special punctuation marks or diacritics is displayed with
|
||||
# some unconverted characters showing as question marks. This problem might
|
||||
# be corrected using another model prober for x-mac-hebrew. Due to the fact
|
||||
# that x-mac-hebrew texts are so rare, writing another model prober isn't
|
||||
# worth the effort and performance hit.
|
||||
#
|
||||
#### The Prober ####
|
||||
#
|
||||
# The prober is divided between two SBCharSetProbers and a HebrewProber,
|
||||
# all of which are managed, created, fed data, inquired and deleted by the
|
||||
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
|
||||
# fact some kind of Hebrew, Logical or Visual. The final decision about which
|
||||
# one is it is made by the HebrewProber by combining final-letter scores
|
||||
# with the scores of the two SBCharSetProbers to produce a final answer.
|
||||
#
|
||||
# The SBCSGroupProber is responsible for stripping the original text of HTML
|
||||
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
|
||||
# and new lines. It reduces any sequence of such characters to a single space.
|
||||
# The buffer fed to each prober in the SBCS group prober is pure text in
|
||||
# high-ASCII.
|
||||
# The two SBCharSetProbers (model probers) share the same language model:
|
||||
# Win1255Model.
|
||||
# The first SBCharSetProber uses the model normally as any other
|
||||
# SBCharSetProber does, to recognize windows-1255, upon which this model was
|
||||
# built. The second SBCharSetProber is told to make the pair-of-letter
|
||||
# lookup in the language model backwards. This in practice exactly simulates
|
||||
# a visual Hebrew model using the windows-1255 logical Hebrew model.
|
||||
#
|
||||
# The HebrewProber is not using any language model. All it does is look for
|
||||
# final-letter evidence suggesting the text is either logical Hebrew or visual
|
||||
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
|
||||
# alone are meaningless. HebrewProber always returns 0.00 as confidence
|
||||
# since it never identifies a charset by itself. Instead, the pointer to the
|
||||
# HebrewProber is passed to the model probers as a helper "Name Prober".
|
||||
# When the Group prober receives a positive identification from any prober,
|
||||
# it asks for the name of the charset identified. If the prober queried is a
|
||||
# Hebrew model prober, the model prober forwards the call to the
|
||||
# HebrewProber to make the final decision. In the HebrewProber, the
|
||||
# decision is made according to the final-letters scores maintained and Both
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
super(HebrewProber, self).__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._prev = ' '
|
||||
self._before_prev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logicalProber, visualProber):
|
||||
self._logical_prober = logicalProber
|
||||
self._visual_prober = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||
self.FINAL_PE, self.FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters
|
||||
# causing the Non-Final tsadi to appear at an end of a word even
|
||||
# though this is not the case in the original text.
|
||||
# The letters Pe and Kaf rarely display a related behavior of not being
|
||||
# a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||
self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, byte_str):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
# The following cases are checked:
|
||||
# 1) A word longer than 1 letter, ending with a final letter. This is
|
||||
# an indication that the text is laid out "naturally" since the
|
||||
# final letter really appears at the end. +1 for logical score.
|
||||
# 2) A word longer than 1 letter, ending with a Non-Final letter. In
|
||||
# normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
|
||||
# should not end with the Non-Final form of that letter. Exceptions
|
||||
# to this rule are mentioned above in isNonFinal(). This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score
|
||||
# 3) A word longer than 1 letter, starting with a final letter. Final
|
||||
# letters should not appear at the beginning of a word. This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score.
|
||||
#
|
||||
# The visual score and logical score are accumulated throughout the
|
||||
# text and are finally checked against each other in GetCharSetName().
|
||||
# No checking for final letters in the middle of words is done since
|
||||
# that case is not an indication for either Logical or Visual text.
|
||||
#
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return ProbingState.NOT_ME
|
||||
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in byte_str:
|
||||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._before_prev != ' ':
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._prev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._final_char_logical_score += 1
|
||||
elif self.is_non_final(self._prev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._final_char_visual_score += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if ((self._before_prev == ' ') and
|
||||
(self.is_final(self._prev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._final_char_visual_score += 1
|
||||
self._before_prev = self._prev
|
||||
self._prev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# ProbingState.NOT_ME (handled above)
|
||||
return ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = (self._logical_prober.get_confidence()
|
||||
- self._visual_prober.get_confidence())
|
||||
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return 'Hebrew'
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||
return ProbingState.NOT_ME
|
||||
return ProbingState.DETECTING
|
||||
@@ -1,325 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
#
|
||||
# Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
# They are sorted in order.
|
||||
|
||||
# 128 --> 0.77094
|
||||
# 256 --> 0.85710
|
||||
# 512 --> 0.92635
|
||||
# 1024 --> 0.97130
|
||||
# 2048 --> 0.99431
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
|
||||
#
|
||||
# Typical Distribution Ratio, 25% of IDR
|
||||
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
JIS_TABLE_SIZE = 4368
|
||||
|
||||
JIS_CHAR_TO_FREQ_ORDER = (
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||
)
|
||||
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
||||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
||||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
||||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
||||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
||||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
||||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
||||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
||||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
||||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
||||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
||||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
||||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
||||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
||||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
||||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
||||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
||||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
||||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
||||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
||||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
||||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
||||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
||||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
||||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
||||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
||||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
||||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
||||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
||||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
||||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
||||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
||||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
||||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
||||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
||||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
||||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
||||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
||||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
||||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
||||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
||||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
||||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
||||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
||||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
||||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
||||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
||||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
||||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
||||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
||||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
||||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
||||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
||||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
||||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
||||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
||||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
||||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
||||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
||||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
||||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
||||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
||||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
||||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
||||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
||||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
||||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis(object):
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1 # The order of previous char
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
|
||||
def feed(self, byte_str, num_bytes):
|
||||
if self._done:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
# buffers. In case the last one or two byte in last buffer is not
|
||||
# complete, we record how many byte needed to complete that character
|
||||
# and skip these bytes here. We can choose to record those bytes as
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._need_to_skip_char_num
|
||||
while i < num_bytes:
|
||||
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||
i += char_len
|
||||
if i > num_bytes:
|
||||
self._need_to_skip_char_num = i - num_bytes
|
||||
self._last_char_order = -1
|
||||
else:
|
||||
if (order != -1) and (self._last_char_order != -1):
|
||||
self._total_rel += 1
|
||||
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||
self._done = True
|
||||
break
|
||||
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
else:
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, byte_str):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISContextAnalysis, self).__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||
char_len = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self._charset_name = "CP932"
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
char_len = 2
|
||||
elif first_char == 0x8F:
|
||||
char_len = 3
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
|
||||
@@ -1,228 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
# this table is modified base on win1251BulgarianCharToOrderMap, so
|
||||
# only number <64 is sure valid
|
||||
|
||||
Latin5_BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
|
||||
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
|
||||
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
|
||||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
|
||||
)
|
||||
|
||||
win1251BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
|
||||
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
|
||||
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
|
||||
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 96.9392%
|
||||
# first 1024 sequences:3.0618%
|
||||
# rest sequences: 0.2992%
|
||||
# negative sequences: 0.0020%
|
||||
BulgarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
|
||||
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
|
||||
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
|
||||
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
|
||||
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
|
||||
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
|
||||
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
|
||||
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
|
||||
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
|
||||
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
|
||||
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
|
||||
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
|
||||
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
|
||||
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
|
||||
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
|
||||
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
|
||||
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
|
||||
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
|
||||
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
|
||||
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
|
||||
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
|
||||
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
|
||||
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
|
||||
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
|
||||
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
|
||||
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
|
||||
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
|
||||
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
|
||||
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
|
||||
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
|
||||
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
)
|
||||
|
||||
Latin5BulgarianModel = {
|
||||
'char_to_order_map': Latin5_BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Bulgairan',
|
||||
}
|
||||
|
||||
Win1251BulgarianModel = {
|
||||
'char_to_order_map': win1251BulgarianCharToOrderMap,
|
||||
'precedence_matrix': BulgarianLangModel,
|
||||
'typical_positive_ratio': 0.969392,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Bulgarian',
|
||||
}
|
||||
@@ -1,333 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# KOI8-R language model
|
||||
# Character Mapping Table:
|
||||
KOI8R_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
|
||||
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
|
||||
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
|
||||
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
|
||||
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
|
||||
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
|
||||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||
)
|
||||
|
||||
win1251_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
)
|
||||
|
||||
latin5_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
macCyrillic_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||
)
|
||||
|
||||
IBM855_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
||||
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
||||
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
||||
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
|
||||
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
||||
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
||||
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||
)
|
||||
|
||||
IBM866_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 97.6601%
|
||||
# first 1024 sequences: 2.3389%
|
||||
# rest sequences: 0.1237%
|
||||
# negative sequences: 0.0009%
|
||||
RussianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
|
||||
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
|
||||
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
|
||||
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
|
||||
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
|
||||
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
|
||||
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
|
||||
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
|
||||
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
|
||||
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
|
||||
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
|
||||
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
|
||||
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
|
||||
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
|
||||
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
|
||||
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
|
||||
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
|
||||
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
|
||||
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
|
||||
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
|
||||
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
|
||||
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
|
||||
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
|
||||
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
|
||||
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
|
||||
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
|
||||
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
|
||||
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
|
||||
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
|
||||
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
|
||||
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
|
||||
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
|
||||
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
)
|
||||
|
||||
Koi8rModel = {
|
||||
'char_to_order_map': KOI8R_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "KOI8-R",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Win1251CyrillicModel = {
|
||||
'char_to_order_map': win1251_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1251",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Latin5CyrillicModel = {
|
||||
'char_to_order_map': latin5_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-5",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
MacCyrillicModel = {
|
||||
'char_to_order_map': macCyrillic_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "MacCyrillic",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm866Model = {
|
||||
'char_to_order_map': IBM866_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM866",
|
||||
'language': 'Russian',
|
||||
}
|
||||
|
||||
Ibm855Model = {
|
||||
'char_to_order_map': IBM855_char_to_order_map,
|
||||
'precedence_matrix': RussianLangModel,
|
||||
'typical_positive_ratio': 0.976601,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "IBM855",
|
||||
'language': 'Russian',
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin7_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
win1253_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.2851%
|
||||
# first 1024 sequences:1.7001%
|
||||
# rest sequences: 0.0359%
|
||||
# negative sequences: 0.0148%
|
||||
GreekLangModel = (
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
||||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
||||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
||||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
||||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
||||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
||||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
||||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
||||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
||||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
||||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
||||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
||||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
||||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
||||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin7GreekModel = {
|
||||
'char_to_order_map': Latin7_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "ISO-8859-7",
|
||||
'language': 'Greek',
|
||||
}
|
||||
|
||||
Win1253GreekModel = {
|
||||
'char_to_order_map': win1253_char_to_order_map,
|
||||
'precedence_matrix': GreekLangModel,
|
||||
'typical_positive_ratio': 0.982851,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1253",
|
||||
'language': 'Greek',
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Simon Montagu
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Shoshannah Forbes - original C code (?)
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Windows-1255 language model
|
||||
# Character Mapping Table:
|
||||
WIN1255_CHAR_TO_ORDER_MAP = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
|
||||
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
|
||||
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
|
||||
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
|
||||
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
|
||||
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
|
||||
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
|
||||
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
|
||||
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
|
||||
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
|
||||
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
|
||||
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.4004%
|
||||
# first 1024 sequences: 1.5981%
|
||||
# rest sequences: 0.087%
|
||||
# negative sequences: 0.0015%
|
||||
HEBREW_LANG_MODEL = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
|
||||
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
|
||||
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
|
||||
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
|
||||
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
|
||||
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
|
||||
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
|
||||
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
|
||||
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
|
||||
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
|
||||
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
|
||||
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
|
||||
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
|
||||
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
|
||||
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
|
||||
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
|
||||
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
|
||||
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
||||
)
|
||||
|
||||
Win1255HebrewModel = {
|
||||
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
|
||||
'precedence_matrix': HEBREW_LANG_MODEL,
|
||||
'typical_positive_ratio': 0.984004,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1255",
|
||||
'language': 'Hebrew',
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin2_HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
|
||||
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
|
||||
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
|
||||
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
|
||||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
win1250HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
|
||||
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
|
||||
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
|
||||
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
|
||||
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 94.7368%
|
||||
# first 1024 sequences:5.2623%
|
||||
# rest sequences: 0.8894%
|
||||
# negative sequences: 0.0009%
|
||||
HungarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
|
||||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
|
||||
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
|
||||
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
|
||||
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
|
||||
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
|
||||
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
|
||||
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
|
||||
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
|
||||
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
|
||||
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
|
||||
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
|
||||
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
|
||||
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
|
||||
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
|
||||
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
|
||||
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
|
||||
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
|
||||
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
|
||||
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
|
||||
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
|
||||
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
|
||||
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
|
||||
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
|
||||
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
|
||||
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
|
||||
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
|
||||
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
|
||||
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
|
||||
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin2HungarianModel = {
|
||||
'char_to_order_map': Latin2_HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-2",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
|
||||
Win1250HungarianModel = {
|
||||
'char_to_order_map': win1250HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "windows-1250",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
@@ -1,199 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# The following result for thai was collected from a limited sample (1M).
|
||||
|
||||
# Character Mapping Table:
|
||||
TIS620CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
|
||||
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
|
||||
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
|
||||
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
|
||||
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
|
||||
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
|
||||
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
|
||||
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
|
||||
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
|
||||
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
|
||||
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
|
||||
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 92.6386%
|
||||
# first 1024 sequences:7.3177%
|
||||
# rest sequences: 1.0230%
|
||||
# negative sequences: 0.0436%
|
||||
ThaiLangModel = (
|
||||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
||||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
||||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
||||
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
|
||||
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
|
||||
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
|
||||
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
|
||||
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
|
||||
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
|
||||
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
|
||||
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
|
||||
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
|
||||
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
|
||||
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
|
||||
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
|
||||
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
|
||||
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
|
||||
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
|
||||
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
|
||||
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
|
||||
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
|
||||
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
|
||||
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
|
||||
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
|
||||
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
|
||||
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
|
||||
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
|
||||
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
|
||||
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
|
||||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
|
||||
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
|
||||
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
|
||||
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
|
||||
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
|
||||
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
|
||||
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
|
||||
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
|
||||
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
|
||||
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
|
||||
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
|
||||
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
|
||||
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
|
||||
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
|
||||
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
|
||||
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
|
||||
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
TIS620ThaiModel = {
|
||||
'char_to_order_map': TIS620CharToOrderMap,
|
||||
'precedence_matrix': ThaiLangModel,
|
||||
'typical_positive_ratio': 0.926386,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "TIS-620",
|
||||
'language': 'Thai',
|
||||
}
|
||||
@@ -1,193 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Özgür Baskın - Turkish Language Model
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin5_TurkishCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
||||
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
||||
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
||||
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
||||
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
||||
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
||||
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
||||
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
||||
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
||||
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
||||
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
||||
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
||||
)
|
||||
|
||||
TurkishLangModel = (
|
||||
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
||||
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
||||
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
||||
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
||||
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
||||
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
||||
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
||||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
||||
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
||||
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
||||
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
||||
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
||||
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
||||
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
||||
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
||||
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
||||
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
||||
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
||||
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
||||
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
||||
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
||||
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
||||
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
||||
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
||||
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
||||
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
||||
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
||||
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
||||
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
||||
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
||||
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
||||
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
||||
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
||||
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
||||
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
||||
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
||||
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
||||
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
||||
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
||||
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
||||
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
||||
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
||||
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
||||
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
||||
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
||||
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
||||
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
||||
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
||||
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
||||
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
||||
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
||||
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
||||
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin5TurkishModel = {
|
||||
'char_to_order_map': Latin5_TurkishCharToOrderMap,
|
||||
'precedence_matrix': TurkishLangModel,
|
||||
'typical_positive_ratio': 0.970290,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-9",
|
||||
'language': 'Turkish',
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
UDF = 0 # undefined
|
||||
OTH = 1 # other
|
||||
ASC = 2 # ascii capital letter
|
||||
ASS = 3 # ascii small letter
|
||||
ACV = 4 # accent capital vowel
|
||||
ACO = 5 # accent capital other
|
||||
ASV = 6 # accent small vowel
|
||||
ASO = 7 # accent small other
|
||||
CLASS_NUM = 8 # total classes
|
||||
|
||||
Latin1_CharToClass = (
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
|
||||
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
|
||||
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
|
||||
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
|
||||
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
|
||||
OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
|
||||
OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
|
||||
UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
|
||||
OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
|
||||
ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
|
||||
ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
|
||||
ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
|
||||
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
|
||||
)
|
||||
|
||||
# 0 : illegal
|
||||
# 1 : very unlikely
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
Latin1ClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
||||
0, 3, 3, 3, 1, 1, 3, 3, # ASS
|
||||
0, 3, 3, 3, 1, 2, 1, 2, # ACV
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ACO
|
||||
0, 3, 1, 3, 1, 1, 1, 3, # ASV
|
||||
0, 3, 1, 3, 1, 1, 3, 3, # ASO
|
||||
)
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
super(Latin1Prober, self).__init__()
|
||||
self._last_char_class = None
|
||||
self._freq_counter = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
CharSetProber.reset(self)
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "ISO-8859-1"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
byte_str = self.filter_with_english_letters(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = Latin1_CharToClass[c]
|
||||
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
|
||||
+ char_class]
|
||||
if freq == 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
self._freq_counter[freq] += 1
|
||||
self._last_char_class = char_class
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
total = sum(self._freq_counter)
|
||||
if total < 0.01:
|
||||
confidence = 0.0
|
||||
else:
|
||||
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
|
||||
/ total)
|
||||
if confidence < 0.0:
|
||||
confidence = 0.0
|
||||
# lower the confidence of latin1 so that other more accurate
|
||||
# detector can take priority.
|
||||
confidence = confidence * 0.73
|
||||
return confidence
|
||||
@@ -1,91 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
"""
|
||||
MultiByteCharSetProber
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.distribution_analyzer = None
|
||||
self.coding_sm = None
|
||||
self._last_char = [0, 0]
|
||||
|
||||
def reset(self):
|
||||
super(MultiByteCharSetProber, self).reset()
|
||||
if self.coding_sm:
|
||||
self.coding_sm.reset()
|
||||
if self.distribution_analyzer:
|
||||
self.distribution_analyzer.reset()
|
||||
self._last_char = [0, 0]
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.distribution_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
return self.distribution_analyzer.get_confidence()
|
||||
@@ -1,54 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .utf8prober import UTF8Prober
|
||||
from .sjisprober import SJISProber
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .gb2312prober import GB2312Prober
|
||||
from .euckrprober import EUCKRProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .big5prober import Big5Prober
|
||||
from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self.probers = [
|
||||
UTF8Prober(),
|
||||
SJISProber(),
|
||||
EUCJPProber(),
|
||||
GB2312Prober(),
|
||||
EUCKRProber(),
|
||||
CP949Prober(),
|
||||
Big5Prober(),
|
||||
EUCTWProber()
|
||||
]
|
||||
self.reset()
|
||||
@@ -1,572 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
4,4,4,4,4,4,4,4, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
4,3,3,3,3,3,3,3, # a0 - a7
|
||||
3,3,3,3,3,3,3,3, # a8 - af
|
||||
3,3,3,3,3,3,3,3, # b0 - b7
|
||||
3,3,3,3,3,3,3,3, # b8 - bf
|
||||
3,3,3,3,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||
)
|
||||
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||
'class_factor': 5,
|
||||
'state_table': BIG5_ST,
|
||||
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_CLS = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
|
||||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
|
||||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
|
||||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
|
||||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
|
||||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
|
||||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
|
||||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
|
||||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
|
||||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_ST = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||
)
|
||||
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': CP949_ST,
|
||||
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_CLS = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
4,4,4,5,4,4,4,4, # 18 - 1f
|
||||
4,4,4,4,4,4,4,4, # 20 - 27
|
||||
4,4,4,4,4,4,4,4, # 28 - 2f
|
||||
4,4,4,4,4,4,4,4, # 30 - 37
|
||||
4,4,4,4,4,4,4,4, # 38 - 3f
|
||||
4,4,4,4,4,4,4,4, # 40 - 47
|
||||
4,4,4,4,4,4,4,4, # 48 - 4f
|
||||
4,4,4,4,4,4,4,4, # 50 - 57
|
||||
4,4,4,4,4,4,4,4, # 58 - 5f
|
||||
4,4,4,4,4,4,4,4, # 60 - 67
|
||||
4,4,4,4,4,4,4,4, # 68 - 6f
|
||||
4,4,4,4,4,4,4,4, # 70 - 77
|
||||
4,4,4,4,4,4,4,4, # 78 - 7f
|
||||
5,5,5,5,5,5,5,5, # 80 - 87
|
||||
5,5,5,5,5,5,1,3, # 88 - 8f
|
||||
5,5,5,5,5,5,5,5, # 90 - 97
|
||||
5,5,5,5,5,5,5,5, # 98 - 9f
|
||||
5,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_ST = (
|
||||
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||
)
|
||||
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': EUCJP_ST,
|
||||
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,3,3,3, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,3,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_ST = (
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||
)
|
||||
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||
'class_factor': 4,
|
||||
'state_table': EUCKR_ST,
|
||||
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_CLS = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
2,2,2,0,2,2,2,2, # 18 - 1f
|
||||
2,2,2,2,2,2,2,2, # 20 - 27
|
||||
2,2,2,2,2,2,2,2, # 28 - 2f
|
||||
2,2,2,2,2,2,2,2, # 30 - 37
|
||||
2,2,2,2,2,2,2,2, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,2, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,6,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,3,4,4,4,4,4,4, # a0 - a7
|
||||
5,5,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,3,1,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_ST = (
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': EUCTW_ST,
|
||||
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||
'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
GB2312_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
3,3,3,3,3,3,3,3, # 30 - 37
|
||||
3,3,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,4, # 78 - 7f
|
||||
5,6,6,6,6,6,6,6, # 80 - 87
|
||||
6,6,6,6,6,6,6,6, # 88 - 8f
|
||||
6,6,6,6,6,6,6,6, # 90 - 97
|
||||
6,6,6,6,6,6,6,6, # 98 - 9f
|
||||
6,6,6,6,6,6,6,6, # a0 - a7
|
||||
6,6,6,6,6,6,6,6, # a8 - af
|
||||
6,6,6,6,6,6,6,6, # b0 - b7
|
||||
6,6,6,6,6,6,6,6, # b8 - bf
|
||||
6,6,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
6,6,6,6,6,6,6,6, # e0 - e7
|
||||
6,6,6,6,6,6,6,6, # e8 - ef
|
||||
6,6,6,6,6,6,6,6, # f0 - f7
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
# it is used for frequency analysis only, and we are validating
|
||||
# each code range there as well. So it is safe to set it to be
|
||||
# 2 here.
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': GB2312_ST,
|
||||
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||
'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
3,3,3,3,3,2,2,3, # 80 - 87
|
||||
3,3,3,3,3,3,3,3, # 88 - 8f
|
||||
3,3,3,3,3,3,3,3, # 90 - 97
|
||||
3,3,3,3,3,3,3,3, # 98 - 9f
|
||||
#0xa0 is illegal in sjis encoding, but some pages does
|
||||
#contain such byte. We need to be more error forgiven.
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,4,4,4, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
|
||||
SJIS_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||
)
|
||||
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': SJIS_ST,
|
||||
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||
'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_ST = (
|
||||
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2BE_ST,
|
||||
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_ST = (
|
||||
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2LE_ST,
|
||||
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
UTF8_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
2,2,2,2,3,3,3,3, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
5,5,5,5,5,5,5,5, # a0 - a7
|
||||
5,5,5,5,5,5,5,5, # a8 - af
|
||||
5,5,5,5,5,5,5,5, # b0 - b7
|
||||
5,5,5,5,5,5,5,5, # b8 - bf
|
||||
0,0,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
7,8,8,8,8,8,8,8, # e0 - e7
|
||||
8,8,8,8,8,9,8,8, # e8 - ef
|
||||
10,11,11,11,11,11,11,11, # f0 - f7
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||
)
|
||||
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||
'class_factor': 16,
|
||||
'state_table': UTF8_ST,
|
||||
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-8'}
|
||||
@@ -1,132 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
|
||||
def __init__(self, model, reversed=False, name_prober=None):
|
||||
super(SingleByteCharSetProber, self).__init__()
|
||||
self._model = model
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._reversed = reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._name_prober = name_prober
|
||||
self._last_order = None
|
||||
self._seq_counters = None
|
||||
self._total_seqs = None
|
||||
self._total_char = None
|
||||
self._freq_char = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SingleByteCharSetProber, self).reset()
|
||||
# char order of last character
|
||||
self._last_order = 255
|
||||
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
|
||||
self._total_seqs = 0
|
||||
self._total_char = 0
|
||||
# characters that fall in our sampling range
|
||||
self._freq_char = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.charset_name
|
||||
else:
|
||||
return self._model['charset_name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.language
|
||||
else:
|
||||
return self._model.get('language')
|
||||
|
||||
def feed(self, byte_str):
|
||||
if not self._model['keep_english_letter']:
|
||||
byte_str = self.filter_international_words(byte_str)
|
||||
if not byte_str:
|
||||
return self.state
|
||||
char_to_order_map = self._model['char_to_order_map']
|
||||
for i, c in enumerate(byte_str):
|
||||
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
|
||||
# but that leads to 27 more test failures than before.
|
||||
order = char_to_order_map[c]
|
||||
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
|
||||
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
|
||||
# to make it closer to the original intent. The only difference
|
||||
# is whether or not we count digits and control characters for
|
||||
# _total_char purposes.
|
||||
if order < CharacterCategory.CONTROL:
|
||||
self._total_char += 1
|
||||
if order < self.SAMPLE_SIZE:
|
||||
self._freq_char += 1
|
||||
if self._last_order < self.SAMPLE_SIZE:
|
||||
self._total_seqs += 1
|
||||
if not self._reversed:
|
||||
i = (self._last_order * self.SAMPLE_SIZE) + order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
else: # reverse the order of the letters in the lookup
|
||||
i = (order * self.SAMPLE_SIZE) + self._last_order
|
||||
model = self._model['precedence_matrix'][i]
|
||||
self._seq_counters[model] += 1
|
||||
self._last_order = order
|
||||
|
||||
charset_name = self._model['charset_name']
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
|
||||
confidence = self.get_confidence()
|
||||
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, we have a winner',
|
||||
charset_name, confidence)
|
||||
self._state = ProbingState.FOUND_IT
|
||||
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, below negative '
|
||||
'shortcut threshhold %s', charset_name,
|
||||
confidence,
|
||||
self.NEGATIVE_SHORTCUT_THRESHOLD)
|
||||
self._state = ProbingState.NOT_ME
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
r = 0.01
|
||||
if self._total_seqs > 0:
|
||||
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
|
||||
self._total_seqs / self._model['typical_positive_ratio'])
|
||||
r = r * self._freq_char / self._total_char
|
||||
if r >= 1.0:
|
||||
r = 0.99
|
||||
return r
|
||||
@@ -1,73 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||
Latin5CyrillicModel, MacCyrillicModel,
|
||||
Ibm866Model, Ibm855Model)
|
||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from .langthaimodel import TIS620ThaiModel
|
||||
from .langhebrewmodel import Win1255HebrewModel
|
||||
from .hebrewprober import HebrewProber
|
||||
from .langturkishmodel import Latin5TurkishModel
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
super(SBCSGroupProber, self).__init__()
|
||||
self.probers = [
|
||||
SingleByteCharSetProber(Win1251CyrillicModel),
|
||||
SingleByteCharSetProber(Koi8rModel),
|
||||
SingleByteCharSetProber(Latin5CyrillicModel),
|
||||
SingleByteCharSetProber(MacCyrillicModel),
|
||||
SingleByteCharSetProber(Ibm866Model),
|
||||
SingleByteCharSetProber(Ibm855Model),
|
||||
SingleByteCharSetProber(Latin7GreekModel),
|
||||
SingleByteCharSetProber(Win1253GreekModel),
|
||||
SingleByteCharSetProber(Latin5BulgarianModel),
|
||||
SingleByteCharSetProber(Win1251BulgarianModel),
|
||||
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
|
||||
# after we retrain model.
|
||||
# SingleByteCharSetProber(Latin2HungarianModel),
|
||||
# SingleByteCharSetProber(Win1250HungarianModel),
|
||||
SingleByteCharSetProber(TIS620ThaiModel),
|
||||
SingleByteCharSetProber(Latin5TurkishModel),
|
||||
]
|
||||
hebrew_prober = HebrewProber()
|
||||
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
|
||||
False, hebrew_prober)
|
||||
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
|
||||
hebrew_prober)
|
||||
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
|
||||
self.probers.extend([hebrew_prober, logical_hebrew_prober,
|
||||
visual_hebrew_prober])
|
||||
|
||||
self.reset()
|
||||
@@ -1,92 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJIS_SM_MODEL
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(SJISProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||
self.context_analyzer = SJISContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SJISProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self.context_analyzer.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char[2 - char_len:],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
|
||||
- char_len], char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
@@ -1,278 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
"""
|
||||
Module containing the UniversalDetector detector class, which is the primary
|
||||
class a user of ``chardet`` should use.
|
||||
|
||||
:author: Mark Pilgrim (initial port to Python)
|
||||
:author: Shy Shalom (original C code)
|
||||
:author: Dan Blanchard (major refactoring for 3.0)
|
||||
:author: Ian Cordasco
|
||||
"""
|
||||
|
||||
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
|
||||
|
||||
class UniversalDetector(object):
|
||||
"""
|
||||
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
|
||||
and coordinates all of the different charset probers.
|
||||
|
||||
To get a ``dict`` containing an encoding and its confidence, you can simply
|
||||
run:
|
||||
|
||||
.. code::
|
||||
|
||||
u = UniversalDetector()
|
||||
u.feed(some_bytes)
|
||||
u.close()
|
||||
detected = u.result
|
||||
|
||||
"""
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
|
||||
ESC_DETECTOR = re.compile(b'(\033|~{)')
|
||||
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
|
||||
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
|
||||
'iso-8859-2': 'Windows-1250',
|
||||
'iso-8859-5': 'Windows-1251',
|
||||
'iso-8859-6': 'Windows-1256',
|
||||
'iso-8859-7': 'Windows-1253',
|
||||
'iso-8859-8': 'Windows-1255',
|
||||
'iso-8859-9': 'Windows-1254',
|
||||
'iso-8859-13': 'Windows-1257'}
|
||||
|
||||
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||
self._esc_charset_prober = None
|
||||
self._charset_probers = []
|
||||
self.result = None
|
||||
self.done = None
|
||||
self._got_data = None
|
||||
self._input_state = None
|
||||
self._last_char = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._has_win_bytes = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset the UniversalDetector and all of its probers back to their
|
||||
initial states. This is called by ``__init__``, so you only need to
|
||||
call this directly in between analyses of different documents.
|
||||
"""
|
||||
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
|
||||
self.done = False
|
||||
self._got_data = False
|
||||
self._has_win_bytes = False
|
||||
self._input_state = InputState.PURE_ASCII
|
||||
self._last_char = b''
|
||||
if self._esc_charset_prober:
|
||||
self._esc_charset_prober.reset()
|
||||
for prober in self._charset_probers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, byte_str):
|
||||
"""
|
||||
Takes a chunk of a document and feeds it through all of the relevant
|
||||
charset probers.
|
||||
|
||||
After calling ``feed``, you can check the value of the ``done``
|
||||
attribute to see if you need to continue feeding the
|
||||
``UniversalDetector`` more data, or if it has made a prediction
|
||||
(in the ``result`` attribute).
|
||||
|
||||
.. note::
|
||||
You should always call ``close`` when you're done feeding in your
|
||||
document if ``done`` is not already ``True``.
|
||||
"""
|
||||
if self.done:
|
||||
return
|
||||
|
||||
if not len(byte_str):
|
||||
return
|
||||
|
||||
if not isinstance(byte_str, bytearray):
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
# First check for known BOMs, since these are guaranteed to be correct
|
||||
if not self._got_data:
|
||||
# If the data starts with BOM, we know it is UTF
|
||||
if byte_str.startswith(codecs.BOM_UTF8):
|
||||
# EF BB BF UTF-8 with BOM
|
||||
self.result = {'encoding': "UTF-8-SIG",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_UTF32_LE,
|
||||
codecs.BOM_UTF32_BE)):
|
||||
# FF FE 00 00 UTF-32, little-endian BOM
|
||||
# 00 00 FE FF UTF-32, big-endian BOM
|
||||
self.result = {'encoding': "UTF-32",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
|
||||
# FF FE UTF-16, little endian BOM
|
||||
# FE FF UTF-16, big endian BOM
|
||||
self.result = {'encoding': "UTF-16",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
self._got_data = True
|
||||
if self.result['encoding'] is not None:
|
||||
self.done = True
|
||||
return
|
||||
|
||||
# If none of those matched and we've only see ASCII so far, check
|
||||
# for high bytes and escape sequences
|
||||
if self._input_state == InputState.PURE_ASCII:
|
||||
if self.HIGH_BYTE_DETECTOR.search(byte_str):
|
||||
self._input_state = InputState.HIGH_BYTE
|
||||
elif self._input_state == InputState.PURE_ASCII and \
|
||||
self.ESC_DETECTOR.search(self._last_char + byte_str):
|
||||
self._input_state = InputState.ESC_ASCII
|
||||
|
||||
self._last_char = byte_str[-1:]
|
||||
|
||||
# If we've seen escape sequences, use the EscCharSetProber, which
|
||||
# uses a simple state machine to check for known escape sequences in
|
||||
# HZ and ISO-2022 encodings, since those are the only encodings that
|
||||
# use such sequences.
|
||||
if self._input_state == InputState.ESC_ASCII:
|
||||
if not self._esc_charset_prober:
|
||||
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
|
||||
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding':
|
||||
self._esc_charset_prober.charset_name,
|
||||
'confidence':
|
||||
self._esc_charset_prober.get_confidence(),
|
||||
'language':
|
||||
self._esc_charset_prober.language}
|
||||
self.done = True
|
||||
# If we've seen high bytes (i.e., those with values greater than 127),
|
||||
# we need to do more complicated checks using all our multi-byte and
|
||||
# single-byte probers that are left. The single-byte probers
|
||||
# use character bigram distributions to determine the encoding, whereas
|
||||
# the multi-byte probers use a combination of character unigram and
|
||||
# bigram distributions.
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
if not self._charset_probers:
|
||||
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
|
||||
# If we're checking non-CJK encodings, use single-byte prober
|
||||
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||
self._charset_probers.append(SBCSGroupProber())
|
||||
self._charset_probers.append(Latin1Prober())
|
||||
for prober in self._charset_probers:
|
||||
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding': prober.charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language}
|
||||
self.done = True
|
||||
break
|
||||
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||
self._has_win_bytes = True
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Stop analyzing the current document and come up with a final
|
||||
prediction.
|
||||
|
||||
:returns: The ``result`` attribute, a ``dict`` with the keys
|
||||
`encoding`, `confidence`, and `language`.
|
||||
"""
|
||||
# Don't bother with checks if we're already done
|
||||
if self.done:
|
||||
return self.result
|
||||
self.done = True
|
||||
|
||||
if not self._got_data:
|
||||
self.logger.debug('no data received!')
|
||||
|
||||
# Default to ASCII if it is all we've seen so far
|
||||
elif self._input_state == InputState.PURE_ASCII:
|
||||
self.result = {'encoding': 'ascii',
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
prober_confidence = None
|
||||
max_prober_confidence = 0.0
|
||||
max_prober = None
|
||||
for prober in self._charset_probers:
|
||||
if not prober:
|
||||
continue
|
||||
prober_confidence = prober.get_confidence()
|
||||
if prober_confidence > max_prober_confidence:
|
||||
max_prober_confidence = prober_confidence
|
||||
max_prober = prober
|
||||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||
charset_name = max_prober.charset_name
|
||||
lower_charset_name = max_prober.charset_name.lower()
|
||||
confidence = max_prober.get_confidence()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if self._has_win_bytes:
|
||||
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
self.result = {'encoding': charset_name,
|
||||
'confidence': confidence,
|
||||
'language': max_prober.language}
|
||||
|
||||
# Log all prober confidences if none met MINIMUM_THRESHOLD
|
||||
if self.logger.getEffectiveLevel() == logging.DEBUG:
|
||||
if self.result['encoding'] is None:
|
||||
self.logger.debug('no probers hit minimum threshold')
|
||||
for prober in self._charset_probers[0].probers:
|
||||
if not prober:
|
||||
continue
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
prober.charset_name,
|
||||
prober.language,
|
||||
prober.get_confidence())
|
||||
return self.result
|
||||
@@ -1,82 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8_SM_MODEL
|
||||
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
def __init__(self):
|
||||
super(UTF8Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||
self._num_mb_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(UTF8Prober, self).reset()
|
||||
self.coding_sm.reset()
|
||||
self._num_mb_chars = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "utf-8"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
coding_state = self.coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
if self.coding_sm.get_current_charlen() >= 2:
|
||||
self._num_mb_chars += 1
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
unlike = 0.99
|
||||
if self._num_mb_chars < 6:
|
||||
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
|
||||
return 1.0 - unlike
|
||||
else:
|
||||
return unlike
|
||||
@@ -1,9 +0,0 @@
|
||||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
__version__ = "3.0.2"
|
||||
VERSION = __version__.split('.')
|
||||
@@ -1 +0,0 @@
|
||||
from .core import *
|
||||
@@ -1,118 +0,0 @@
|
||||
from .core import encode, decode, alabel, ulabel, IDNAError
|
||||
import codecs
|
||||
import re
|
||||
|
||||
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self, data, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return "", 0
|
||||
|
||||
return encode(data), len(data)
|
||||
|
||||
def decode(self, data, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return u"", 0
|
||||
|
||||
return decode(data), len(data)
|
||||
|
||||
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
|
||||
def _buffer_encode(self, data, errors, final):
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return ("", 0)
|
||||
|
||||
labels = _unicode_dots_re.split(data)
|
||||
trailing_dot = u''
|
||||
if labels:
|
||||
if not labels[-1]:
|
||||
trailing_dot = '.'
|
||||
del labels[-1]
|
||||
elif not final:
|
||||
# Keep potentially unfinished label until the next call
|
||||
del labels[-1]
|
||||
if labels:
|
||||
trailing_dot = '.'
|
||||
|
||||
result = []
|
||||
size = 0
|
||||
for label in labels:
|
||||
result.append(alabel(label))
|
||||
if size:
|
||||
size += 1
|
||||
size += len(label)
|
||||
|
||||
# Join with U+002E
|
||||
result = ".".join(result) + trailing_dot
|
||||
size += len(trailing_dot)
|
||||
return (result, size)
|
||||
|
||||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
||||
def _buffer_decode(self, data, errors, final):
|
||||
if errors != 'strict':
|
||||
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
|
||||
|
||||
if not data:
|
||||
return (u"", 0)
|
||||
|
||||
# IDNA allows decoding to operate on Unicode strings, too.
|
||||
if isinstance(data, unicode):
|
||||
labels = _unicode_dots_re.split(data)
|
||||
else:
|
||||
# Must be ASCII string
|
||||
data = str(data)
|
||||
unicode(data, "ascii")
|
||||
labels = data.split(".")
|
||||
|
||||
trailing_dot = u''
|
||||
if labels:
|
||||
if not labels[-1]:
|
||||
trailing_dot = u'.'
|
||||
del labels[-1]
|
||||
elif not final:
|
||||
# Keep potentially unfinished label until the next call
|
||||
del labels[-1]
|
||||
if labels:
|
||||
trailing_dot = u'.'
|
||||
|
||||
result = []
|
||||
size = 0
|
||||
for label in labels:
|
||||
result.append(ulabel(label))
|
||||
if size:
|
||||
size += 1
|
||||
size += len(label)
|
||||
|
||||
result = u".".join(result) + trailing_dot
|
||||
size += len(trailing_dot)
|
||||
return (result, size)
|
||||
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
pass
|
||||
|
||||
def getregentry():
|
||||
return codecs.CodecInfo(
|
||||
name='idna',
|
||||
encode=Codec().encode,
|
||||
decode=Codec().decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
incrementaldecoder=IncrementalDecoder,
|
||||
streamwriter=StreamWriter,
|
||||
streamreader=StreamReader,
|
||||
)
|
||||
@@ -1,12 +0,0 @@
|
||||
from .core import *
|
||||
from .codec import *
|
||||
|
||||
def ToASCII(label):
|
||||
return encode(label)
|
||||
|
||||
def ToUnicode(label):
|
||||
return decode(label)
|
||||
|
||||
def nameprep(s):
|
||||
raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")
|
||||
|
||||
@@ -1,387 +0,0 @@
|
||||
from . import idnadata
|
||||
import bisect
|
||||
import unicodedata
|
||||
import re
|
||||
import sys
|
||||
from .intranges import intranges_contain
|
||||
|
||||
_virama_combining_class = 9
|
||||
_alabel_prefix = b'xn--'
|
||||
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
unichr = chr
|
||||
|
||||
class IDNAError(UnicodeError):
|
||||
""" Base exception for all IDNA-encoding related problems """
|
||||
pass
|
||||
|
||||
|
||||
class IDNABidiError(IDNAError):
|
||||
""" Exception when bidirectional requirements are not satisfied """
|
||||
pass
|
||||
|
||||
|
||||
class InvalidCodepoint(IDNAError):
|
||||
""" Exception when a disallowed or unallocated codepoint is used """
|
||||
pass
|
||||
|
||||
|
||||
class InvalidCodepointContext(IDNAError):
|
||||
""" Exception when the codepoint is not valid in the context it is used """
|
||||
pass
|
||||
|
||||
|
||||
def _combining_class(cp):
|
||||
return unicodedata.combining(unichr(cp))
|
||||
|
||||
def _is_script(cp, script):
|
||||
return intranges_contain(ord(cp), idnadata.scripts[script])
|
||||
|
||||
def _punycode(s):
|
||||
return s.encode('punycode')
|
||||
|
||||
def _unot(s):
|
||||
return 'U+{0:04X}'.format(s)
|
||||
|
||||
|
||||
def valid_label_length(label):
|
||||
|
||||
if len(label) > 63:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def valid_string_length(label, trailing_dot):
|
||||
|
||||
if len(label) > (254 if trailing_dot else 253):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_bidi(label, check_ltr=False):
|
||||
|
||||
# Bidi rules should only be applied if string contains RTL characters
|
||||
bidi_label = False
|
||||
for (idx, cp) in enumerate(label, 1):
|
||||
direction = unicodedata.bidirectional(cp)
|
||||
if direction == '':
|
||||
# String likely comes from a newer version of Unicode
|
||||
raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))
|
||||
if direction in ['R', 'AL', 'AN']:
|
||||
bidi_label = True
|
||||
break
|
||||
if not bidi_label and not check_ltr:
|
||||
return True
|
||||
|
||||
# Bidi rule 1
|
||||
direction = unicodedata.bidirectional(label[0])
|
||||
if direction in ['R', 'AL']:
|
||||
rtl = True
|
||||
elif direction == 'L':
|
||||
rtl = False
|
||||
else:
|
||||
raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))
|
||||
|
||||
valid_ending = False
|
||||
number_type = False
|
||||
for (idx, cp) in enumerate(label, 1):
|
||||
direction = unicodedata.bidirectional(cp)
|
||||
|
||||
if rtl:
|
||||
# Bidi rule 2
|
||||
if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
|
||||
raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))
|
||||
# Bidi rule 3
|
||||
if direction in ['R', 'AL', 'EN', 'AN']:
|
||||
valid_ending = True
|
||||
elif direction != 'NSM':
|
||||
valid_ending = False
|
||||
# Bidi rule 4
|
||||
if direction in ['AN', 'EN']:
|
||||
if not number_type:
|
||||
number_type = direction
|
||||
else:
|
||||
if number_type != direction:
|
||||
raise IDNABidiError('Can not mix numeral types in a right-to-left label')
|
||||
else:
|
||||
# Bidi rule 5
|
||||
if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
|
||||
raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))
|
||||
# Bidi rule 6
|
||||
if direction in ['L', 'EN']:
|
||||
valid_ending = True
|
||||
elif direction != 'NSM':
|
||||
valid_ending = False
|
||||
|
||||
if not valid_ending:
|
||||
raise IDNABidiError('Label ends with illegal codepoint directionality')
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def check_initial_combiner(label):
|
||||
|
||||
if unicodedata.category(label[0])[0] == 'M':
|
||||
raise IDNAError('Label begins with an illegal combining character')
|
||||
return True
|
||||
|
||||
|
||||
def check_hyphen_ok(label):
|
||||
|
||||
if label[2:4] == '--':
|
||||
raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
|
||||
if label[0] == '-' or label[-1] == '-':
|
||||
raise IDNAError('Label must not start or end with a hyphen')
|
||||
return True
|
||||
|
||||
|
||||
def check_nfc(label):
|
||||
|
||||
if unicodedata.normalize('NFC', label) != label:
|
||||
raise IDNAError('Label must be in Normalization Form C')
|
||||
|
||||
|
||||
def valid_contextj(label, pos):
|
||||
|
||||
cp_value = ord(label[pos])
|
||||
|
||||
if cp_value == 0x200c:
|
||||
|
||||
if pos > 0:
|
||||
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
|
||||
return True
|
||||
|
||||
ok = False
|
||||
for i in range(pos-1, -1, -1):
|
||||
joining_type = idnadata.joining_types.get(ord(label[i]))
|
||||
if joining_type == ord('T'):
|
||||
continue
|
||||
if joining_type in [ord('L'), ord('D')]:
|
||||
ok = True
|
||||
break
|
||||
|
||||
if not ok:
|
||||
return False
|
||||
|
||||
ok = False
|
||||
for i in range(pos+1, len(label)):
|
||||
joining_type = idnadata.joining_types.get(ord(label[i]))
|
||||
if joining_type == ord('T'):
|
||||
continue
|
||||
if joining_type in [ord('R'), ord('D')]:
|
||||
ok = True
|
||||
break
|
||||
return ok
|
||||
|
||||
if cp_value == 0x200d:
|
||||
|
||||
if pos > 0:
|
||||
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
|
||||
return True
|
||||
return False
|
||||
|
||||
else:
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def valid_contexto(label, pos, exception=False):
|
||||
|
||||
cp_value = ord(label[pos])
|
||||
|
||||
if cp_value == 0x00b7:
|
||||
if 0 < pos < len(label)-1:
|
||||
if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
|
||||
return True
|
||||
return False
|
||||
|
||||
elif cp_value == 0x0375:
|
||||
if pos < len(label)-1 and len(label) > 1:
|
||||
return _is_script(label[pos + 1], 'Greek')
|
||||
return False
|
||||
|
||||
elif cp_value == 0x05f3 or cp_value == 0x05f4:
|
||||
if pos > 0:
|
||||
return _is_script(label[pos - 1], 'Hebrew')
|
||||
return False
|
||||
|
||||
elif cp_value == 0x30fb:
|
||||
for cp in label:
|
||||
if cp == u'\u30fb':
|
||||
continue
|
||||
if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
|
||||
return True
|
||||
return False
|
||||
|
||||
elif 0x660 <= cp_value <= 0x669:
|
||||
for cp in label:
|
||||
if 0x6f0 <= ord(cp) <= 0x06f9:
|
||||
return False
|
||||
return True
|
||||
|
||||
elif 0x6f0 <= cp_value <= 0x6f9:
|
||||
for cp in label:
|
||||
if 0x660 <= ord(cp) <= 0x0669:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def check_label(label):
|
||||
|
||||
if isinstance(label, (bytes, bytearray)):
|
||||
label = label.decode('utf-8')
|
||||
if len(label) == 0:
|
||||
raise IDNAError('Empty Label')
|
||||
|
||||
check_nfc(label)
|
||||
check_hyphen_ok(label)
|
||||
check_initial_combiner(label)
|
||||
|
||||
for (pos, cp) in enumerate(label):
|
||||
cp_value = ord(cp)
|
||||
if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
|
||||
continue
|
||||
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
|
||||
if not valid_contextj(label, pos):
|
||||
raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
|
||||
if not valid_contexto(label, pos):
|
||||
raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
else:
|
||||
raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
|
||||
|
||||
check_bidi(label)
|
||||
|
||||
|
||||
def alabel(label):
|
||||
|
||||
try:
|
||||
label = label.encode('ascii')
|
||||
try:
|
||||
ulabel(label)
|
||||
except IDNAError:
|
||||
raise IDNAError('The label {0} is not a valid A-label'.format(label))
|
||||
if not valid_label_length(label):
|
||||
raise IDNAError('Label too long')
|
||||
return label
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
|
||||
if not label:
|
||||
raise IDNAError('No Input')
|
||||
|
||||
label = unicode(label)
|
||||
check_label(label)
|
||||
label = _punycode(label)
|
||||
label = _alabel_prefix + label
|
||||
|
||||
if not valid_label_length(label):
|
||||
raise IDNAError('Label too long')
|
||||
|
||||
return label
|
||||
|
||||
|
||||
def ulabel(label):
|
||||
|
||||
if not isinstance(label, (bytes, bytearray)):
|
||||
try:
|
||||
label = label.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
check_label(label)
|
||||
return label
|
||||
|
||||
label = label.lower()
|
||||
if label.startswith(_alabel_prefix):
|
||||
label = label[len(_alabel_prefix):]
|
||||
else:
|
||||
check_label(label)
|
||||
return label.decode('ascii')
|
||||
|
||||
label = label.decode('punycode')
|
||||
check_label(label)
|
||||
return label
|
||||
|
||||
|
||||
def uts46_remap(domain, std3_rules=True, transitional=False):
|
||||
"""Re-map the characters in the string according to UTS46 processing."""
|
||||
from .uts46data import uts46data
|
||||
output = u""
|
||||
try:
|
||||
for pos, char in enumerate(domain):
|
||||
code_point = ord(char)
|
||||
uts46row = uts46data[code_point if code_point < 256 else
|
||||
bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
|
||||
status = uts46row[1]
|
||||
replacement = uts46row[2] if len(uts46row) == 3 else None
|
||||
if (status == "V" or
|
||||
(status == "D" and not transitional) or
|
||||
(status == "3" and std3_rules and replacement is None)):
|
||||
output += char
|
||||
elif replacement is not None and (status == "M" or
|
||||
(status == "3" and std3_rules) or
|
||||
(status == "D" and transitional)):
|
||||
output += replacement
|
||||
elif status != "I":
|
||||
raise IndexError()
|
||||
return unicodedata.normalize("NFC", output)
|
||||
except IndexError:
|
||||
raise InvalidCodepoint(
|
||||
"Codepoint {0} not allowed at position {1} in {2}".format(
|
||||
_unot(code_point), pos + 1, repr(domain)))
|
||||
|
||||
|
||||
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
|
||||
|
||||
if isinstance(s, (bytes, bytearray)):
|
||||
s = s.decode("ascii")
|
||||
if uts46:
|
||||
s = uts46_remap(s, std3_rules, transitional)
|
||||
trailing_dot = False
|
||||
result = []
|
||||
if strict:
|
||||
labels = s.split('.')
|
||||
else:
|
||||
labels = _unicode_dots_re.split(s)
|
||||
while labels and not labels[0]:
|
||||
del labels[0]
|
||||
if not labels:
|
||||
raise IDNAError('Empty domain')
|
||||
if labels[-1] == '':
|
||||
del labels[-1]
|
||||
trailing_dot = True
|
||||
for label in labels:
|
||||
result.append(alabel(label))
|
||||
if trailing_dot:
|
||||
result.append(b'')
|
||||
s = b'.'.join(result)
|
||||
if not valid_string_length(s, trailing_dot):
|
||||
raise IDNAError('Domain too long')
|
||||
return s
|
||||
|
||||
|
||||
def decode(s, strict=False, uts46=False, std3_rules=False):
|
||||
|
||||
if isinstance(s, (bytes, bytearray)):
|
||||
s = s.decode("ascii")
|
||||
if uts46:
|
||||
s = uts46_remap(s, std3_rules, False)
|
||||
trailing_dot = False
|
||||
result = []
|
||||
if not strict:
|
||||
labels = _unicode_dots_re.split(s)
|
||||
else:
|
||||
labels = s.split(u'.')
|
||||
while labels and not labels[0]:
|
||||
del labels[0]
|
||||
if not labels:
|
||||
raise IDNAError('Empty domain')
|
||||
if not labels[-1]:
|
||||
del labels[-1]
|
||||
trailing_dot = True
|
||||
for label in labels:
|
||||
result.append(ulabel(label))
|
||||
if trailing_dot:
|
||||
result.append(u'')
|
||||
return u'.'.join(result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,53 +0,0 @@
|
||||
"""
|
||||
Given a list of integers, made up of (hopefully) a small number of long runs
|
||||
of consecutive integers, compute a representation of the form
|
||||
((start1, end1), (start2, end2) ...). Then answer the question "was x present
|
||||
in the original list?" in time O(log(# runs)).
|
||||
"""
|
||||
|
||||
import bisect
|
||||
|
||||
def intranges_from_list(list_):
|
||||
"""Represent a list of integers as a sequence of ranges:
|
||||
((start_0, end_0), (start_1, end_1), ...), such that the original
|
||||
integers are exactly those x such that start_i <= x < end_i for some i.
|
||||
|
||||
Ranges are encoded as single integers (start << 32 | end), not as tuples.
|
||||
"""
|
||||
|
||||
sorted_list = sorted(list_)
|
||||
ranges = []
|
||||
last_write = -1
|
||||
for i in range(len(sorted_list)):
|
||||
if i+1 < len(sorted_list):
|
||||
if sorted_list[i] == sorted_list[i+1]-1:
|
||||
continue
|
||||
current_range = sorted_list[last_write+1:i+1]
|
||||
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
|
||||
last_write = i
|
||||
|
||||
return tuple(ranges)
|
||||
|
||||
def _encode_range(start, end):
|
||||
return (start << 32) | end
|
||||
|
||||
def _decode_range(r):
|
||||
return (r >> 32), (r & ((1 << 32) - 1))
|
||||
|
||||
|
||||
def intranges_contain(int_, ranges):
|
||||
"""Determine if `int_` falls into one of the ranges in `ranges`."""
|
||||
tuple_ = _encode_range(int_, 0)
|
||||
pos = bisect.bisect_left(ranges, tuple_)
|
||||
# we could be immediately ahead of a tuple (start, end)
|
||||
# with start < int_ <= end
|
||||
if pos > 0:
|
||||
left, right = _decode_range(ranges[pos-1])
|
||||
if left <= int_ < right:
|
||||
return True
|
||||
# or we could be immediately behind a tuple (int_, end)
|
||||
if pos < len(ranges):
|
||||
left, _ = _decode_range(ranges[pos])
|
||||
if left == int_:
|
||||
return True
|
||||
return False
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,97 +0,0 @@
|
||||
"""
|
||||
urllib3 - Thread-safe connection pooling and re-using.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import warnings
|
||||
|
||||
from .connectionpool import (
|
||||
HTTPConnectionPool,
|
||||
HTTPSConnectionPool,
|
||||
connection_from_url
|
||||
)
|
||||
|
||||
from . import exceptions
|
||||
from .filepost import encode_multipart_formdata
|
||||
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
|
||||
from .response import HTTPResponse
|
||||
from .util.request import make_headers
|
||||
from .util.url import get_host
|
||||
from .util.timeout import Timeout
|
||||
from .util.retry import Retry
|
||||
|
||||
|
||||
# Set default logging handler to avoid "No handler found" warnings.
|
||||
import logging
|
||||
try: # Python 2.7+
|
||||
from logging import NullHandler
|
||||
except ImportError:
|
||||
class NullHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
||||
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
|
||||
__license__ = 'MIT'
|
||||
__version__ = '1.21.1'
|
||||
|
||||
__all__ = (
|
||||
'HTTPConnectionPool',
|
||||
'HTTPSConnectionPool',
|
||||
'PoolManager',
|
||||
'ProxyManager',
|
||||
'HTTPResponse',
|
||||
'Retry',
|
||||
'Timeout',
|
||||
'add_stderr_logger',
|
||||
'connection_from_url',
|
||||
'disable_warnings',
|
||||
'encode_multipart_formdata',
|
||||
'get_host',
|
||||
'make_headers',
|
||||
'proxy_from_url',
|
||||
)
|
||||
|
||||
logging.getLogger(__name__).addHandler(NullHandler())
|
||||
|
||||
|
||||
def add_stderr_logger(level=logging.DEBUG):
|
||||
"""
|
||||
Helper for quickly adding a StreamHandler to the logger. Useful for
|
||||
debugging.
|
||||
|
||||
Returns the handler after adding it.
|
||||
"""
|
||||
# This method needs to be in this __init__.py to get the __name__ correct
|
||||
# even if urllib3 is vendored within another package.
|
||||
logger = logging.getLogger(__name__)
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(level)
|
||||
logger.debug('Added a stderr logging handler to logger: %s', __name__)
|
||||
return handler
|
||||
|
||||
|
||||
# ... Clean up.
|
||||
del NullHandler
|
||||
|
||||
|
||||
# All warning filters *must* be appended unless you're really certain that they
|
||||
# shouldn't be: otherwise, it's very hard for users to use most Python
|
||||
# mechanisms to silence them.
|
||||
# SecurityWarning's always go off by default.
|
||||
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
|
||||
# SubjectAltNameWarning's should go off once per host
|
||||
warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True)
|
||||
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
|
||||
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
|
||||
append=True)
|
||||
# SNIMissingWarnings should go off only once.
|
||||
warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True)
|
||||
|
||||
|
||||
def disable_warnings(category=exceptions.HTTPWarning):
|
||||
"""
|
||||
Helper for quickly disabling all urllib3 warnings.
|
||||
"""
|
||||
warnings.simplefilter('ignore', category)
|
||||
@@ -1,314 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from collections import Mapping, MutableMapping
|
||||
try:
|
||||
from threading import RLock
|
||||
except ImportError: # Platform-specific: No threads available
|
||||
class RLock:
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
pass
|
||||
|
||||
|
||||
try: # Python 2.7+
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from .packages.ordered_dict import OrderedDict
|
||||
from .packages.six import iterkeys, itervalues, PY3
|
||||
|
||||
|
||||
__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
|
||||
|
||||
|
||||
_Null = object()
|
||||
|
||||
|
||||
class RecentlyUsedContainer(MutableMapping):
|
||||
"""
|
||||
Provides a thread-safe dict-like container which maintains up to
|
||||
``maxsize`` keys while throwing away the least-recently-used keys beyond
|
||||
``maxsize``.
|
||||
|
||||
:param maxsize:
|
||||
Maximum number of recent elements to retain.
|
||||
|
||||
:param dispose_func:
|
||||
Every time an item is evicted from the container,
|
||||
``dispose_func(value)`` is called. Callback which will get called
|
||||
"""
|
||||
|
||||
ContainerCls = OrderedDict
|
||||
|
||||
def __init__(self, maxsize=10, dispose_func=None):
|
||||
self._maxsize = maxsize
|
||||
self.dispose_func = dispose_func
|
||||
|
||||
self._container = self.ContainerCls()
|
||||
self.lock = RLock()
|
||||
|
||||
def __getitem__(self, key):
|
||||
# Re-insert the item, moving it to the end of the eviction line.
|
||||
with self.lock:
|
||||
item = self._container.pop(key)
|
||||
self._container[key] = item
|
||||
return item
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
evicted_value = _Null
|
||||
with self.lock:
|
||||
# Possibly evict the existing value of 'key'
|
||||
evicted_value = self._container.get(key, _Null)
|
||||
self._container[key] = value
|
||||
|
||||
# If we didn't evict an existing value, we might have to evict the
|
||||
# least recently used item from the beginning of the container.
|
||||
if len(self._container) > self._maxsize:
|
||||
_key, evicted_value = self._container.popitem(last=False)
|
||||
|
||||
if self.dispose_func and evicted_value is not _Null:
|
||||
self.dispose_func(evicted_value)
|
||||
|
||||
def __delitem__(self, key):
|
||||
with self.lock:
|
||||
value = self._container.pop(key)
|
||||
|
||||
if self.dispose_func:
|
||||
self.dispose_func(value)
|
||||
|
||||
def __len__(self):
|
||||
with self.lock:
|
||||
return len(self._container)
|
||||
|
||||
def __iter__(self):
|
||||
raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
|
||||
|
||||
def clear(self):
|
||||
with self.lock:
|
||||
# Copy pointers to all values, then wipe the mapping
|
||||
values = list(itervalues(self._container))
|
||||
self._container.clear()
|
||||
|
||||
if self.dispose_func:
|
||||
for value in values:
|
||||
self.dispose_func(value)
|
||||
|
||||
def keys(self):
|
||||
with self.lock:
|
||||
return list(iterkeys(self._container))
|
||||
|
||||
|
||||
class HTTPHeaderDict(MutableMapping):
|
||||
"""
|
||||
:param headers:
|
||||
An iterable of field-value pairs. Must not contain multiple field names
|
||||
when compared case-insensitively.
|
||||
|
||||
:param kwargs:
|
||||
Additional field-value pairs to pass in to ``dict.update``.
|
||||
|
||||
A ``dict`` like container for storing HTTP Headers.
|
||||
|
||||
Field names are stored and compared case-insensitively in compliance with
|
||||
RFC 7230. Iteration provides the first case-sensitive key seen for each
|
||||
case-insensitive pair.
|
||||
|
||||
Using ``__setitem__`` syntax overwrites fields that compare equal
|
||||
case-insensitively in order to maintain ``dict``'s api. For fields that
|
||||
compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
|
||||
in a loop.
|
||||
|
||||
If multiple fields that are equal case-insensitively are passed to the
|
||||
constructor or ``.update``, the behavior is undefined and some will be
|
||||
lost.
|
||||
|
||||
>>> headers = HTTPHeaderDict()
|
||||
>>> headers.add('Set-Cookie', 'foo=bar')
|
||||
>>> headers.add('set-cookie', 'baz=quxx')
|
||||
>>> headers['content-length'] = '7'
|
||||
>>> headers['SET-cookie']
|
||||
'foo=bar, baz=quxx'
|
||||
>>> headers['Content-Length']
|
||||
'7'
|
||||
"""
|
||||
|
||||
def __init__(self, headers=None, **kwargs):
|
||||
super(HTTPHeaderDict, self).__init__()
|
||||
self._container = OrderedDict()
|
||||
if headers is not None:
|
||||
if isinstance(headers, HTTPHeaderDict):
|
||||
self._copy_from(headers)
|
||||
else:
|
||||
self.extend(headers)
|
||||
if kwargs:
|
||||
self.extend(kwargs)
|
||||
|
||||
def __setitem__(self, key, val):
|
||||
self._container[key.lower()] = [key, val]
|
||||
return self._container[key.lower()]
|
||||
|
||||
def __getitem__(self, key):
|
||||
val = self._container[key.lower()]
|
||||
return ', '.join(val[1:])
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._container[key.lower()]
|
||||
|
||||
def __contains__(self, key):
|
||||
return key.lower() in self._container
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
|
||||
return False
|
||||
if not isinstance(other, type(self)):
|
||||
other = type(self)(other)
|
||||
return (dict((k.lower(), v) for k, v in self.itermerged()) ==
|
||||
dict((k.lower(), v) for k, v in other.itermerged()))
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
if not PY3: # Python 2
|
||||
iterkeys = MutableMapping.iterkeys
|
||||
itervalues = MutableMapping.itervalues
|
||||
|
||||
__marker = object()
|
||||
|
||||
def __len__(self):
|
||||
return len(self._container)
|
||||
|
||||
def __iter__(self):
|
||||
# Only provide the originally cased names
|
||||
for vals in self._container.values():
|
||||
yield vals[0]
|
||||
|
||||
def pop(self, key, default=__marker):
|
||||
'''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
|
||||
If key is not found, d is returned if given, otherwise KeyError is raised.
|
||||
'''
|
||||
# Using the MutableMapping function directly fails due to the private marker.
|
||||
# Using ordinary dict.pop would expose the internal structures.
|
||||
# So let's reinvent the wheel.
|
||||
try:
|
||||
value = self[key]
|
||||
except KeyError:
|
||||
if default is self.__marker:
|
||||
raise
|
||||
return default
|
||||
else:
|
||||
del self[key]
|
||||
return value
|
||||
|
||||
def discard(self, key):
|
||||
try:
|
||||
del self[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def add(self, key, val):
|
||||
"""Adds a (name, value) pair, doesn't overwrite the value if it already
|
||||
exists.
|
||||
|
||||
>>> headers = HTTPHeaderDict(foo='bar')
|
||||
>>> headers.add('Foo', 'baz')
|
||||
>>> headers['foo']
|
||||
'bar, baz'
|
||||
"""
|
||||
key_lower = key.lower()
|
||||
new_vals = [key, val]
|
||||
# Keep the common case aka no item present as fast as possible
|
||||
vals = self._container.setdefault(key_lower, new_vals)
|
||||
if new_vals is not vals:
|
||||
vals.append(val)
|
||||
|
||||
def extend(self, *args, **kwargs):
|
||||
"""Generic import function for any type of header-like object.
|
||||
Adapted version of MutableMapping.update in order to insert items
|
||||
with self.add instead of self.__setitem__
|
||||
"""
|
||||
if len(args) > 1:
|
||||
raise TypeError("extend() takes at most 1 positional "
|
||||
"arguments ({0} given)".format(len(args)))
|
||||
other = args[0] if len(args) >= 1 else ()
|
||||
|
||||
if isinstance(other, HTTPHeaderDict):
|
||||
for key, val in other.iteritems():
|
||||
self.add(key, val)
|
||||
elif isinstance(other, Mapping):
|
||||
for key in other:
|
||||
self.add(key, other[key])
|
||||
elif hasattr(other, "keys"):
|
||||
for key in other.keys():
|
||||
self.add(key, other[key])
|
||||
else:
|
||||
for key, value in other:
|
||||
self.add(key, value)
|
||||
|
||||
for key, value in kwargs.items():
|
||||
self.add(key, value)
|
||||
|
||||
def getlist(self, key):
|
||||
"""Returns a list of all the values for the named field. Returns an
|
||||
empty list if the key doesn't exist."""
|
||||
try:
|
||||
vals = self._container[key.lower()]
|
||||
except KeyError:
|
||||
return []
|
||||
else:
|
||||
return vals[1:]
|
||||
|
||||
# Backwards compatibility for httplib
|
||||
getheaders = getlist
|
||||
getallmatchingheaders = getlist
|
||||
iget = getlist
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
|
||||
|
||||
def _copy_from(self, other):
|
||||
for key in other:
|
||||
val = other.getlist(key)
|
||||
if isinstance(val, list):
|
||||
# Don't need to convert tuples
|
||||
val = list(val)
|
||||
self._container[key.lower()] = [key] + val
|
||||
|
||||
def copy(self):
|
||||
clone = type(self)()
|
||||
clone._copy_from(self)
|
||||
return clone
|
||||
|
||||
def iteritems(self):
|
||||
"""Iterate over all header lines, including duplicate ones."""
|
||||
for key in self:
|
||||
vals = self._container[key.lower()]
|
||||
for val in vals[1:]:
|
||||
yield vals[0], val
|
||||
|
||||
def itermerged(self):
|
||||
"""Iterate over all headers, merging duplicate ones together."""
|
||||
for key in self:
|
||||
val = self._container[key.lower()]
|
||||
yield val[0], ', '.join(val[1:])
|
||||
|
||||
def items(self):
|
||||
return list(self.iteritems())
|
||||
|
||||
@classmethod
|
||||
def from_httplib(cls, message): # Python 2
|
||||
"""Read headers from a Python 2 httplib message object."""
|
||||
# python2.7 does not expose a proper API for exporting multiheaders
|
||||
# efficiently. This function re-reads raw lines from the message
|
||||
# object and extracts the multiheaders properly.
|
||||
headers = []
|
||||
|
||||
for line in message.headers:
|
||||
if line.startswith((' ', '\t')):
|
||||
key, value = headers[-1]
|
||||
headers[-1] = (key, value + '\r\n' + line.rstrip())
|
||||
continue
|
||||
|
||||
key, value = line.split(':', 1)
|
||||
headers.append((key, value.strip()))
|
||||
|
||||
return cls(headers)
|
||||
@@ -1,373 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
import warnings
|
||||
from .packages import six
|
||||
from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
|
||||
from .packages.six.moves.http_client import HTTPException # noqa: F401
|
||||
|
||||
try: # Compiled with SSL?
|
||||
import ssl
|
||||
BaseSSLError = ssl.SSLError
|
||||
except (ImportError, AttributeError): # Platform-specific: No SSL.
|
||||
ssl = None
|
||||
|
||||
class BaseSSLError(BaseException):
|
||||
pass
|
||||
|
||||
|
||||
try: # Python 3:
|
||||
# Not a no-op, we're adding this to the namespace so it can be imported.
|
||||
ConnectionError = ConnectionError
|
||||
except NameError: # Python 2:
|
||||
class ConnectionError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
from .exceptions import (
|
||||
NewConnectionError,
|
||||
ConnectTimeoutError,
|
||||
SubjectAltNameWarning,
|
||||
SystemTimeWarning,
|
||||
)
|
||||
from .packages.ssl_match_hostname import match_hostname, CertificateError
|
||||
|
||||
from .util.ssl_ import (
|
||||
resolve_cert_reqs,
|
||||
resolve_ssl_version,
|
||||
assert_fingerprint,
|
||||
create_urllib3_context,
|
||||
ssl_wrap_socket
|
||||
)
|
||||
|
||||
|
||||
from .util import connection
|
||||
|
||||
from ._collections import HTTPHeaderDict
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
port_by_scheme = {
|
||||
'http': 80,
|
||||
'https': 443,
|
||||
}
|
||||
|
||||
# When updating RECENT_DATE, move it to
|
||||
# within two years of the current date, and no
|
||||
# earlier than 6 months ago.
|
||||
RECENT_DATE = datetime.date(2016, 1, 1)
|
||||
|
||||
|
||||
class DummyConnection(object):
|
||||
"""Used to detect a failed ConnectionCls import."""
|
||||
pass
|
||||
|
||||
|
||||
class HTTPConnection(_HTTPConnection, object):
|
||||
"""
|
||||
Based on httplib.HTTPConnection but provides an extra constructor
|
||||
backwards-compatibility layer between older and newer Pythons.
|
||||
|
||||
Additional keyword parameters are used to configure attributes of the connection.
|
||||
Accepted parameters include:
|
||||
|
||||
- ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
|
||||
- ``source_address``: Set the source address for the current connection.
|
||||
|
||||
.. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
|
||||
|
||||
- ``socket_options``: Set specific options on the underlying socket. If not specified, then
|
||||
defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
|
||||
Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
|
||||
|
||||
For example, if you wish to enable TCP Keep Alive in addition to the defaults,
|
||||
you might pass::
|
||||
|
||||
HTTPConnection.default_socket_options + [
|
||||
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
|
||||
]
|
||||
|
||||
Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
|
||||
"""
|
||||
|
||||
default_port = port_by_scheme['http']
|
||||
|
||||
#: Disable Nagle's algorithm by default.
|
||||
#: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
|
||||
default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
|
||||
|
||||
#: Whether this connection verifies the host's certificate.
|
||||
is_verified = False
|
||||
|
||||
def __init__(self, *args, **kw):
|
||||
if six.PY3: # Python 3
|
||||
kw.pop('strict', None)
|
||||
|
||||
# Pre-set source_address in case we have an older Python like 2.6.
|
||||
self.source_address = kw.get('source_address')
|
||||
|
||||
if sys.version_info < (2, 7): # Python 2.6
|
||||
# _HTTPConnection on Python 2.6 will balk at this keyword arg, but
|
||||
# not newer versions. We can still use it when creating a
|
||||
# connection though, so we pop it *after* we have saved it as
|
||||
# self.source_address.
|
||||
kw.pop('source_address', None)
|
||||
|
||||
#: The socket options provided by the user. If no options are
|
||||
#: provided, we use the default options.
|
||||
self.socket_options = kw.pop('socket_options', self.default_socket_options)
|
||||
|
||||
# Superclass also sets self.source_address in Python 2.7+.
|
||||
_HTTPConnection.__init__(self, *args, **kw)
|
||||
|
||||
def _new_conn(self):
|
||||
""" Establish a socket connection and set nodelay settings on it.
|
||||
|
||||
:return: New socket connection.
|
||||
"""
|
||||
extra_kw = {}
|
||||
if self.source_address:
|
||||
extra_kw['source_address'] = self.source_address
|
||||
|
||||
if self.socket_options:
|
||||
extra_kw['socket_options'] = self.socket_options
|
||||
|
||||
try:
|
||||
conn = connection.create_connection(
|
||||
(self.host, self.port), self.timeout, **extra_kw)
|
||||
|
||||
except SocketTimeout as e:
|
||||
raise ConnectTimeoutError(
|
||||
self, "Connection to %s timed out. (connect timeout=%s)" %
|
||||
(self.host, self.timeout))
|
||||
|
||||
except SocketError as e:
|
||||
raise NewConnectionError(
|
||||
self, "Failed to establish a new connection: %s" % e)
|
||||
|
||||
return conn
|
||||
|
||||
def _prepare_conn(self, conn):
|
||||
self.sock = conn
|
||||
# the _tunnel_host attribute was added in python 2.6.3 (via
|
||||
# http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
|
||||
# not have them.
|
||||
if getattr(self, '_tunnel_host', None):
|
||||
# TODO: Fix tunnel so it doesn't depend on self.sock state.
|
||||
self._tunnel()
|
||||
# Mark this connection as not reusable
|
||||
self.auto_open = 0
|
||||
|
||||
def connect(self):
|
||||
conn = self._new_conn()
|
||||
self._prepare_conn(conn)
|
||||
|
||||
def request_chunked(self, method, url, body=None, headers=None):
|
||||
"""
|
||||
Alternative to the common request method, which sends the
|
||||
body with chunked encoding and not as one block
|
||||
"""
|
||||
headers = HTTPHeaderDict(headers if headers is not None else {})
|
||||
skip_accept_encoding = 'accept-encoding' in headers
|
||||
skip_host = 'host' in headers
|
||||
self.putrequest(
|
||||
method,
|
||||
url,
|
||||
skip_accept_encoding=skip_accept_encoding,
|
||||
skip_host=skip_host
|
||||
)
|
||||
for header, value in headers.items():
|
||||
self.putheader(header, value)
|
||||
if 'transfer-encoding' not in headers:
|
||||
self.putheader('Transfer-Encoding', 'chunked')
|
||||
self.endheaders()
|
||||
|
||||
if body is not None:
|
||||
stringish_types = six.string_types + (six.binary_type,)
|
||||
if isinstance(body, stringish_types):
|
||||
body = (body,)
|
||||
for chunk in body:
|
||||
if not chunk:
|
||||
continue
|
||||
if not isinstance(chunk, six.binary_type):
|
||||
chunk = chunk.encode('utf8')
|
||||
len_str = hex(len(chunk))[2:]
|
||||
self.send(len_str.encode('utf-8'))
|
||||
self.send(b'\r\n')
|
||||
self.send(chunk)
|
||||
self.send(b'\r\n')
|
||||
|
||||
# After the if clause, to always have a closed body
|
||||
self.send(b'0\r\n\r\n')
|
||||
|
||||
|
||||
class HTTPSConnection(HTTPConnection):
|
||||
default_port = port_by_scheme['https']
|
||||
|
||||
ssl_version = None
|
||||
|
||||
def __init__(self, host, port=None, key_file=None, cert_file=None,
|
||||
strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
ssl_context=None, **kw):
|
||||
|
||||
HTTPConnection.__init__(self, host, port, strict=strict,
|
||||
timeout=timeout, **kw)
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.ssl_context = ssl_context
|
||||
|
||||
# Required property for Google AppEngine 1.9.0 which otherwise causes
|
||||
# HTTPS requests to go out as HTTP. (See Issue #356)
|
||||
self._protocol = 'https'
|
||||
|
||||
def connect(self):
|
||||
conn = self._new_conn()
|
||||
self._prepare_conn(conn)
|
||||
|
||||
if self.ssl_context is None:
|
||||
self.ssl_context = create_urllib3_context(
|
||||
ssl_version=resolve_ssl_version(None),
|
||||
cert_reqs=resolve_cert_reqs(None),
|
||||
)
|
||||
|
||||
self.sock = ssl_wrap_socket(
|
||||
sock=conn,
|
||||
keyfile=self.key_file,
|
||||
certfile=self.cert_file,
|
||||
ssl_context=self.ssl_context,
|
||||
)
|
||||
|
||||
|
||||
class VerifiedHTTPSConnection(HTTPSConnection):
|
||||
"""
|
||||
Based on httplib.HTTPSConnection but wraps the socket with
|
||||
SSL certification.
|
||||
"""
|
||||
cert_reqs = None
|
||||
ca_certs = None
|
||||
ca_cert_dir = None
|
||||
ssl_version = None
|
||||
assert_fingerprint = None
|
||||
|
||||
def set_cert(self, key_file=None, cert_file=None,
|
||||
cert_reqs=None, ca_certs=None,
|
||||
assert_hostname=None, assert_fingerprint=None,
|
||||
ca_cert_dir=None):
|
||||
"""
|
||||
This method should only be called once, before the connection is used.
|
||||
"""
|
||||
# If cert_reqs is not provided, we can try to guess. If the user gave
|
||||
# us a cert database, we assume they want to use it: otherwise, if
|
||||
# they gave us an SSL Context object we should use whatever is set for
|
||||
# it.
|
||||
if cert_reqs is None:
|
||||
if ca_certs or ca_cert_dir:
|
||||
cert_reqs = 'CERT_REQUIRED'
|
||||
elif self.ssl_context is not None:
|
||||
cert_reqs = self.ssl_context.verify_mode
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.cert_reqs = cert_reqs
|
||||
self.assert_hostname = assert_hostname
|
||||
self.assert_fingerprint = assert_fingerprint
|
||||
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
|
||||
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
|
||||
|
||||
def connect(self):
|
||||
# Add certificate verification
|
||||
conn = self._new_conn()
|
||||
|
||||
hostname = self.host
|
||||
if getattr(self, '_tunnel_host', None):
|
||||
# _tunnel_host was added in Python 2.6.3
|
||||
# (See: http://hg.python.org/cpython/rev/0f57b30a152f)
|
||||
|
||||
self.sock = conn
|
||||
# Calls self._set_hostport(), so self.host is
|
||||
# self._tunnel_host below.
|
||||
self._tunnel()
|
||||
# Mark this connection as not reusable
|
||||
self.auto_open = 0
|
||||
|
||||
# Override the host with the one we're requesting data from.
|
||||
hostname = self._tunnel_host
|
||||
|
||||
is_time_off = datetime.date.today() < RECENT_DATE
|
||||
if is_time_off:
|
||||
warnings.warn((
|
||||
'System time is way off (before {0}). This will probably '
|
||||
'lead to SSL verification errors').format(RECENT_DATE),
|
||||
SystemTimeWarning
|
||||
)
|
||||
|
||||
# Wrap socket using verification with the root certs in
|
||||
# trusted_root_certs
|
||||
if self.ssl_context is None:
|
||||
self.ssl_context = create_urllib3_context(
|
||||
ssl_version=resolve_ssl_version(self.ssl_version),
|
||||
cert_reqs=resolve_cert_reqs(self.cert_reqs),
|
||||
)
|
||||
|
||||
context = self.ssl_context
|
||||
context.verify_mode = resolve_cert_reqs(self.cert_reqs)
|
||||
self.sock = ssl_wrap_socket(
|
||||
sock=conn,
|
||||
keyfile=self.key_file,
|
||||
certfile=self.cert_file,
|
||||
ca_certs=self.ca_certs,
|
||||
ca_cert_dir=self.ca_cert_dir,
|
||||
server_hostname=hostname,
|
||||
ssl_context=context)
|
||||
|
||||
if self.assert_fingerprint:
|
||||
assert_fingerprint(self.sock.getpeercert(binary_form=True),
|
||||
self.assert_fingerprint)
|
||||
elif context.verify_mode != ssl.CERT_NONE \
|
||||
and not getattr(context, 'check_hostname', False) \
|
||||
and self.assert_hostname is not False:
|
||||
# While urllib3 attempts to always turn off hostname matching from
|
||||
# the TLS library, this cannot always be done. So we check whether
|
||||
# the TLS Library still thinks it's matching hostnames.
|
||||
cert = self.sock.getpeercert()
|
||||
if not cert.get('subjectAltName', ()):
|
||||
warnings.warn((
|
||||
'Certificate for {0} has no `subjectAltName`, falling back to check for a '
|
||||
'`commonName` for now. This feature is being removed by major browsers and '
|
||||
'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 '
|
||||
'for details.)'.format(hostname)),
|
||||
SubjectAltNameWarning
|
||||
)
|
||||
_match_hostname(cert, self.assert_hostname or hostname)
|
||||
|
||||
self.is_verified = (
|
||||
context.verify_mode == ssl.CERT_REQUIRED or
|
||||
self.assert_fingerprint is not None
|
||||
)
|
||||
|
||||
|
||||
def _match_hostname(cert, asserted_hostname):
|
||||
try:
|
||||
match_hostname(cert, asserted_hostname)
|
||||
except CertificateError as e:
|
||||
log.error(
|
||||
'Certificate did not match expected hostname: %s. '
|
||||
'Certificate: %s', asserted_hostname, cert
|
||||
)
|
||||
# Add cert to exception and reraise so client code can inspect
|
||||
# the cert when catching the exception, if they want to
|
||||
e._peer_cert = cert
|
||||
raise
|
||||
|
||||
|
||||
if ssl:
|
||||
# Make a copy for testing.
|
||||
UnverifiedHTTPSConnection = HTTPSConnection
|
||||
HTTPSConnection = VerifiedHTTPSConnection
|
||||
else:
|
||||
HTTPSConnection = DummyConnection
|
||||
@@ -1,899 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import errno
|
||||
import logging
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
import socket
|
||||
|
||||
|
||||
from .exceptions import (
|
||||
ClosedPoolError,
|
||||
ProtocolError,
|
||||
EmptyPoolError,
|
||||
HeaderParsingError,
|
||||
HostChangedError,
|
||||
LocationValueError,
|
||||
MaxRetryError,
|
||||
ProxyError,
|
||||
ReadTimeoutError,
|
||||
SSLError,
|
||||
TimeoutError,
|
||||
InsecureRequestWarning,
|
||||
NewConnectionError,
|
||||
)
|
||||
from .packages.ssl_match_hostname import CertificateError
|
||||
from .packages import six
|
||||
from .packages.six.moves import queue
|
||||
from .connection import (
|
||||
port_by_scheme,
|
||||
DummyConnection,
|
||||
HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
|
||||
HTTPException, BaseSSLError,
|
||||
)
|
||||
from .request import RequestMethods
|
||||
from .response import HTTPResponse
|
||||
|
||||
from .util.connection import is_connection_dropped
|
||||
from .util.request import set_file_position
|
||||
from .util.response import assert_header_parsing
|
||||
from .util.retry import Retry
|
||||
from .util.timeout import Timeout
|
||||
from .util.url import get_host, Url
|
||||
|
||||
|
||||
if six.PY2:
|
||||
# Queue is imported for side effects on MS Windows
|
||||
import Queue as _unused_module_Queue # noqa: F401
|
||||
|
||||
xrange = six.moves.xrange
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_Default = object()
|
||||
|
||||
|
||||
# Pool objects
|
||||
class ConnectionPool(object):
|
||||
"""
|
||||
Base class for all connection pools, such as
|
||||
:class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
|
||||
"""
|
||||
|
||||
scheme = None
|
||||
QueueCls = queue.LifoQueue
|
||||
|
||||
def __init__(self, host, port=None):
|
||||
if not host:
|
||||
raise LocationValueError("No host specified.")
|
||||
|
||||
self.host = _ipv6_host(host).lower()
|
||||
self.port = port
|
||||
|
||||
def __str__(self):
|
||||
return '%s(host=%r, port=%r)' % (type(self).__name__,
|
||||
self.host, self.port)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
# Return False to re-raise any potential exceptions
|
||||
return False
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close all pooled connections and disable the pool.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
|
||||
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
|
||||
|
||||
|
||||
class HTTPConnectionPool(ConnectionPool, RequestMethods):
|
||||
"""
|
||||
Thread-safe connection pool for one host.
|
||||
|
||||
:param host:
|
||||
Host used for this HTTP Connection (e.g. "localhost"), passed into
|
||||
:class:`httplib.HTTPConnection`.
|
||||
|
||||
:param port:
|
||||
Port used for this HTTP Connection (None is equivalent to 80), passed
|
||||
into :class:`httplib.HTTPConnection`.
|
||||
|
||||
:param strict:
|
||||
Causes BadStatusLine to be raised if the status line can't be parsed
|
||||
as a valid HTTP/1.0 or 1.1 status line, passed into
|
||||
:class:`httplib.HTTPConnection`.
|
||||
|
||||
.. note::
|
||||
Only works in Python 2. This parameter is ignored in Python 3.
|
||||
|
||||
:param timeout:
|
||||
Socket timeout in seconds for each individual connection. This can
|
||||
be a float or integer, which sets the timeout for the HTTP request,
|
||||
or an instance of :class:`urllib3.util.Timeout` which gives you more
|
||||
fine-grained control over request timeouts. After the constructor has
|
||||
been parsed, this is always a `urllib3.util.Timeout` object.
|
||||
|
||||
:param maxsize:
|
||||
Number of connections to save that can be reused. More than 1 is useful
|
||||
in multithreaded situations. If ``block`` is set to False, more
|
||||
connections will be created but they will not be saved once they've
|
||||
been used.
|
||||
|
||||
:param block:
|
||||
If set to True, no more than ``maxsize`` connections will be used at
|
||||
a time. When no free connections are available, the call will block
|
||||
until a connection has been released. This is a useful side effect for
|
||||
particular multithreaded situations where one does not want to use more
|
||||
than maxsize connections per host to prevent flooding.
|
||||
|
||||
:param headers:
|
||||
Headers to include with all requests, unless other headers are given
|
||||
explicitly.
|
||||
|
||||
:param retries:
|
||||
Retry configuration to use by default with requests in this pool.
|
||||
|
||||
:param _proxy:
|
||||
Parsed proxy URL, should not be used directly, instead, see
|
||||
:class:`urllib3.connectionpool.ProxyManager`"
|
||||
|
||||
:param _proxy_headers:
|
||||
A dictionary with proxy headers, should not be used directly,
|
||||
instead, see :class:`urllib3.connectionpool.ProxyManager`"
|
||||
|
||||
:param \\**conn_kw:
|
||||
Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
|
||||
:class:`urllib3.connection.HTTPSConnection` instances.
|
||||
"""
|
||||
|
||||
scheme = 'http'
|
||||
ConnectionCls = HTTPConnection
|
||||
ResponseCls = HTTPResponse
|
||||
|
||||
def __init__(self, host, port=None, strict=False,
|
||||
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
|
||||
headers=None, retries=None,
|
||||
_proxy=None, _proxy_headers=None,
|
||||
**conn_kw):
|
||||
ConnectionPool.__init__(self, host, port)
|
||||
RequestMethods.__init__(self, headers)
|
||||
|
||||
self.strict = strict
|
||||
|
||||
if not isinstance(timeout, Timeout):
|
||||
timeout = Timeout.from_float(timeout)
|
||||
|
||||
if retries is None:
|
||||
retries = Retry.DEFAULT
|
||||
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
|
||||
self.pool = self.QueueCls(maxsize)
|
||||
self.block = block
|
||||
|
||||
self.proxy = _proxy
|
||||
self.proxy_headers = _proxy_headers or {}
|
||||
|
||||
# Fill the queue up so that doing get() on it will block properly
|
||||
for _ in xrange(maxsize):
|
||||
self.pool.put(None)
|
||||
|
||||
# These are mostly for testing and debugging purposes.
|
||||
self.num_connections = 0
|
||||
self.num_requests = 0
|
||||
self.conn_kw = conn_kw
|
||||
|
||||
if self.proxy:
|
||||
# Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
|
||||
# We cannot know if the user has added default socket options, so we cannot replace the
|
||||
# list.
|
||||
self.conn_kw.setdefault('socket_options', [])
|
||||
|
||||
def _new_conn(self):
|
||||
"""
|
||||
Return a fresh :class:`HTTPConnection`.
|
||||
"""
|
||||
self.num_connections += 1
|
||||
log.debug("Starting new HTTP connection (%d): %s",
|
||||
self.num_connections, self.host)
|
||||
|
||||
conn = self.ConnectionCls(host=self.host, port=self.port,
|
||||
timeout=self.timeout.connect_timeout,
|
||||
strict=self.strict, **self.conn_kw)
|
||||
return conn
|
||||
|
||||
def _get_conn(self, timeout=None):
|
||||
"""
|
||||
Get a connection. Will return a pooled connection if one is available.
|
||||
|
||||
If no connections are available and :prop:`.block` is ``False``, then a
|
||||
fresh connection is returned.
|
||||
|
||||
:param timeout:
|
||||
Seconds to wait before giving up and raising
|
||||
:class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
|
||||
:prop:`.block` is ``True``.
|
||||
"""
|
||||
conn = None
|
||||
try:
|
||||
conn = self.pool.get(block=self.block, timeout=timeout)
|
||||
|
||||
except AttributeError: # self.pool is None
|
||||
raise ClosedPoolError(self, "Pool is closed.")
|
||||
|
||||
except queue.Empty:
|
||||
if self.block:
|
||||
raise EmptyPoolError(self,
|
||||
"Pool reached maximum size and no more "
|
||||
"connections are allowed.")
|
||||
pass # Oh well, we'll create a new connection then
|
||||
|
||||
# If this is a persistent connection, check if it got disconnected
|
||||
if conn and is_connection_dropped(conn):
|
||||
log.debug("Resetting dropped connection: %s", self.host)
|
||||
conn.close()
|
||||
if getattr(conn, 'auto_open', 1) == 0:
|
||||
# This is a proxied connection that has been mutated by
|
||||
# httplib._tunnel() and cannot be reused (since it would
|
||||
# attempt to bypass the proxy)
|
||||
conn = None
|
||||
|
||||
return conn or self._new_conn()
|
||||
|
||||
def _put_conn(self, conn):
|
||||
"""
|
||||
Put a connection back into the pool.
|
||||
|
||||
:param conn:
|
||||
Connection object for the current host and port as returned by
|
||||
:meth:`._new_conn` or :meth:`._get_conn`.
|
||||
|
||||
If the pool is already full, the connection is closed and discarded
|
||||
because we exceeded maxsize. If connections are discarded frequently,
|
||||
then maxsize should be increased.
|
||||
|
||||
If the pool is closed, then the connection will be closed and discarded.
|
||||
"""
|
||||
try:
|
||||
self.pool.put(conn, block=False)
|
||||
return # Everything is dandy, done.
|
||||
except AttributeError:
|
||||
# self.pool is None.
|
||||
pass
|
||||
except queue.Full:
|
||||
# This should never happen if self.block == True
|
||||
log.warning(
|
||||
"Connection pool is full, discarding connection: %s",
|
||||
self.host)
|
||||
|
||||
# Connection never got put back into the pool, close it.
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
def _validate_conn(self, conn):
|
||||
"""
|
||||
Called right before a request is made, after the socket is created.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _prepare_proxy(self, conn):
|
||||
# Nothing to do for HTTP connections.
|
||||
pass
|
||||
|
||||
def _get_timeout(self, timeout):
|
||||
""" Helper that always returns a :class:`urllib3.util.Timeout` """
|
||||
if timeout is _Default:
|
||||
return self.timeout.clone()
|
||||
|
||||
if isinstance(timeout, Timeout):
|
||||
return timeout.clone()
|
||||
else:
|
||||
# User passed us an int/float. This is for backwards compatibility,
|
||||
# can be removed later
|
||||
return Timeout.from_float(timeout)
|
||||
|
||||
def _raise_timeout(self, err, url, timeout_value):
|
||||
"""Is the error actually a timeout? Will raise a ReadTimeout or pass"""
|
||||
|
||||
if isinstance(err, SocketTimeout):
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
# See the above comment about EAGAIN in Python 3. In Python 2 we have
|
||||
# to specifically catch it and throw the timeout error
|
||||
if hasattr(err, 'errno') and err.errno in _blocking_errnos:
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
# Catch possible read timeouts thrown as SSL errors. If not the
|
||||
# case, rethrow the original. We need to do this because of:
|
||||
# http://bugs.python.org/issue10272
|
||||
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
|
||||
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
|
||||
|
||||
def _make_request(self, conn, method, url, timeout=_Default, chunked=False,
|
||||
**httplib_request_kw):
|
||||
"""
|
||||
Perform a request on a given urllib connection object taken from our
|
||||
pool.
|
||||
|
||||
:param conn:
|
||||
a connection from one of our connection pools
|
||||
|
||||
:param timeout:
|
||||
Socket timeout in seconds for the request. This can be a
|
||||
float or integer, which will set the same timeout value for
|
||||
the socket connect and the socket read, or an instance of
|
||||
:class:`urllib3.util.Timeout`, which gives you more fine-grained
|
||||
control over your timeouts.
|
||||
"""
|
||||
self.num_requests += 1
|
||||
|
||||
timeout_obj = self._get_timeout(timeout)
|
||||
timeout_obj.start_connect()
|
||||
conn.timeout = timeout_obj.connect_timeout
|
||||
|
||||
# Trigger any extra validation we need to do.
|
||||
try:
|
||||
self._validate_conn(conn)
|
||||
except (SocketTimeout, BaseSSLError) as e:
|
||||
# Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
|
||||
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
|
||||
raise
|
||||
|
||||
# conn.request() calls httplib.*.request, not the method in
|
||||
# urllib3.request. It also calls makefile (recv) on the socket.
|
||||
if chunked:
|
||||
conn.request_chunked(method, url, **httplib_request_kw)
|
||||
else:
|
||||
conn.request(method, url, **httplib_request_kw)
|
||||
|
||||
# Reset the timeout for the recv() on the socket
|
||||
read_timeout = timeout_obj.read_timeout
|
||||
|
||||
# App Engine doesn't have a sock attr
|
||||
if getattr(conn, 'sock', None):
|
||||
# In Python 3 socket.py will catch EAGAIN and return None when you
|
||||
# try and read into the file pointer created by http.client, which
|
||||
# instead raises a BadStatusLine exception. Instead of catching
|
||||
# the exception and assuming all BadStatusLine exceptions are read
|
||||
# timeouts, check for a zero timeout before making the request.
|
||||
if read_timeout == 0:
|
||||
raise ReadTimeoutError(
|
||||
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
|
||||
if read_timeout is Timeout.DEFAULT_TIMEOUT:
|
||||
conn.sock.settimeout(socket.getdefaulttimeout())
|
||||
else: # None or a value
|
||||
conn.sock.settimeout(read_timeout)
|
||||
|
||||
# Receive the response from the server
|
||||
try:
|
||||
try: # Python 2.7, use buffering of HTTP responses
|
||||
httplib_response = conn.getresponse(buffering=True)
|
||||
except TypeError: # Python 2.6 and older, Python 3
|
||||
try:
|
||||
httplib_response = conn.getresponse()
|
||||
except Exception as e:
|
||||
# Remove the TypeError from the exception chain in Python 3;
|
||||
# otherwise it looks like a programming error was the cause.
|
||||
six.raise_from(e, None)
|
||||
except (SocketTimeout, BaseSSLError, SocketError) as e:
|
||||
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
|
||||
raise
|
||||
|
||||
# AppEngine doesn't have a version attr.
|
||||
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
|
||||
log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host, self.port,
|
||||
method, url, http_version, httplib_response.status,
|
||||
httplib_response.length)
|
||||
|
||||
try:
|
||||
assert_header_parsing(httplib_response.msg)
|
||||
except HeaderParsingError as hpe: # Platform-specific: Python 3
|
||||
log.warning(
|
||||
'Failed to parse headers (url=%s): %s',
|
||||
self._absolute_url(url), hpe, exc_info=True)
|
||||
|
||||
return httplib_response
|
||||
|
||||
def _absolute_url(self, path):
|
||||
return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close all pooled connections and disable the pool.
|
||||
"""
|
||||
# Disable access to the pool
|
||||
old_pool, self.pool = self.pool, None
|
||||
|
||||
try:
|
||||
while True:
|
||||
conn = old_pool.get(block=False)
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
except queue.Empty:
|
||||
pass # Done.
|
||||
|
||||
def is_same_host(self, url):
|
||||
"""
|
||||
Check if the given ``url`` is a member of the same host as this
|
||||
connection pool.
|
||||
"""
|
||||
if url.startswith('/'):
|
||||
return True
|
||||
|
||||
# TODO: Add optional support for socket.gethostbyname checking.
|
||||
scheme, host, port = get_host(url)
|
||||
|
||||
host = _ipv6_host(host).lower()
|
||||
|
||||
# Use explicit default port for comparison when none is given
|
||||
if self.port and not port:
|
||||
port = port_by_scheme.get(scheme)
|
||||
elif not self.port and port == port_by_scheme.get(scheme):
|
||||
port = None
|
||||
|
||||
return (scheme, host, port) == (self.scheme, self.host, self.port)
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None, retries=None,
|
||||
redirect=True, assert_same_host=True, timeout=_Default,
|
||||
pool_timeout=None, release_conn=None, chunked=False,
|
||||
body_pos=None, **response_kw):
|
||||
"""
|
||||
Get a connection from the pool and perform an HTTP request. This is the
|
||||
lowest level call for making a request, so you'll need to specify all
|
||||
the raw details.
|
||||
|
||||
.. note::
|
||||
|
||||
More commonly, it's appropriate to use a convenience method provided
|
||||
by :class:`.RequestMethods`, such as :meth:`request`.
|
||||
|
||||
.. note::
|
||||
|
||||
`release_conn` will only behave as expected if
|
||||
`preload_content=False` because we want to make
|
||||
`preload_content=False` the default behaviour someday soon without
|
||||
breaking backwards compatibility.
|
||||
|
||||
:param method:
|
||||
HTTP request method (such as GET, POST, PUT, etc.)
|
||||
|
||||
:param body:
|
||||
Data to send in the request body (useful for creating
|
||||
POST requests, see HTTPConnectionPool.post_url for
|
||||
more convenience).
|
||||
|
||||
:param headers:
|
||||
Dictionary of custom headers to send, such as User-Agent,
|
||||
If-None-Match, etc. If None, pool headers are used. If provided,
|
||||
these headers completely replace any pool-specific headers.
|
||||
|
||||
:param retries:
|
||||
Configure the number of retries to allow before raising a
|
||||
:class:`~urllib3.exceptions.MaxRetryError` exception.
|
||||
|
||||
Pass ``None`` to retry until you receive a response. Pass a
|
||||
:class:`~urllib3.util.retry.Retry` object for fine-grained control
|
||||
over different types of retries.
|
||||
Pass an integer number to retry connection errors that many times,
|
||||
but no other types of errors. Pass zero to never retry.
|
||||
|
||||
If ``False``, then retries are disabled and any exception is raised
|
||||
immediately. Also, instead of raising a MaxRetryError on redirects,
|
||||
the redirect response will be returned.
|
||||
|
||||
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
|
||||
|
||||
:param redirect:
|
||||
If True, automatically handle redirects (status codes 301, 302,
|
||||
303, 307, 308). Each redirect counts as a retry. Disabling retries
|
||||
will disable redirect, too.
|
||||
|
||||
:param assert_same_host:
|
||||
If ``True``, will make sure that the host of the pool requests is
|
||||
consistent else will raise HostChangedError. When False, you can
|
||||
use the pool on an HTTP proxy and request foreign hosts.
|
||||
|
||||
:param timeout:
|
||||
If specified, overrides the default timeout for this one
|
||||
request. It may be a float (in seconds) or an instance of
|
||||
:class:`urllib3.util.Timeout`.
|
||||
|
||||
:param pool_timeout:
|
||||
If set and the pool is set to block=True, then this method will
|
||||
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
|
||||
connection is available within the time period.
|
||||
|
||||
:param release_conn:
|
||||
If False, then the urlopen call will not release the connection
|
||||
back into the pool once a response is received (but will release if
|
||||
you read the entire contents of the response such as when
|
||||
`preload_content=True`). This is useful if you're not preloading
|
||||
the response's content immediately. You will need to call
|
||||
``r.release_conn()`` on the response ``r`` to return the connection
|
||||
back into the pool. If None, it takes the value of
|
||||
``response_kw.get('preload_content', True)``.
|
||||
|
||||
:param chunked:
|
||||
If True, urllib3 will send the body using chunked transfer
|
||||
encoding. Otherwise, urllib3 will send the body using the standard
|
||||
content-length form. Defaults to False.
|
||||
|
||||
:param int body_pos:
|
||||
Position to seek to in file-like body in the event of a retry or
|
||||
redirect. Typically this won't need to be set because urllib3 will
|
||||
auto-populate the value when needed.
|
||||
|
||||
:param \\**response_kw:
|
||||
Additional parameters are passed to
|
||||
:meth:`urllib3.response.HTTPResponse.from_httplib`
|
||||
"""
|
||||
if headers is None:
|
||||
headers = self.headers
|
||||
|
||||
if not isinstance(retries, Retry):
|
||||
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
|
||||
|
||||
if release_conn is None:
|
||||
release_conn = response_kw.get('preload_content', True)
|
||||
|
||||
# Check host
|
||||
if assert_same_host and not self.is_same_host(url):
|
||||
raise HostChangedError(self, url, retries)
|
||||
|
||||
conn = None
|
||||
|
||||
# Track whether `conn` needs to be released before
|
||||
# returning/raising/recursing. Update this variable if necessary, and
|
||||
# leave `release_conn` constant throughout the function. That way, if
|
||||
# the function recurses, the original value of `release_conn` will be
|
||||
# passed down into the recursive call, and its value will be respected.
|
||||
#
|
||||
# See issue #651 [1] for details.
|
||||
#
|
||||
# [1] <https://github.com/shazow/urllib3/issues/651>
|
||||
release_this_conn = release_conn
|
||||
|
||||
# Merge the proxy headers. Only do this in HTTP. We have to copy the
|
||||
# headers dict so we can safely change it without those changes being
|
||||
# reflected in anyone else's copy.
|
||||
if self.scheme == 'http':
|
||||
headers = headers.copy()
|
||||
headers.update(self.proxy_headers)
|
||||
|
||||
# Must keep the exception bound to a separate variable or else Python 3
|
||||
# complains about UnboundLocalError.
|
||||
err = None
|
||||
|
||||
# Keep track of whether we cleanly exited the except block. This
|
||||
# ensures we do proper cleanup in finally.
|
||||
clean_exit = False
|
||||
|
||||
# Rewind body position, if needed. Record current position
|
||||
# for future rewinds in the event of a redirect/retry.
|
||||
body_pos = set_file_position(body, body_pos)
|
||||
|
||||
try:
|
||||
# Request a connection from the queue.
|
||||
timeout_obj = self._get_timeout(timeout)
|
||||
conn = self._get_conn(timeout=pool_timeout)
|
||||
|
||||
conn.timeout = timeout_obj.connect_timeout
|
||||
|
||||
is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None)
|
||||
if is_new_proxy_conn:
|
||||
self._prepare_proxy(conn)
|
||||
|
||||
# Make the request on the httplib connection object.
|
||||
httplib_response = self._make_request(conn, method, url,
|
||||
timeout=timeout_obj,
|
||||
body=body, headers=headers,
|
||||
chunked=chunked)
|
||||
|
||||
# If we're going to release the connection in ``finally:``, then
|
||||
# the response doesn't need to know about the connection. Otherwise
|
||||
# it will also try to release it and we'll have a double-release
|
||||
# mess.
|
||||
response_conn = conn if not release_conn else None
|
||||
|
||||
# Pass method to Response for length checking
|
||||
response_kw['request_method'] = method
|
||||
|
||||
# Import httplib's response into our own wrapper object
|
||||
response = self.ResponseCls.from_httplib(httplib_response,
|
||||
pool=self,
|
||||
connection=response_conn,
|
||||
retries=retries,
|
||||
**response_kw)
|
||||
|
||||
# Everything went great!
|
||||
clean_exit = True
|
||||
|
||||
except queue.Empty:
|
||||
# Timed out by queue.
|
||||
raise EmptyPoolError(self, "No pool connections are available.")
|
||||
|
||||
except (BaseSSLError, CertificateError) as e:
|
||||
# Close the connection. If a connection is reused on which there
|
||||
# was a Certificate error, the next request will certainly raise
|
||||
# another Certificate error.
|
||||
clean_exit = False
|
||||
raise SSLError(e)
|
||||
|
||||
except SSLError:
|
||||
# Treat SSLError separately from BaseSSLError to preserve
|
||||
# traceback.
|
||||
clean_exit = False
|
||||
raise
|
||||
|
||||
except (TimeoutError, HTTPException, SocketError, ProtocolError) as e:
|
||||
# Discard the connection for these exceptions. It will be
|
||||
# be replaced during the next _get_conn() call.
|
||||
clean_exit = False
|
||||
|
||||
if isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
|
||||
e = ProxyError('Cannot connect to proxy.', e)
|
||||
elif isinstance(e, (SocketError, HTTPException)):
|
||||
e = ProtocolError('Connection aborted.', e)
|
||||
|
||||
retries = retries.increment(method, url, error=e, _pool=self,
|
||||
_stacktrace=sys.exc_info()[2])
|
||||
retries.sleep()
|
||||
|
||||
# Keep track of the error for the retry warning.
|
||||
err = e
|
||||
|
||||
finally:
|
||||
if not clean_exit:
|
||||
# We hit some kind of exception, handled or otherwise. We need
|
||||
# to throw the connection away unless explicitly told not to.
|
||||
# Close the connection, set the variable to None, and make sure
|
||||
# we put the None back in the pool to avoid leaking it.
|
||||
conn = conn and conn.close()
|
||||
release_this_conn = True
|
||||
|
||||
if release_this_conn:
|
||||
# Put the connection back to be reused. If the connection is
|
||||
# expired then it will be None, which will get replaced with a
|
||||
# fresh connection during _get_conn.
|
||||
self._put_conn(conn)
|
||||
|
||||
if not conn:
|
||||
# Try again
|
||||
log.warning("Retrying (%r) after connection "
|
||||
"broken by '%r': %s", retries, err, url)
|
||||
return self.urlopen(method, url, body, headers, retries,
|
||||
redirect, assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn, body_pos=body_pos,
|
||||
**response_kw)
|
||||
|
||||
# Handle redirect?
|
||||
redirect_location = redirect and response.get_redirect_location()
|
||||
if redirect_location:
|
||||
if response.status == 303:
|
||||
method = 'GET'
|
||||
|
||||
try:
|
||||
retries = retries.increment(method, url, response=response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_redirect:
|
||||
# Release the connection for this response, since we're not
|
||||
# returning it to be released manually.
|
||||
response.release_conn()
|
||||
raise
|
||||
return response
|
||||
|
||||
retries.sleep_for_retry(response)
|
||||
log.debug("Redirecting %s -> %s", url, redirect_location)
|
||||
return self.urlopen(
|
||||
method, redirect_location, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
assert_same_host=assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn, body_pos=body_pos,
|
||||
**response_kw)
|
||||
|
||||
# Check if we should retry the HTTP response.
|
||||
has_retry_after = bool(response.getheader('Retry-After'))
|
||||
if retries.is_retry(method, response.status, has_retry_after):
|
||||
try:
|
||||
retries = retries.increment(method, url, response=response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_status:
|
||||
# Release the connection for this response, since we're not
|
||||
# returning it to be released manually.
|
||||
response.release_conn()
|
||||
raise
|
||||
return response
|
||||
retries.sleep(response)
|
||||
log.debug("Retry: %s", url)
|
||||
return self.urlopen(
|
||||
method, url, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
assert_same_host=assert_same_host,
|
||||
timeout=timeout, pool_timeout=pool_timeout,
|
||||
release_conn=release_conn,
|
||||
body_pos=body_pos, **response_kw)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
class HTTPSConnectionPool(HTTPConnectionPool):
|
||||
"""
|
||||
Same as :class:`.HTTPConnectionPool`, but HTTPS.
|
||||
|
||||
When Python is compiled with the :mod:`ssl` module, then
|
||||
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
|
||||
instead of :class:`.HTTPSConnection`.
|
||||
|
||||
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
|
||||
``assert_hostname`` and ``host`` in this order to verify connections.
|
||||
If ``assert_hostname`` is False, no verification is done.
|
||||
|
||||
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
|
||||
``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is
|
||||
available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
|
||||
the connection socket into an SSL socket.
|
||||
"""
|
||||
|
||||
scheme = 'https'
|
||||
ConnectionCls = HTTPSConnection
|
||||
|
||||
def __init__(self, host, port=None,
|
||||
strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
|
||||
block=False, headers=None, retries=None,
|
||||
_proxy=None, _proxy_headers=None,
|
||||
key_file=None, cert_file=None, cert_reqs=None,
|
||||
ca_certs=None, ssl_version=None,
|
||||
assert_hostname=None, assert_fingerprint=None,
|
||||
ca_cert_dir=None, **conn_kw):
|
||||
|
||||
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
|
||||
block, headers, retries, _proxy, _proxy_headers,
|
||||
**conn_kw)
|
||||
|
||||
if ca_certs and cert_reqs is None:
|
||||
cert_reqs = 'CERT_REQUIRED'
|
||||
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.cert_reqs = cert_reqs
|
||||
self.ca_certs = ca_certs
|
||||
self.ca_cert_dir = ca_cert_dir
|
||||
self.ssl_version = ssl_version
|
||||
self.assert_hostname = assert_hostname
|
||||
self.assert_fingerprint = assert_fingerprint
|
||||
|
||||
def _prepare_conn(self, conn):
|
||||
"""
|
||||
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
|
||||
and establish the tunnel if proxy is used.
|
||||
"""
|
||||
|
||||
if isinstance(conn, VerifiedHTTPSConnection):
|
||||
conn.set_cert(key_file=self.key_file,
|
||||
cert_file=self.cert_file,
|
||||
cert_reqs=self.cert_reqs,
|
||||
ca_certs=self.ca_certs,
|
||||
ca_cert_dir=self.ca_cert_dir,
|
||||
assert_hostname=self.assert_hostname,
|
||||
assert_fingerprint=self.assert_fingerprint)
|
||||
conn.ssl_version = self.ssl_version
|
||||
return conn
|
||||
|
||||
def _prepare_proxy(self, conn):
|
||||
"""
|
||||
Establish tunnel connection early, because otherwise httplib
|
||||
would improperly set Host: header to proxy's IP:port.
|
||||
"""
|
||||
# Python 2.7+
|
||||
try:
|
||||
set_tunnel = conn.set_tunnel
|
||||
except AttributeError: # Platform-specific: Python 2.6
|
||||
set_tunnel = conn._set_tunnel
|
||||
|
||||
if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
|
||||
set_tunnel(self.host, self.port)
|
||||
else:
|
||||
set_tunnel(self.host, self.port, self.proxy_headers)
|
||||
|
||||
conn.connect()
|
||||
|
||||
def _new_conn(self):
|
||||
"""
|
||||
Return a fresh :class:`httplib.HTTPSConnection`.
|
||||
"""
|
||||
self.num_connections += 1
|
||||
log.debug("Starting new HTTPS connection (%d): %s",
|
||||
self.num_connections, self.host)
|
||||
|
||||
if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
|
||||
raise SSLError("Can't connect to HTTPS URL because the SSL "
|
||||
"module is not available.")
|
||||
|
||||
actual_host = self.host
|
||||
actual_port = self.port
|
||||
if self.proxy is not None:
|
||||
actual_host = self.proxy.host
|
||||
actual_port = self.proxy.port
|
||||
|
||||
conn = self.ConnectionCls(host=actual_host, port=actual_port,
|
||||
timeout=self.timeout.connect_timeout,
|
||||
strict=self.strict, **self.conn_kw)
|
||||
|
||||
return self._prepare_conn(conn)
|
||||
|
||||
def _validate_conn(self, conn):
|
||||
"""
|
||||
Called right before a request is made, after the socket is created.
|
||||
"""
|
||||
super(HTTPSConnectionPool, self)._validate_conn(conn)
|
||||
|
||||
# Force connect early to allow us to validate the connection.
|
||||
if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
|
||||
conn.connect()
|
||||
|
||||
if not conn.is_verified:
|
||||
warnings.warn((
|
||||
'Unverified HTTPS request is being made. '
|
||||
'Adding certificate verification is strongly advised. See: '
|
||||
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
|
||||
'#ssl-warnings'),
|
||||
InsecureRequestWarning)
|
||||
|
||||
|
||||
def connection_from_url(url, **kw):
|
||||
"""
|
||||
Given a url, return an :class:`.ConnectionPool` instance of its host.
|
||||
|
||||
This is a shortcut for not having to parse out the scheme, host, and port
|
||||
of the url before creating an :class:`.ConnectionPool` instance.
|
||||
|
||||
:param url:
|
||||
Absolute URL string that must include the scheme. Port is optional.
|
||||
|
||||
:param \\**kw:
|
||||
Passes additional parameters to the constructor of the appropriate
|
||||
:class:`.ConnectionPool`. Useful for specifying things like
|
||||
timeout, maxsize, headers, etc.
|
||||
|
||||
Example::
|
||||
|
||||
>>> conn = connection_from_url('http://google.com/')
|
||||
>>> r = conn.request('GET', '/')
|
||||
"""
|
||||
scheme, host, port = get_host(url)
|
||||
port = port or port_by_scheme.get(scheme, 80)
|
||||
if scheme == 'https':
|
||||
return HTTPSConnectionPool(host, port=port, **kw)
|
||||
else:
|
||||
return HTTPConnectionPool(host, port=port, **kw)
|
||||
|
||||
|
||||
def _ipv6_host(host):
|
||||
"""
|
||||
Process IPv6 address literals
|
||||
"""
|
||||
|
||||
# httplib doesn't like it when we include brackets in IPv6 addresses
|
||||
# Specifically, if we include brackets but also pass the port then
|
||||
# httplib crazily doubles up the square brackets on the Host header.
|
||||
# Instead, we need to make sure we never pass ``None`` as the port.
|
||||
# However, for backward compatibility reasons we can't actually
|
||||
# *assert* that. See http://bugs.python.org/issue28539
|
||||
#
|
||||
# Also if an IPv6 address literal has a zone identifier, the
|
||||
# percent sign might be URIencoded, convert it back into ASCII
|
||||
if host.startswith('[') and host.endswith(']'):
|
||||
host = host.replace('%25', '%').strip('[]')
|
||||
return host
|
||||
@@ -1,590 +0,0 @@
|
||||
"""
|
||||
This module uses ctypes to bind a whole bunch of functions and constants from
|
||||
SecureTransport. The goal here is to provide the low-level API to
|
||||
SecureTransport. These are essentially the C-level functions and constants, and
|
||||
they're pretty gross to work with.
|
||||
|
||||
This code is a bastardised version of the code found in Will Bond's oscrypto
|
||||
library. An enormous debt is owed to him for blazing this trail for us. For
|
||||
that reason, this code should be considered to be covered both by urllib3's
|
||||
license and by oscrypto's:
|
||||
|
||||
Copyright (c) 2015-2016 Will Bond <will@wbond.net>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import platform
|
||||
from ctypes.util import find_library
|
||||
from ctypes import (
|
||||
c_void_p, c_int32, c_char_p, c_size_t, c_byte, c_uint32, c_ulong, c_long,
|
||||
c_bool
|
||||
)
|
||||
from ctypes import CDLL, POINTER, CFUNCTYPE
|
||||
|
||||
|
||||
security_path = find_library('Security')
|
||||
if not security_path:
|
||||
raise ImportError('The library Security could not be found')
|
||||
|
||||
|
||||
core_foundation_path = find_library('CoreFoundation')
|
||||
if not core_foundation_path:
|
||||
raise ImportError('The library CoreFoundation could not be found')
|
||||
|
||||
|
||||
version = platform.mac_ver()[0]
|
||||
version_info = tuple(map(int, version.split('.')))
|
||||
if version_info < (10, 8):
|
||||
raise OSError(
|
||||
'Only OS X 10.8 and newer are supported, not %s.%s' % (
|
||||
version_info[0], version_info[1]
|
||||
)
|
||||
)
|
||||
|
||||
Security = CDLL(security_path, use_errno=True)
|
||||
CoreFoundation = CDLL(core_foundation_path, use_errno=True)
|
||||
|
||||
Boolean = c_bool
|
||||
CFIndex = c_long
|
||||
CFStringEncoding = c_uint32
|
||||
CFData = c_void_p
|
||||
CFString = c_void_p
|
||||
CFArray = c_void_p
|
||||
CFMutableArray = c_void_p
|
||||
CFDictionary = c_void_p
|
||||
CFError = c_void_p
|
||||
CFType = c_void_p
|
||||
CFTypeID = c_ulong
|
||||
|
||||
CFTypeRef = POINTER(CFType)
|
||||
CFAllocatorRef = c_void_p
|
||||
|
||||
OSStatus = c_int32
|
||||
|
||||
CFDataRef = POINTER(CFData)
|
||||
CFStringRef = POINTER(CFString)
|
||||
CFArrayRef = POINTER(CFArray)
|
||||
CFMutableArrayRef = POINTER(CFMutableArray)
|
||||
CFDictionaryRef = POINTER(CFDictionary)
|
||||
CFArrayCallBacks = c_void_p
|
||||
CFDictionaryKeyCallBacks = c_void_p
|
||||
CFDictionaryValueCallBacks = c_void_p
|
||||
|
||||
SecCertificateRef = POINTER(c_void_p)
|
||||
SecExternalFormat = c_uint32
|
||||
SecExternalItemType = c_uint32
|
||||
SecIdentityRef = POINTER(c_void_p)
|
||||
SecItemImportExportFlags = c_uint32
|
||||
SecItemImportExportKeyParameters = c_void_p
|
||||
SecKeychainRef = POINTER(c_void_p)
|
||||
SSLProtocol = c_uint32
|
||||
SSLCipherSuite = c_uint32
|
||||
SSLContextRef = POINTER(c_void_p)
|
||||
SecTrustRef = POINTER(c_void_p)
|
||||
SSLConnectionRef = c_uint32
|
||||
SecTrustResultType = c_uint32
|
||||
SecTrustOptionFlags = c_uint32
|
||||
SSLProtocolSide = c_uint32
|
||||
SSLConnectionType = c_uint32
|
||||
SSLSessionOption = c_uint32
|
||||
|
||||
|
||||
try:
|
||||
Security.SecItemImport.argtypes = [
|
||||
CFDataRef,
|
||||
CFStringRef,
|
||||
POINTER(SecExternalFormat),
|
||||
POINTER(SecExternalItemType),
|
||||
SecItemImportExportFlags,
|
||||
POINTER(SecItemImportExportKeyParameters),
|
||||
SecKeychainRef,
|
||||
POINTER(CFArrayRef),
|
||||
]
|
||||
Security.SecItemImport.restype = OSStatus
|
||||
|
||||
Security.SecCertificateGetTypeID.argtypes = []
|
||||
Security.SecCertificateGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecIdentityGetTypeID.argtypes = []
|
||||
Security.SecIdentityGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecKeyGetTypeID.argtypes = []
|
||||
Security.SecKeyGetTypeID.restype = CFTypeID
|
||||
|
||||
Security.SecCertificateCreateWithData.argtypes = [
|
||||
CFAllocatorRef,
|
||||
CFDataRef
|
||||
]
|
||||
Security.SecCertificateCreateWithData.restype = SecCertificateRef
|
||||
|
||||
Security.SecCertificateCopyData.argtypes = [
|
||||
SecCertificateRef
|
||||
]
|
||||
Security.SecCertificateCopyData.restype = CFDataRef
|
||||
|
||||
Security.SecCopyErrorMessageString.argtypes = [
|
||||
OSStatus,
|
||||
c_void_p
|
||||
]
|
||||
Security.SecCopyErrorMessageString.restype = CFStringRef
|
||||
|
||||
Security.SecIdentityCreateWithCertificate.argtypes = [
|
||||
CFTypeRef,
|
||||
SecCertificateRef,
|
||||
POINTER(SecIdentityRef)
|
||||
]
|
||||
Security.SecIdentityCreateWithCertificate.restype = OSStatus
|
||||
|
||||
Security.SecKeychainCreate.argtypes = [
|
||||
c_char_p,
|
||||
c_uint32,
|
||||
c_void_p,
|
||||
Boolean,
|
||||
c_void_p,
|
||||
POINTER(SecKeychainRef)
|
||||
]
|
||||
Security.SecKeychainCreate.restype = OSStatus
|
||||
|
||||
Security.SecKeychainDelete.argtypes = [
|
||||
SecKeychainRef
|
||||
]
|
||||
Security.SecKeychainDelete.restype = OSStatus
|
||||
|
||||
Security.SecPKCS12Import.argtypes = [
|
||||
CFDataRef,
|
||||
CFDictionaryRef,
|
||||
POINTER(CFArrayRef)
|
||||
]
|
||||
Security.SecPKCS12Import.restype = OSStatus
|
||||
|
||||
SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t))
|
||||
SSLWriteFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t))
|
||||
|
||||
Security.SSLSetIOFuncs.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLReadFunc,
|
||||
SSLWriteFunc
|
||||
]
|
||||
Security.SSLSetIOFuncs.restype = OSStatus
|
||||
|
||||
Security.SSLSetPeerID.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetPeerID.restype = OSStatus
|
||||
|
||||
Security.SSLSetCertificate.argtypes = [
|
||||
SSLContextRef,
|
||||
CFArrayRef
|
||||
]
|
||||
Security.SSLSetCertificate.restype = OSStatus
|
||||
|
||||
Security.SSLSetCertificateAuthorities.argtypes = [
|
||||
SSLContextRef,
|
||||
CFTypeRef,
|
||||
Boolean
|
||||
]
|
||||
Security.SSLSetCertificateAuthorities.restype = OSStatus
|
||||
|
||||
Security.SSLSetConnection.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLConnectionRef
|
||||
]
|
||||
Security.SSLSetConnection.restype = OSStatus
|
||||
|
||||
Security.SSLSetPeerDomainName.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetPeerDomainName.restype = OSStatus
|
||||
|
||||
Security.SSLHandshake.argtypes = [
|
||||
SSLContextRef
|
||||
]
|
||||
Security.SSLHandshake.restype = OSStatus
|
||||
|
||||
Security.SSLRead.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLRead.restype = OSStatus
|
||||
|
||||
Security.SSLWrite.argtypes = [
|
||||
SSLContextRef,
|
||||
c_char_p,
|
||||
c_size_t,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLWrite.restype = OSStatus
|
||||
|
||||
Security.SSLClose.argtypes = [
|
||||
SSLContextRef
|
||||
]
|
||||
Security.SSLClose.restype = OSStatus
|
||||
|
||||
Security.SSLGetNumberSupportedCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetNumberSupportedCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetSupportedCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetSupportedCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLSetEnabledCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
c_size_t
|
||||
]
|
||||
Security.SSLSetEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetNumberEnabledCiphers.argtype = [
|
||||
SSLContextRef,
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetNumberEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetEnabledCiphers.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite),
|
||||
POINTER(c_size_t)
|
||||
]
|
||||
Security.SSLGetEnabledCiphers.restype = OSStatus
|
||||
|
||||
Security.SSLGetNegotiatedCipher.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLCipherSuite)
|
||||
]
|
||||
Security.SSLGetNegotiatedCipher.restype = OSStatus
|
||||
|
||||
Security.SSLGetNegotiatedProtocolVersion.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SSLProtocol)
|
||||
]
|
||||
Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus
|
||||
|
||||
Security.SSLCopyPeerTrust.argtypes = [
|
||||
SSLContextRef,
|
||||
POINTER(SecTrustRef)
|
||||
]
|
||||
Security.SSLCopyPeerTrust.restype = OSStatus
|
||||
|
||||
Security.SecTrustSetAnchorCertificates.argtypes = [
|
||||
SecTrustRef,
|
||||
CFArrayRef
|
||||
]
|
||||
Security.SecTrustSetAnchorCertificates.restype = OSStatus
|
||||
|
||||
Security.SecTrustSetAnchorCertificatesOnly.argstypes = [
|
||||
SecTrustRef,
|
||||
Boolean
|
||||
]
|
||||
Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus
|
||||
|
||||
Security.SecTrustEvaluate.argtypes = [
|
||||
SecTrustRef,
|
||||
POINTER(SecTrustResultType)
|
||||
]
|
||||
Security.SecTrustEvaluate.restype = OSStatus
|
||||
|
||||
Security.SecTrustGetCertificateCount.argtypes = [
|
||||
SecTrustRef
|
||||
]
|
||||
Security.SecTrustGetCertificateCount.restype = CFIndex
|
||||
|
||||
Security.SecTrustGetCertificateAtIndex.argtypes = [
|
||||
SecTrustRef,
|
||||
CFIndex
|
||||
]
|
||||
Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef
|
||||
|
||||
Security.SSLCreateContext.argtypes = [
|
||||
CFAllocatorRef,
|
||||
SSLProtocolSide,
|
||||
SSLConnectionType
|
||||
]
|
||||
Security.SSLCreateContext.restype = SSLContextRef
|
||||
|
||||
Security.SSLSetSessionOption.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLSessionOption,
|
||||
Boolean
|
||||
]
|
||||
Security.SSLSetSessionOption.restype = OSStatus
|
||||
|
||||
Security.SSLSetProtocolVersionMin.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLProtocol
|
||||
]
|
||||
Security.SSLSetProtocolVersionMin.restype = OSStatus
|
||||
|
||||
Security.SSLSetProtocolVersionMax.argtypes = [
|
||||
SSLContextRef,
|
||||
SSLProtocol
|
||||
]
|
||||
Security.SSLSetProtocolVersionMax.restype = OSStatus
|
||||
|
||||
Security.SecCopyErrorMessageString.argtypes = [
|
||||
OSStatus,
|
||||
c_void_p
|
||||
]
|
||||
Security.SecCopyErrorMessageString.restype = CFStringRef
|
||||
|
||||
Security.SSLReadFunc = SSLReadFunc
|
||||
Security.SSLWriteFunc = SSLWriteFunc
|
||||
Security.SSLContextRef = SSLContextRef
|
||||
Security.SSLProtocol = SSLProtocol
|
||||
Security.SSLCipherSuite = SSLCipherSuite
|
||||
Security.SecIdentityRef = SecIdentityRef
|
||||
Security.SecKeychainRef = SecKeychainRef
|
||||
Security.SecTrustRef = SecTrustRef
|
||||
Security.SecTrustResultType = SecTrustResultType
|
||||
Security.SecExternalFormat = SecExternalFormat
|
||||
Security.OSStatus = OSStatus
|
||||
|
||||
Security.kSecImportExportPassphrase = CFStringRef.in_dll(
|
||||
Security, 'kSecImportExportPassphrase'
|
||||
)
|
||||
Security.kSecImportItemIdentity = CFStringRef.in_dll(
|
||||
Security, 'kSecImportItemIdentity'
|
||||
)
|
||||
|
||||
# CoreFoundation time!
|
||||
CoreFoundation.CFRetain.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFRetain.restype = CFTypeRef
|
||||
|
||||
CoreFoundation.CFRelease.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFRelease.restype = None
|
||||
|
||||
CoreFoundation.CFGetTypeID.argtypes = [
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFGetTypeID.restype = CFTypeID
|
||||
|
||||
CoreFoundation.CFStringCreateWithCString.argtypes = [
|
||||
CFAllocatorRef,
|
||||
c_char_p,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringCreateWithCString.restype = CFStringRef
|
||||
|
||||
CoreFoundation.CFStringGetCStringPtr.argtypes = [
|
||||
CFStringRef,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringGetCStringPtr.restype = c_char_p
|
||||
|
||||
CoreFoundation.CFStringGetCString.argtypes = [
|
||||
CFStringRef,
|
||||
c_char_p,
|
||||
CFIndex,
|
||||
CFStringEncoding
|
||||
]
|
||||
CoreFoundation.CFStringGetCString.restype = c_bool
|
||||
|
||||
CoreFoundation.CFDataCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
c_char_p,
|
||||
CFIndex
|
||||
]
|
||||
CoreFoundation.CFDataCreate.restype = CFDataRef
|
||||
|
||||
CoreFoundation.CFDataGetLength.argtypes = [
|
||||
CFDataRef
|
||||
]
|
||||
CoreFoundation.CFDataGetLength.restype = CFIndex
|
||||
|
||||
CoreFoundation.CFDataGetBytePtr.argtypes = [
|
||||
CFDataRef
|
||||
]
|
||||
CoreFoundation.CFDataGetBytePtr.restype = c_void_p
|
||||
|
||||
CoreFoundation.CFDictionaryCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
POINTER(CFTypeRef),
|
||||
POINTER(CFTypeRef),
|
||||
CFIndex,
|
||||
CFDictionaryKeyCallBacks,
|
||||
CFDictionaryValueCallBacks
|
||||
]
|
||||
CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef
|
||||
|
||||
CoreFoundation.CFDictionaryGetValue.argtypes = [
|
||||
CFDictionaryRef,
|
||||
CFTypeRef
|
||||
]
|
||||
CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef
|
||||
|
||||
CoreFoundation.CFArrayCreate.argtypes = [
|
||||
CFAllocatorRef,
|
||||
POINTER(CFTypeRef),
|
||||
CFIndex,
|
||||
CFArrayCallBacks,
|
||||
]
|
||||
CoreFoundation.CFArrayCreate.restype = CFArrayRef
|
||||
|
||||
CoreFoundation.CFArrayCreateMutable.argtypes = [
|
||||
CFAllocatorRef,
|
||||
CFIndex,
|
||||
CFArrayCallBacks
|
||||
]
|
||||
CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef
|
||||
|
||||
CoreFoundation.CFArrayAppendValue.argtypes = [
|
||||
CFMutableArrayRef,
|
||||
c_void_p
|
||||
]
|
||||
CoreFoundation.CFArrayAppendValue.restype = None
|
||||
|
||||
CoreFoundation.CFArrayGetCount.argtypes = [
|
||||
CFArrayRef
|
||||
]
|
||||
CoreFoundation.CFArrayGetCount.restype = CFIndex
|
||||
|
||||
CoreFoundation.CFArrayGetValueAtIndex.argtypes = [
|
||||
CFArrayRef,
|
||||
CFIndex
|
||||
]
|
||||
CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p
|
||||
|
||||
CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll(
|
||||
CoreFoundation, 'kCFAllocatorDefault'
|
||||
)
|
||||
CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(CoreFoundation, 'kCFTypeArrayCallBacks')
|
||||
CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll(
|
||||
CoreFoundation, 'kCFTypeDictionaryKeyCallBacks'
|
||||
)
|
||||
CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll(
|
||||
CoreFoundation, 'kCFTypeDictionaryValueCallBacks'
|
||||
)
|
||||
|
||||
CoreFoundation.CFTypeRef = CFTypeRef
|
||||
CoreFoundation.CFArrayRef = CFArrayRef
|
||||
CoreFoundation.CFStringRef = CFStringRef
|
||||
CoreFoundation.CFDictionaryRef = CFDictionaryRef
|
||||
|
||||
except (AttributeError):
|
||||
raise ImportError('Error initializing ctypes')
|
||||
|
||||
|
||||
class CFConst(object):
|
||||
"""
|
||||
A class object that acts as essentially a namespace for CoreFoundation
|
||||
constants.
|
||||
"""
|
||||
kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
|
||||
|
||||
|
||||
class SecurityConst(object):
|
||||
"""
|
||||
A class object that acts as essentially a namespace for Security constants.
|
||||
"""
|
||||
kSSLSessionOptionBreakOnServerAuth = 0
|
||||
|
||||
kSSLProtocol2 = 1
|
||||
kSSLProtocol3 = 2
|
||||
kTLSProtocol1 = 4
|
||||
kTLSProtocol11 = 7
|
||||
kTLSProtocol12 = 8
|
||||
|
||||
kSSLClientSide = 1
|
||||
kSSLStreamType = 0
|
||||
|
||||
kSecFormatPEMSequence = 10
|
||||
|
||||
kSecTrustResultInvalid = 0
|
||||
kSecTrustResultProceed = 1
|
||||
# This gap is present on purpose: this was kSecTrustResultConfirm, which
|
||||
# is deprecated.
|
||||
kSecTrustResultDeny = 3
|
||||
kSecTrustResultUnspecified = 4
|
||||
kSecTrustResultRecoverableTrustFailure = 5
|
||||
kSecTrustResultFatalTrustFailure = 6
|
||||
kSecTrustResultOtherError = 7
|
||||
|
||||
errSSLProtocol = -9800
|
||||
errSSLWouldBlock = -9803
|
||||
errSSLClosedGraceful = -9805
|
||||
errSSLClosedNoNotify = -9816
|
||||
errSSLClosedAbort = -9806
|
||||
|
||||
errSSLXCertChainInvalid = -9807
|
||||
errSSLCrypto = -9809
|
||||
errSSLInternal = -9810
|
||||
errSSLCertExpired = -9814
|
||||
errSSLCertNotYetValid = -9815
|
||||
errSSLUnknownRootCert = -9812
|
||||
errSSLNoRootCert = -9813
|
||||
errSSLHostNameMismatch = -9843
|
||||
errSSLPeerHandshakeFail = -9824
|
||||
errSSLPeerUserCancelled = -9839
|
||||
errSSLWeakPeerEphemeralDHKey = -9850
|
||||
errSSLServerAuthCompleted = -9841
|
||||
errSSLRecordOverflow = -9847
|
||||
|
||||
errSecVerifyFailed = -67808
|
||||
errSecNoTrustSettings = -25263
|
||||
errSecItemNotFound = -25300
|
||||
errSecInvalidTrustSettings = -25262
|
||||
|
||||
# Cipher suites. We only pick the ones our default cipher string allows.
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
|
||||
TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
|
||||
TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
|
||||
TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 = 0x00A3
|
||||
TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
|
||||
TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 = 0x00A2
|
||||
TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
|
||||
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
|
||||
TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
|
||||
TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
|
||||
TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
|
||||
TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 = 0x006A
|
||||
TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
|
||||
TLS_DHE_DSS_WITH_AES_256_CBC_SHA = 0x0038
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
|
||||
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
|
||||
TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
|
||||
TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
|
||||
TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
|
||||
TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 = 0x0040
|
||||
TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
|
||||
TLS_DHE_DSS_WITH_AES_128_CBC_SHA = 0x0032
|
||||
TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
|
||||
TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
|
||||
TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
|
||||
TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
|
||||
TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
|
||||
TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
|
||||
@@ -1,343 +0,0 @@
|
||||
"""
|
||||
Low-level helpers for the SecureTransport bindings.
|
||||
|
||||
These are Python functions that are not directly related to the high-level APIs
|
||||
but are necessary to get them to work. They include a whole bunch of low-level
|
||||
CoreFoundation messing about and memory management. The concerns in this module
|
||||
are almost entirely about trying to avoid memory leaks and providing
|
||||
appropriate and useful assistance to the higher-level code.
|
||||
"""
|
||||
import base64
|
||||
import ctypes
|
||||
import itertools
|
||||
import re
|
||||
import os
|
||||
import ssl
|
||||
import tempfile
|
||||
|
||||
from .bindings import Security, CoreFoundation, CFConst
|
||||
|
||||
|
||||
# This regular expression is used to grab PEM data out of a PEM bundle.
|
||||
_PEM_CERTS_RE = re.compile(
|
||||
b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL
|
||||
)
|
||||
|
||||
|
||||
def _cf_data_from_bytes(bytestring):
|
||||
"""
|
||||
Given a bytestring, create a CFData object from it. This CFData object must
|
||||
be CFReleased by the caller.
|
||||
"""
|
||||
return CoreFoundation.CFDataCreate(
|
||||
CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring)
|
||||
)
|
||||
|
||||
|
||||
def _cf_dictionary_from_tuples(tuples):
|
||||
"""
|
||||
Given a list of Python tuples, create an associated CFDictionary.
|
||||
"""
|
||||
dictionary_size = len(tuples)
|
||||
|
||||
# We need to get the dictionary keys and values out in the same order.
|
||||
keys = (t[0] for t in tuples)
|
||||
values = (t[1] for t in tuples)
|
||||
cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys)
|
||||
cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values)
|
||||
|
||||
return CoreFoundation.CFDictionaryCreate(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
cf_keys,
|
||||
cf_values,
|
||||
dictionary_size,
|
||||
CoreFoundation.kCFTypeDictionaryKeyCallBacks,
|
||||
CoreFoundation.kCFTypeDictionaryValueCallBacks,
|
||||
)
|
||||
|
||||
|
||||
def _cf_string_to_unicode(value):
|
||||
"""
|
||||
Creates a Unicode string from a CFString object. Used entirely for error
|
||||
reporting.
|
||||
|
||||
Yes, it annoys me quite a lot that this function is this complex.
|
||||
"""
|
||||
value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p))
|
||||
|
||||
string = CoreFoundation.CFStringGetCStringPtr(
|
||||
value_as_void_p,
|
||||
CFConst.kCFStringEncodingUTF8
|
||||
)
|
||||
if string is None:
|
||||
buffer = ctypes.create_string_buffer(1024)
|
||||
result = CoreFoundation.CFStringGetCString(
|
||||
value_as_void_p,
|
||||
buffer,
|
||||
1024,
|
||||
CFConst.kCFStringEncodingUTF8
|
||||
)
|
||||
if not result:
|
||||
raise OSError('Error copying C string from CFStringRef')
|
||||
string = buffer.value
|
||||
if string is not None:
|
||||
string = string.decode('utf-8')
|
||||
return string
|
||||
|
||||
|
||||
def _assert_no_error(error, exception_class=None):
|
||||
"""
|
||||
Checks the return code and throws an exception if there is an error to
|
||||
report
|
||||
"""
|
||||
if error == 0:
|
||||
return
|
||||
|
||||
cf_error_string = Security.SecCopyErrorMessageString(error, None)
|
||||
output = _cf_string_to_unicode(cf_error_string)
|
||||
CoreFoundation.CFRelease(cf_error_string)
|
||||
|
||||
if output is None or output == u'':
|
||||
output = u'OSStatus %s' % error
|
||||
|
||||
if exception_class is None:
|
||||
exception_class = ssl.SSLError
|
||||
|
||||
raise exception_class(output)
|
||||
|
||||
|
||||
def _cert_array_from_pem(pem_bundle):
|
||||
"""
|
||||
Given a bundle of certs in PEM format, turns them into a CFArray of certs
|
||||
that can be used to validate a cert chain.
|
||||
"""
|
||||
der_certs = [
|
||||
base64.b64decode(match.group(1))
|
||||
for match in _PEM_CERTS_RE.finditer(pem_bundle)
|
||||
]
|
||||
if not der_certs:
|
||||
raise ssl.SSLError("No root certificates specified")
|
||||
|
||||
cert_array = CoreFoundation.CFArrayCreateMutable(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
0,
|
||||
ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks)
|
||||
)
|
||||
if not cert_array:
|
||||
raise ssl.SSLError("Unable to allocate memory!")
|
||||
|
||||
try:
|
||||
for der_bytes in der_certs:
|
||||
certdata = _cf_data_from_bytes(der_bytes)
|
||||
if not certdata:
|
||||
raise ssl.SSLError("Unable to allocate memory!")
|
||||
cert = Security.SecCertificateCreateWithData(
|
||||
CoreFoundation.kCFAllocatorDefault, certdata
|
||||
)
|
||||
CoreFoundation.CFRelease(certdata)
|
||||
if not cert:
|
||||
raise ssl.SSLError("Unable to build cert object!")
|
||||
|
||||
CoreFoundation.CFArrayAppendValue(cert_array, cert)
|
||||
CoreFoundation.CFRelease(cert)
|
||||
except Exception:
|
||||
# We need to free the array before the exception bubbles further.
|
||||
# We only want to do that if an error occurs: otherwise, the caller
|
||||
# should free.
|
||||
CoreFoundation.CFRelease(cert_array)
|
||||
|
||||
return cert_array
|
||||
|
||||
|
||||
def _is_cert(item):
|
||||
"""
|
||||
Returns True if a given CFTypeRef is a certificate.
|
||||
"""
|
||||
expected = Security.SecCertificateGetTypeID()
|
||||
return CoreFoundation.CFGetTypeID(item) == expected
|
||||
|
||||
|
||||
def _is_identity(item):
|
||||
"""
|
||||
Returns True if a given CFTypeRef is an identity.
|
||||
"""
|
||||
expected = Security.SecIdentityGetTypeID()
|
||||
return CoreFoundation.CFGetTypeID(item) == expected
|
||||
|
||||
|
||||
def _temporary_keychain():
|
||||
"""
|
||||
This function creates a temporary Mac keychain that we can use to work with
|
||||
credentials. This keychain uses a one-time password and a temporary file to
|
||||
store the data. We expect to have one keychain per socket. The returned
|
||||
SecKeychainRef must be freed by the caller, including calling
|
||||
SecKeychainDelete.
|
||||
|
||||
Returns a tuple of the SecKeychainRef and the path to the temporary
|
||||
directory that contains it.
|
||||
"""
|
||||
# Unfortunately, SecKeychainCreate requires a path to a keychain. This
|
||||
# means we cannot use mkstemp to use a generic temporary file. Instead,
|
||||
# we're going to create a temporary directory and a filename to use there.
|
||||
# This filename will be 8 random bytes expanded into base64. We also need
|
||||
# some random bytes to password-protect the keychain we're creating, so we
|
||||
# ask for 40 random bytes.
|
||||
random_bytes = os.urandom(40)
|
||||
filename = base64.b64encode(random_bytes[:8]).decode('utf-8')
|
||||
password = base64.b64encode(random_bytes[8:]) # Must be valid UTF-8
|
||||
tempdirectory = tempfile.mkdtemp()
|
||||
|
||||
keychain_path = os.path.join(tempdirectory, filename).encode('utf-8')
|
||||
|
||||
# We now want to create the keychain itself.
|
||||
keychain = Security.SecKeychainRef()
|
||||
status = Security.SecKeychainCreate(
|
||||
keychain_path,
|
||||
len(password),
|
||||
password,
|
||||
False,
|
||||
None,
|
||||
ctypes.byref(keychain)
|
||||
)
|
||||
_assert_no_error(status)
|
||||
|
||||
# Having created the keychain, we want to pass it off to the caller.
|
||||
return keychain, tempdirectory
|
||||
|
||||
|
||||
def _load_items_from_file(keychain, path):
|
||||
"""
|
||||
Given a single file, loads all the trust objects from it into arrays and
|
||||
the keychain.
|
||||
Returns a tuple of lists: the first list is a list of identities, the
|
||||
second a list of certs.
|
||||
"""
|
||||
certificates = []
|
||||
identities = []
|
||||
result_array = None
|
||||
|
||||
with open(path, 'rb') as f:
|
||||
raw_filedata = f.read()
|
||||
|
||||
try:
|
||||
filedata = CoreFoundation.CFDataCreate(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
raw_filedata,
|
||||
len(raw_filedata)
|
||||
)
|
||||
result_array = CoreFoundation.CFArrayRef()
|
||||
result = Security.SecItemImport(
|
||||
filedata, # cert data
|
||||
None, # Filename, leaving it out for now
|
||||
None, # What the type of the file is, we don't care
|
||||
None, # what's in the file, we don't care
|
||||
0, # import flags
|
||||
None, # key params, can include passphrase in the future
|
||||
keychain, # The keychain to insert into
|
||||
ctypes.byref(result_array) # Results
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
# A CFArray is not very useful to us as an intermediary
|
||||
# representation, so we are going to extract the objects we want
|
||||
# and then free the array. We don't need to keep hold of keys: the
|
||||
# keychain already has them!
|
||||
result_count = CoreFoundation.CFArrayGetCount(result_array)
|
||||
for index in range(result_count):
|
||||
item = CoreFoundation.CFArrayGetValueAtIndex(
|
||||
result_array, index
|
||||
)
|
||||
item = ctypes.cast(item, CoreFoundation.CFTypeRef)
|
||||
|
||||
if _is_cert(item):
|
||||
CoreFoundation.CFRetain(item)
|
||||
certificates.append(item)
|
||||
elif _is_identity(item):
|
||||
CoreFoundation.CFRetain(item)
|
||||
identities.append(item)
|
||||
finally:
|
||||
if result_array:
|
||||
CoreFoundation.CFRelease(result_array)
|
||||
|
||||
CoreFoundation.CFRelease(filedata)
|
||||
|
||||
return (identities, certificates)
|
||||
|
||||
|
||||
def _load_client_cert_chain(keychain, *paths):
|
||||
"""
|
||||
Load certificates and maybe keys from a number of files. Has the end goal
|
||||
of returning a CFArray containing one SecIdentityRef, and then zero or more
|
||||
SecCertificateRef objects, suitable for use as a client certificate trust
|
||||
chain.
|
||||
"""
|
||||
# Ok, the strategy.
|
||||
#
|
||||
# This relies on knowing that macOS will not give you a SecIdentityRef
|
||||
# unless you have imported a key into a keychain. This is a somewhat
|
||||
# artificial limitation of macOS (for example, it doesn't necessarily
|
||||
# affect iOS), but there is nothing inside Security.framework that lets you
|
||||
# get a SecIdentityRef without having a key in a keychain.
|
||||
#
|
||||
# So the policy here is we take all the files and iterate them in order.
|
||||
# Each one will use SecItemImport to have one or more objects loaded from
|
||||
# it. We will also point at a keychain that macOS can use to work with the
|
||||
# private key.
|
||||
#
|
||||
# Once we have all the objects, we'll check what we actually have. If we
|
||||
# already have a SecIdentityRef in hand, fab: we'll use that. Otherwise,
|
||||
# we'll take the first certificate (which we assume to be our leaf) and
|
||||
# ask the keychain to give us a SecIdentityRef with that cert's associated
|
||||
# key.
|
||||
#
|
||||
# We'll then return a CFArray containing the trust chain: one
|
||||
# SecIdentityRef and then zero-or-more SecCertificateRef objects. The
|
||||
# responsibility for freeing this CFArray will be with the caller. This
|
||||
# CFArray must remain alive for the entire connection, so in practice it
|
||||
# will be stored with a single SSLSocket, along with the reference to the
|
||||
# keychain.
|
||||
certificates = []
|
||||
identities = []
|
||||
|
||||
# Filter out bad paths.
|
||||
paths = (path for path in paths if path)
|
||||
|
||||
try:
|
||||
for file_path in paths:
|
||||
new_identities, new_certs = _load_items_from_file(
|
||||
keychain, file_path
|
||||
)
|
||||
identities.extend(new_identities)
|
||||
certificates.extend(new_certs)
|
||||
|
||||
# Ok, we have everything. The question is: do we have an identity? If
|
||||
# not, we want to grab one from the first cert we have.
|
||||
if not identities:
|
||||
new_identity = Security.SecIdentityRef()
|
||||
status = Security.SecIdentityCreateWithCertificate(
|
||||
keychain,
|
||||
certificates[0],
|
||||
ctypes.byref(new_identity)
|
||||
)
|
||||
_assert_no_error(status)
|
||||
identities.append(new_identity)
|
||||
|
||||
# We now want to release the original certificate, as we no longer
|
||||
# need it.
|
||||
CoreFoundation.CFRelease(certificates.pop(0))
|
||||
|
||||
# We now need to build a new CFArray that holds the trust chain.
|
||||
trust_chain = CoreFoundation.CFArrayCreateMutable(
|
||||
CoreFoundation.kCFAllocatorDefault,
|
||||
0,
|
||||
ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
|
||||
)
|
||||
for item in itertools.chain(identities, certificates):
|
||||
# ArrayAppendValue does a CFRetain on the item. That's fine,
|
||||
# because the finally block will release our other refs to them.
|
||||
CoreFoundation.CFArrayAppendValue(trust_chain, item)
|
||||
|
||||
return trust_chain
|
||||
finally:
|
||||
for obj in itertools.chain(identities, certificates):
|
||||
CoreFoundation.CFRelease(obj)
|
||||
@@ -1,296 +0,0 @@
|
||||
"""
|
||||
This module provides a pool manager that uses Google App Engine's
|
||||
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
|
||||
|
||||
Example usage::
|
||||
|
||||
from urllib3 import PoolManager
|
||||
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
|
||||
|
||||
if is_appengine_sandbox():
|
||||
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
|
||||
http = AppEngineManager()
|
||||
else:
|
||||
# PoolManager uses a socket-level API behind the scenes
|
||||
http = PoolManager()
|
||||
|
||||
r = http.request('GET', 'https://google.com/')
|
||||
|
||||
There are `limitations <https://cloud.google.com/appengine/docs/python/\
|
||||
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
|
||||
the best choice for your application. There are three options for using
|
||||
urllib3 on Google App Engine:
|
||||
|
||||
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
|
||||
cost-effective in many circumstances as long as your usage is within the
|
||||
limitations.
|
||||
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
|
||||
Sockets also have `limitations and restrictions
|
||||
<https://cloud.google.com/appengine/docs/python/sockets/\
|
||||
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
|
||||
To use sockets, be sure to specify the following in your ``app.yaml``::
|
||||
|
||||
env_variables:
|
||||
GAE_USE_SOCKETS_HTTPLIB : 'true'
|
||||
|
||||
3. If you are using `App Engine Flexible
|
||||
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
|
||||
:class:`PoolManager` without any configuration or special environment variables.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import os
|
||||
import warnings
|
||||
from ..packages.six.moves.urllib.parse import urljoin
|
||||
|
||||
from ..exceptions import (
|
||||
HTTPError,
|
||||
HTTPWarning,
|
||||
MaxRetryError,
|
||||
ProtocolError,
|
||||
TimeoutError,
|
||||
SSLError
|
||||
)
|
||||
|
||||
from ..packages.six import BytesIO
|
||||
from ..request import RequestMethods
|
||||
from ..response import HTTPResponse
|
||||
from ..util.timeout import Timeout
|
||||
from ..util.retry import Retry
|
||||
|
||||
try:
|
||||
from google.appengine.api import urlfetch
|
||||
except ImportError:
|
||||
urlfetch = None
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AppEnginePlatformWarning(HTTPWarning):
|
||||
pass
|
||||
|
||||
|
||||
class AppEnginePlatformError(HTTPError):
|
||||
pass
|
||||
|
||||
|
||||
class AppEngineManager(RequestMethods):
|
||||
"""
|
||||
Connection manager for Google App Engine sandbox applications.
|
||||
|
||||
This manager uses the URLFetch service directly instead of using the
|
||||
emulated httplib, and is subject to URLFetch limitations as described in
|
||||
the App Engine documentation `here
|
||||
<https://cloud.google.com/appengine/docs/python/urlfetch>`_.
|
||||
|
||||
Notably it will raise an :class:`AppEnginePlatformError` if:
|
||||
* URLFetch is not available.
|
||||
* If you attempt to use this on App Engine Flexible, as full socket
|
||||
support is available.
|
||||
* If a request size is more than 10 megabytes.
|
||||
* If a response size is more than 32 megabtyes.
|
||||
* If you use an unsupported request method such as OPTIONS.
|
||||
|
||||
Beyond those cases, it will raise normal urllib3 errors.
|
||||
"""
|
||||
|
||||
def __init__(self, headers=None, retries=None, validate_certificate=True,
|
||||
urlfetch_retries=True):
|
||||
if not urlfetch:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch is not available in this environment.")
|
||||
|
||||
if is_prod_appengine_mvms():
|
||||
raise AppEnginePlatformError(
|
||||
"Use normal urllib3.PoolManager instead of AppEngineManager"
|
||||
"on Managed VMs, as using URLFetch is not necessary in "
|
||||
"this environment.")
|
||||
|
||||
warnings.warn(
|
||||
"urllib3 is using URLFetch on Google App Engine sandbox instead "
|
||||
"of sockets. To use sockets directly instead of URLFetch see "
|
||||
"https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.",
|
||||
AppEnginePlatformWarning)
|
||||
|
||||
RequestMethods.__init__(self, headers)
|
||||
self.validate_certificate = validate_certificate
|
||||
self.urlfetch_retries = urlfetch_retries
|
||||
|
||||
self.retries = retries or Retry.DEFAULT
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
# Return False to re-raise any potential exceptions
|
||||
return False
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None,
|
||||
retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
|
||||
**response_kw):
|
||||
|
||||
retries = self._get_retries(retries, redirect)
|
||||
|
||||
try:
|
||||
follow_redirects = (
|
||||
redirect and
|
||||
retries.redirect != 0 and
|
||||
retries.total)
|
||||
response = urlfetch.fetch(
|
||||
url,
|
||||
payload=body,
|
||||
method=method,
|
||||
headers=headers or {},
|
||||
allow_truncated=False,
|
||||
follow_redirects=self.urlfetch_retries and follow_redirects,
|
||||
deadline=self._get_absolute_timeout(timeout),
|
||||
validate_certificate=self.validate_certificate,
|
||||
)
|
||||
except urlfetch.DeadlineExceededError as e:
|
||||
raise TimeoutError(self, e)
|
||||
|
||||
except urlfetch.InvalidURLError as e:
|
||||
if 'too large' in str(e):
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch request too large, URLFetch only "
|
||||
"supports requests up to 10mb in size.", e)
|
||||
raise ProtocolError(e)
|
||||
|
||||
except urlfetch.DownloadError as e:
|
||||
if 'Too many redirects' in str(e):
|
||||
raise MaxRetryError(self, url, reason=e)
|
||||
raise ProtocolError(e)
|
||||
|
||||
except urlfetch.ResponseTooLargeError as e:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch response too large, URLFetch only supports"
|
||||
"responses up to 32mb in size.", e)
|
||||
|
||||
except urlfetch.SSLCertificateError as e:
|
||||
raise SSLError(e)
|
||||
|
||||
except urlfetch.InvalidMethodError as e:
|
||||
raise AppEnginePlatformError(
|
||||
"URLFetch does not support method: %s" % method, e)
|
||||
|
||||
http_response = self._urlfetch_response_to_http_response(
|
||||
response, retries=retries, **response_kw)
|
||||
|
||||
# Handle redirect?
|
||||
redirect_location = redirect and http_response.get_redirect_location()
|
||||
if redirect_location:
|
||||
# Check for redirect response
|
||||
if (self.urlfetch_retries and retries.raise_on_redirect):
|
||||
raise MaxRetryError(self, url, "too many redirects")
|
||||
else:
|
||||
if http_response.status == 303:
|
||||
method = 'GET'
|
||||
|
||||
try:
|
||||
retries = retries.increment(method, url, response=http_response, _pool=self)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_redirect:
|
||||
raise MaxRetryError(self, url, "too many redirects")
|
||||
return http_response
|
||||
|
||||
retries.sleep_for_retry(http_response)
|
||||
log.debug("Redirecting %s -> %s", url, redirect_location)
|
||||
redirect_url = urljoin(url, redirect_location)
|
||||
return self.urlopen(
|
||||
method, redirect_url, body, headers,
|
||||
retries=retries, redirect=redirect,
|
||||
timeout=timeout, **response_kw)
|
||||
|
||||
# Check if we should retry the HTTP response.
|
||||
has_retry_after = bool(http_response.getheader('Retry-After'))
|
||||
if retries.is_retry(method, http_response.status, has_retry_after):
|
||||
retries = retries.increment(
|
||||
method, url, response=http_response, _pool=self)
|
||||
log.debug("Retry: %s", url)
|
||||
retries.sleep(http_response)
|
||||
return self.urlopen(
|
||||
method, url,
|
||||
body=body, headers=headers,
|
||||
retries=retries, redirect=redirect,
|
||||
timeout=timeout, **response_kw)
|
||||
|
||||
return http_response
|
||||
|
||||
def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
|
||||
|
||||
if is_prod_appengine():
|
||||
# Production GAE handles deflate encoding automatically, but does
|
||||
# not remove the encoding header.
|
||||
content_encoding = urlfetch_resp.headers.get('content-encoding')
|
||||
|
||||
if content_encoding == 'deflate':
|
||||
del urlfetch_resp.headers['content-encoding']
|
||||
|
||||
transfer_encoding = urlfetch_resp.headers.get('transfer-encoding')
|
||||
# We have a full response's content,
|
||||
# so let's make sure we don't report ourselves as chunked data.
|
||||
if transfer_encoding == 'chunked':
|
||||
encodings = transfer_encoding.split(",")
|
||||
encodings.remove('chunked')
|
||||
urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings)
|
||||
|
||||
return HTTPResponse(
|
||||
# In order for decoding to work, we must present the content as
|
||||
# a file-like object.
|
||||
body=BytesIO(urlfetch_resp.content),
|
||||
headers=urlfetch_resp.headers,
|
||||
status=urlfetch_resp.status_code,
|
||||
**response_kw
|
||||
)
|
||||
|
||||
def _get_absolute_timeout(self, timeout):
|
||||
if timeout is Timeout.DEFAULT_TIMEOUT:
|
||||
return None # Defer to URLFetch's default.
|
||||
if isinstance(timeout, Timeout):
|
||||
if timeout._read is not None or timeout._connect is not None:
|
||||
warnings.warn(
|
||||
"URLFetch does not support granular timeout settings, "
|
||||
"reverting to total or default URLFetch timeout.",
|
||||
AppEnginePlatformWarning)
|
||||
return timeout.total
|
||||
return timeout
|
||||
|
||||
def _get_retries(self, retries, redirect):
|
||||
if not isinstance(retries, Retry):
|
||||
retries = Retry.from_int(
|
||||
retries, redirect=redirect, default=self.retries)
|
||||
|
||||
if retries.connect or retries.read or retries.redirect:
|
||||
warnings.warn(
|
||||
"URLFetch only supports total retries and does not "
|
||||
"recognize connect, read, or redirect retry parameters.",
|
||||
AppEnginePlatformWarning)
|
||||
|
||||
return retries
|
||||
|
||||
|
||||
def is_appengine():
|
||||
return (is_local_appengine() or
|
||||
is_prod_appengine() or
|
||||
is_prod_appengine_mvms())
|
||||
|
||||
|
||||
def is_appengine_sandbox():
|
||||
return is_appengine() and not is_prod_appengine_mvms()
|
||||
|
||||
|
||||
def is_local_appengine():
|
||||
return ('APPENGINE_RUNTIME' in os.environ and
|
||||
'Development/' in os.environ['SERVER_SOFTWARE'])
|
||||
|
||||
|
||||
def is_prod_appengine():
|
||||
return ('APPENGINE_RUNTIME' in os.environ and
|
||||
'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
|
||||
not is_prod_appengine_mvms())
|
||||
|
||||
|
||||
def is_prod_appengine_mvms():
|
||||
return os.environ.get('GAE_VM', False) == 'true'
|
||||
@@ -1,112 +0,0 @@
|
||||
"""
|
||||
NTLM authenticating pool, contributed by erikcederstran
|
||||
|
||||
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from logging import getLogger
|
||||
from ntlm import ntlm
|
||||
|
||||
from .. import HTTPSConnectionPool
|
||||
from ..packages.six.moves.http_client import HTTPSConnection
|
||||
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
class NTLMConnectionPool(HTTPSConnectionPool):
|
||||
"""
|
||||
Implements an NTLM authentication version of an urllib3 connection pool
|
||||
"""
|
||||
|
||||
scheme = 'https'
|
||||
|
||||
def __init__(self, user, pw, authurl, *args, **kwargs):
|
||||
"""
|
||||
authurl is a random URL on the server that is protected by NTLM.
|
||||
user is the Windows user, probably in the DOMAIN\\username format.
|
||||
pw is the password for the user.
|
||||
"""
|
||||
super(NTLMConnectionPool, self).__init__(*args, **kwargs)
|
||||
self.authurl = authurl
|
||||
self.rawuser = user
|
||||
user_parts = user.split('\\', 1)
|
||||
self.domain = user_parts[0].upper()
|
||||
self.user = user_parts[1]
|
||||
self.pw = pw
|
||||
|
||||
def _new_conn(self):
|
||||
# Performs the NTLM handshake that secures the connection. The socket
|
||||
# must be kept open while requests are performed.
|
||||
self.num_connections += 1
|
||||
log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s',
|
||||
self.num_connections, self.host, self.authurl)
|
||||
|
||||
headers = {}
|
||||
headers['Connection'] = 'Keep-Alive'
|
||||
req_header = 'Authorization'
|
||||
resp_header = 'www-authenticate'
|
||||
|
||||
conn = HTTPSConnection(host=self.host, port=self.port)
|
||||
|
||||
# Send negotiation message
|
||||
headers[req_header] = (
|
||||
'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
|
||||
log.debug('Request headers: %s', headers)
|
||||
conn.request('GET', self.authurl, None, headers)
|
||||
res = conn.getresponse()
|
||||
reshdr = dict(res.getheaders())
|
||||
log.debug('Response status: %s %s', res.status, res.reason)
|
||||
log.debug('Response headers: %s', reshdr)
|
||||
log.debug('Response data: %s [...]', res.read(100))
|
||||
|
||||
# Remove the reference to the socket, so that it can not be closed by
|
||||
# the response object (we want to keep the socket open)
|
||||
res.fp = None
|
||||
|
||||
# Server should respond with a challenge message
|
||||
auth_header_values = reshdr[resp_header].split(', ')
|
||||
auth_header_value = None
|
||||
for s in auth_header_values:
|
||||
if s[:5] == 'NTLM ':
|
||||
auth_header_value = s[5:]
|
||||
if auth_header_value is None:
|
||||
raise Exception('Unexpected %s response header: %s' %
|
||||
(resp_header, reshdr[resp_header]))
|
||||
|
||||
# Send authentication message
|
||||
ServerChallenge, NegotiateFlags = \
|
||||
ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
|
||||
auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
|
||||
self.user,
|
||||
self.domain,
|
||||
self.pw,
|
||||
NegotiateFlags)
|
||||
headers[req_header] = 'NTLM %s' % auth_msg
|
||||
log.debug('Request headers: %s', headers)
|
||||
conn.request('GET', self.authurl, None, headers)
|
||||
res = conn.getresponse()
|
||||
log.debug('Response status: %s %s', res.status, res.reason)
|
||||
log.debug('Response headers: %s', dict(res.getheaders()))
|
||||
log.debug('Response data: %s [...]', res.read()[:100])
|
||||
if res.status != 200:
|
||||
if res.status == 401:
|
||||
raise Exception('Server rejected request: wrong '
|
||||
'username or password')
|
||||
raise Exception('Wrong server response: %s %s' %
|
||||
(res.status, res.reason))
|
||||
|
||||
res.fp = None
|
||||
log.debug('Connection established')
|
||||
return conn
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None, retries=3,
|
||||
redirect=True, assert_same_host=True):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
headers['Connection'] = 'Keep-Alive'
|
||||
return super(NTLMConnectionPool, self).urlopen(method, url, body,
|
||||
headers, retries,
|
||||
redirect,
|
||||
assert_same_host)
|
||||
@@ -1,452 +0,0 @@
|
||||
"""
|
||||
SSL with SNI_-support for Python 2. Follow these instructions if you would
|
||||
like to verify SSL certificates in Python 2. Note, the default libraries do
|
||||
*not* do certificate checking; you need to do additional work to validate
|
||||
certificates yourself.
|
||||
|
||||
This needs the following packages installed:
|
||||
|
||||
* pyOpenSSL (tested with 16.0.0)
|
||||
* cryptography (minimum 1.3.4, from pyopenssl)
|
||||
* idna (minimum 2.0, from cryptography)
|
||||
|
||||
However, pyopenssl depends on cryptography, which depends on idna, so while we
|
||||
use all three directly here we end up having relatively few packages required.
|
||||
|
||||
You can install them with the following command:
|
||||
|
||||
pip install pyopenssl cryptography idna
|
||||
|
||||
To activate certificate checking, call
|
||||
:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
|
||||
before you begin making HTTP requests. This can be done in a ``sitecustomize``
|
||||
module, or at any other time before your application begins using ``urllib3``,
|
||||
like this::
|
||||
|
||||
try:
|
||||
import urllib3.contrib.pyopenssl
|
||||
urllib3.contrib.pyopenssl.inject_into_urllib3()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
|
||||
when the required modules are installed.
|
||||
|
||||
Activating this module also has the positive side effect of disabling SSL/TLS
|
||||
compression in Python 2 (see `CRIME attack`_).
|
||||
|
||||
If you want to configure the default list of supported cipher suites, you can
|
||||
set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
|
||||
|
||||
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
|
||||
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import OpenSSL.SSL
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.backends.openssl import backend as openssl_backend
|
||||
from cryptography.hazmat.backends.openssl.x509 import _Certificate
|
||||
|
||||
from socket import timeout, error as SocketError
|
||||
from io import BytesIO
|
||||
|
||||
try: # Platform-specific: Python 2
|
||||
from socket import _fileobject
|
||||
except ImportError: # Platform-specific: Python 3
|
||||
_fileobject = None
|
||||
from ..packages.backports.makefile import backport_makefile
|
||||
|
||||
import logging
|
||||
import ssl
|
||||
import six
|
||||
import sys
|
||||
|
||||
from .. import util
|
||||
|
||||
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
|
||||
|
||||
# SNI always works.
|
||||
HAS_SNI = True
|
||||
|
||||
# Map from urllib3 to PyOpenSSL compatible parameter-values.
|
||||
_openssl_versions = {
|
||||
ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD,
|
||||
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
|
||||
}
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
|
||||
|
||||
if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
|
||||
|
||||
try:
|
||||
_openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD})
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
_stdlib_to_openssl_verify = {
|
||||
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
|
||||
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
|
||||
ssl.CERT_REQUIRED:
|
||||
OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
|
||||
}
|
||||
_openssl_to_stdlib_verify = dict(
|
||||
(v, k) for k, v in _stdlib_to_openssl_verify.items()
|
||||
)
|
||||
|
||||
# OpenSSL will only write 16K at a time
|
||||
SSL_WRITE_BLOCKSIZE = 16384
|
||||
|
||||
orig_util_HAS_SNI = util.HAS_SNI
|
||||
orig_util_SSLContext = util.ssl_.SSLContext
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def inject_into_urllib3():
|
||||
'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.'
|
||||
|
||||
_validate_dependencies_met()
|
||||
|
||||
util.ssl_.SSLContext = PyOpenSSLContext
|
||||
util.HAS_SNI = HAS_SNI
|
||||
util.ssl_.HAS_SNI = HAS_SNI
|
||||
util.IS_PYOPENSSL = True
|
||||
util.ssl_.IS_PYOPENSSL = True
|
||||
|
||||
|
||||
def extract_from_urllib3():
|
||||
'Undo monkey-patching by :func:`inject_into_urllib3`.'
|
||||
|
||||
util.ssl_.SSLContext = orig_util_SSLContext
|
||||
util.HAS_SNI = orig_util_HAS_SNI
|
||||
util.ssl_.HAS_SNI = orig_util_HAS_SNI
|
||||
util.IS_PYOPENSSL = False
|
||||
util.ssl_.IS_PYOPENSSL = False
|
||||
|
||||
|
||||
def _validate_dependencies_met():
|
||||
"""
|
||||
Verifies that PyOpenSSL's package-level dependencies have been met.
|
||||
Throws `ImportError` if they are not met.
|
||||
"""
|
||||
# Method added in `cryptography==1.1`; not available in older versions
|
||||
from cryptography.x509.extensions import Extensions
|
||||
if getattr(Extensions, "get_extension_for_class", None) is None:
|
||||
raise ImportError("'cryptography' module missing required functionality. "
|
||||
"Try upgrading to v1.3.4 or newer.")
|
||||
|
||||
# pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509
|
||||
# attribute is only present on those versions.
|
||||
from OpenSSL.crypto import X509
|
||||
x509 = X509()
|
||||
if getattr(x509, "_x509", None) is None:
|
||||
raise ImportError("'pyOpenSSL' module missing required functionality. "
|
||||
"Try upgrading to v0.14 or newer.")
|
||||
|
||||
|
||||
def _dnsname_to_stdlib(name):
|
||||
"""
|
||||
Converts a dNSName SubjectAlternativeName field to the form used by the
|
||||
standard library on the given Python version.
|
||||
|
||||
Cryptography produces a dNSName as a unicode string that was idna-decoded
|
||||
from ASCII bytes. We need to idna-encode that string to get it back, and
|
||||
then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib
|
||||
uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8).
|
||||
"""
|
||||
def idna_encode(name):
|
||||
"""
|
||||
Borrowed wholesale from the Python Cryptography Project. It turns out
|
||||
that we can't just safely call `idna.encode`: it can explode for
|
||||
wildcard names. This avoids that problem.
|
||||
"""
|
||||
import idna
|
||||
|
||||
for prefix in [u'*.', u'.']:
|
||||
if name.startswith(prefix):
|
||||
name = name[len(prefix):]
|
||||
return prefix.encode('ascii') + idna.encode(name)
|
||||
return idna.encode(name)
|
||||
|
||||
name = idna_encode(name)
|
||||
if sys.version_info >= (3, 0):
|
||||
name = name.decode('utf-8')
|
||||
return name
|
||||
|
||||
|
||||
def get_subj_alt_name(peer_cert):
|
||||
"""
|
||||
Given an PyOpenSSL certificate, provides all the subject alternative names.
|
||||
"""
|
||||
# Pass the cert to cryptography, which has much better APIs for this.
|
||||
# This is technically using private APIs, but should work across all
|
||||
# relevant versions until PyOpenSSL gets something proper for this.
|
||||
cert = _Certificate(openssl_backend, peer_cert._x509)
|
||||
|
||||
# We want to find the SAN extension. Ask Cryptography to locate it (it's
|
||||
# faster than looping in Python)
|
||||
try:
|
||||
ext = cert.extensions.get_extension_for_class(
|
||||
x509.SubjectAlternativeName
|
||||
).value
|
||||
except x509.ExtensionNotFound:
|
||||
# No such extension, return the empty list.
|
||||
return []
|
||||
except (x509.DuplicateExtension, x509.UnsupportedExtension,
|
||||
x509.UnsupportedGeneralNameType, UnicodeError) as e:
|
||||
# A problem has been found with the quality of the certificate. Assume
|
||||
# no SAN field is present.
|
||||
log.warning(
|
||||
"A problem was encountered with the certificate that prevented "
|
||||
"urllib3 from finding the SubjectAlternativeName field. This can "
|
||||
"affect certificate validation. The error was %s",
|
||||
e,
|
||||
)
|
||||
return []
|
||||
|
||||
# We want to return dNSName and iPAddress fields. We need to cast the IPs
|
||||
# back to strings because the match_hostname function wants them as
|
||||
# strings.
|
||||
# Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8
|
||||
# decoded. This is pretty frustrating, but that's what the standard library
|
||||
# does with certificates, and so we need to attempt to do the same.
|
||||
names = [
|
||||
('DNS', _dnsname_to_stdlib(name))
|
||||
for name in ext.get_values_for_type(x509.DNSName)
|
||||
]
|
||||
names.extend(
|
||||
('IP Address', str(name))
|
||||
for name in ext.get_values_for_type(x509.IPAddress)
|
||||
)
|
||||
|
||||
return names
|
||||
|
||||
|
||||
class WrappedSocket(object):
|
||||
'''API-compatibility wrapper for Python OpenSSL's Connection-class.
|
||||
|
||||
Note: _makefile_refs, _drop() and _reuse() are needed for the garbage
|
||||
collector of pypy.
|
||||
'''
|
||||
|
||||
def __init__(self, connection, socket, suppress_ragged_eofs=True):
|
||||
self.connection = connection
|
||||
self.socket = socket
|
||||
self.suppress_ragged_eofs = suppress_ragged_eofs
|
||||
self._makefile_refs = 0
|
||||
self._closed = False
|
||||
|
||||
def fileno(self):
|
||||
return self.socket.fileno()
|
||||
|
||||
# Copy-pasted from Python 3.5 source code
|
||||
def _decref_socketios(self):
|
||||
if self._makefile_refs > 0:
|
||||
self._makefile_refs -= 1
|
||||
if self._closed:
|
||||
self.close()
|
||||
|
||||
def recv(self, *args, **kwargs):
|
||||
try:
|
||||
data = self.connection.recv(*args, **kwargs)
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
|
||||
return b''
|
||||
else:
|
||||
raise SocketError(str(e))
|
||||
except OpenSSL.SSL.ZeroReturnError as e:
|
||||
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
|
||||
return b''
|
||||
else:
|
||||
raise
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(self.socket, self.socket.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('The read operation timed out')
|
||||
else:
|
||||
return self.recv(*args, **kwargs)
|
||||
else:
|
||||
return data
|
||||
|
||||
def recv_into(self, *args, **kwargs):
|
||||
try:
|
||||
return self.connection.recv_into(*args, **kwargs)
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
|
||||
return 0
|
||||
else:
|
||||
raise SocketError(str(e))
|
||||
except OpenSSL.SSL.ZeroReturnError as e:
|
||||
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
|
||||
return 0
|
||||
else:
|
||||
raise
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(self.socket, self.socket.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('The read operation timed out')
|
||||
else:
|
||||
return self.recv_into(*args, **kwargs)
|
||||
|
||||
def settimeout(self, timeout):
|
||||
return self.socket.settimeout(timeout)
|
||||
|
||||
def _send_until_done(self, data):
|
||||
while True:
|
||||
try:
|
||||
return self.connection.send(data)
|
||||
except OpenSSL.SSL.WantWriteError:
|
||||
wr = util.wait_for_write(self.socket, self.socket.gettimeout())
|
||||
if not wr:
|
||||
raise timeout()
|
||||
continue
|
||||
except OpenSSL.SSL.SysCallError as e:
|
||||
raise SocketError(str(e))
|
||||
|
||||
def sendall(self, data):
|
||||
total_sent = 0
|
||||
while total_sent < len(data):
|
||||
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
|
||||
total_sent += sent
|
||||
|
||||
def shutdown(self):
|
||||
# FIXME rethrow compatible exceptions should we ever use this
|
||||
self.connection.shutdown()
|
||||
|
||||
def close(self):
|
||||
if self._makefile_refs < 1:
|
||||
try:
|
||||
self._closed = True
|
||||
return self.connection.close()
|
||||
except OpenSSL.SSL.Error:
|
||||
return
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
def getpeercert(self, binary_form=False):
|
||||
x509 = self.connection.get_peer_certificate()
|
||||
|
||||
if not x509:
|
||||
return x509
|
||||
|
||||
if binary_form:
|
||||
return OpenSSL.crypto.dump_certificate(
|
||||
OpenSSL.crypto.FILETYPE_ASN1,
|
||||
x509)
|
||||
|
||||
return {
|
||||
'subject': (
|
||||
(('commonName', x509.get_subject().CN),),
|
||||
),
|
||||
'subjectAltName': get_subj_alt_name(x509)
|
||||
}
|
||||
|
||||
def _reuse(self):
|
||||
self._makefile_refs += 1
|
||||
|
||||
def _drop(self):
|
||||
if self._makefile_refs < 1:
|
||||
self.close()
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
|
||||
if _fileobject: # Platform-specific: Python 2
|
||||
def makefile(self, mode, bufsize=-1):
|
||||
self._makefile_refs += 1
|
||||
return _fileobject(self, mode, bufsize, close=True)
|
||||
else: # Platform-specific: Python 3
|
||||
makefile = backport_makefile
|
||||
|
||||
WrappedSocket.makefile = makefile
|
||||
|
||||
|
||||
class PyOpenSSLContext(object):
|
||||
"""
|
||||
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
|
||||
for translating the interface of the standard library ``SSLContext`` object
|
||||
to calls into PyOpenSSL.
|
||||
"""
|
||||
def __init__(self, protocol):
|
||||
self.protocol = _openssl_versions[protocol]
|
||||
self._ctx = OpenSSL.SSL.Context(self.protocol)
|
||||
self._options = 0
|
||||
self.check_hostname = False
|
||||
|
||||
@property
|
||||
def options(self):
|
||||
return self._options
|
||||
|
||||
@options.setter
|
||||
def options(self, value):
|
||||
self._options = value
|
||||
self._ctx.set_options(value)
|
||||
|
||||
@property
|
||||
def verify_mode(self):
|
||||
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
|
||||
|
||||
@verify_mode.setter
|
||||
def verify_mode(self, value):
|
||||
self._ctx.set_verify(
|
||||
_stdlib_to_openssl_verify[value],
|
||||
_verify_callback
|
||||
)
|
||||
|
||||
def set_default_verify_paths(self):
|
||||
self._ctx.set_default_verify_paths()
|
||||
|
||||
def set_ciphers(self, ciphers):
|
||||
if isinstance(ciphers, six.text_type):
|
||||
ciphers = ciphers.encode('utf-8')
|
||||
self._ctx.set_cipher_list(ciphers)
|
||||
|
||||
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
|
||||
if cafile is not None:
|
||||
cafile = cafile.encode('utf-8')
|
||||
if capath is not None:
|
||||
capath = capath.encode('utf-8')
|
||||
self._ctx.load_verify_locations(cafile, capath)
|
||||
if cadata is not None:
|
||||
self._ctx.load_verify_locations(BytesIO(cadata))
|
||||
|
||||
def load_cert_chain(self, certfile, keyfile=None, password=None):
|
||||
self._ctx.use_certificate_file(certfile)
|
||||
if password is not None:
|
||||
self._ctx.set_passwd_cb(lambda max_length, prompt_twice, userdata: password)
|
||||
self._ctx.use_privatekey_file(keyfile or certfile)
|
||||
|
||||
def wrap_socket(self, sock, server_side=False,
|
||||
do_handshake_on_connect=True, suppress_ragged_eofs=True,
|
||||
server_hostname=None):
|
||||
cnx = OpenSSL.SSL.Connection(self._ctx, sock)
|
||||
|
||||
if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3
|
||||
server_hostname = server_hostname.encode('utf-8')
|
||||
|
||||
if server_hostname is not None:
|
||||
cnx.set_tlsext_host_name(server_hostname)
|
||||
|
||||
cnx.set_connect_state()
|
||||
|
||||
while True:
|
||||
try:
|
||||
cnx.do_handshake()
|
||||
except OpenSSL.SSL.WantReadError:
|
||||
rd = util.wait_for_read(sock, sock.gettimeout())
|
||||
if not rd:
|
||||
raise timeout('select timed out')
|
||||
continue
|
||||
except OpenSSL.SSL.Error as e:
|
||||
raise ssl.SSLError('bad handshake: %r' % e)
|
||||
break
|
||||
|
||||
return WrappedSocket(cnx, sock)
|
||||
|
||||
|
||||
def _verify_callback(cnx, x509, err_no, err_depth, return_code):
|
||||
return err_no == 0
|
||||
@@ -1,807 +0,0 @@
|
||||
"""
|
||||
SecureTranport support for urllib3 via ctypes.
|
||||
|
||||
This makes platform-native TLS available to urllib3 users on macOS without the
|
||||
use of a compiler. This is an important feature because the Python Package
|
||||
Index is moving to become a TLSv1.2-or-higher server, and the default OpenSSL
|
||||
that ships with macOS is not capable of doing TLSv1.2. The only way to resolve
|
||||
this is to give macOS users an alternative solution to the problem, and that
|
||||
solution is to use SecureTransport.
|
||||
|
||||
We use ctypes here because this solution must not require a compiler. That's
|
||||
because pip is not allowed to require a compiler either.
|
||||
|
||||
This is not intended to be a seriously long-term solution to this problem.
|
||||
The hope is that PEP 543 will eventually solve this issue for us, at which
|
||||
point we can retire this contrib module. But in the short term, we need to
|
||||
solve the impending tire fire that is Python on Mac without this kind of
|
||||
contrib module. So...here we are.
|
||||
|
||||
To use this module, simply import and inject it::
|
||||
|
||||
import urllib3.contrib.securetransport
|
||||
urllib3.contrib.securetransport.inject_into_urllib3()
|
||||
|
||||
Happy TLSing!
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import contextlib
|
||||
import ctypes
|
||||
import errno
|
||||
import os.path
|
||||
import shutil
|
||||
import socket
|
||||
import ssl
|
||||
import threading
|
||||
import weakref
|
||||
|
||||
from .. import util
|
||||
from ._securetransport.bindings import (
|
||||
Security, SecurityConst, CoreFoundation
|
||||
)
|
||||
from ._securetransport.low_level import (
|
||||
_assert_no_error, _cert_array_from_pem, _temporary_keychain,
|
||||
_load_client_cert_chain
|
||||
)
|
||||
|
||||
try: # Platform-specific: Python 2
|
||||
from socket import _fileobject
|
||||
except ImportError: # Platform-specific: Python 3
|
||||
_fileobject = None
|
||||
from ..packages.backports.makefile import backport_makefile
|
||||
|
||||
try:
|
||||
memoryview(b'')
|
||||
except NameError:
|
||||
raise ImportError("SecureTransport only works on Pythons with memoryview")
|
||||
|
||||
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
|
||||
|
||||
# SNI always works
|
||||
HAS_SNI = True
|
||||
|
||||
orig_util_HAS_SNI = util.HAS_SNI
|
||||
orig_util_SSLContext = util.ssl_.SSLContext
|
||||
|
||||
# This dictionary is used by the read callback to obtain a handle to the
|
||||
# calling wrapped socket. This is a pretty silly approach, but for now it'll
|
||||
# do. I feel like I should be able to smuggle a handle to the wrapped socket
|
||||
# directly in the SSLConnectionRef, but for now this approach will work I
|
||||
# guess.
|
||||
#
|
||||
# We need to lock around this structure for inserts, but we don't do it for
|
||||
# reads/writes in the callbacks. The reasoning here goes as follows:
|
||||
#
|
||||
# 1. It is not possible to call into the callbacks before the dictionary is
|
||||
# populated, so once in the callback the id must be in the dictionary.
|
||||
# 2. The callbacks don't mutate the dictionary, they only read from it, and
|
||||
# so cannot conflict with any of the insertions.
|
||||
#
|
||||
# This is good: if we had to lock in the callbacks we'd drastically slow down
|
||||
# the performance of this code.
|
||||
_connection_refs = weakref.WeakValueDictionary()
|
||||
_connection_ref_lock = threading.Lock()
|
||||
|
||||
# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over
|
||||
# for no better reason than we need *a* limit, and this one is right there.
|
||||
SSL_WRITE_BLOCKSIZE = 16384
|
||||
|
||||
# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to
|
||||
# individual cipher suites. We need to do this becuase this is how
|
||||
# SecureTransport wants them.
|
||||
CIPHER_SUITES = [
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_256_GCM_SHA384,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_128_GCM_SHA256,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA256,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA,
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
|
||||
SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA256,
|
||||
SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA,
|
||||
SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA,
|
||||
SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384,
|
||||
SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256,
|
||||
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256,
|
||||
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256,
|
||||
SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA,
|
||||
SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA,
|
||||
]
|
||||
|
||||
# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of
|
||||
# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version.
|
||||
_protocol_to_min_max = {
|
||||
ssl.PROTOCOL_SSLv23: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12),
|
||||
}
|
||||
|
||||
if hasattr(ssl, "PROTOCOL_SSLv2"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_SSLv2] = (
|
||||
SecurityConst.kSSLProtocol2, SecurityConst.kSSLProtocol2
|
||||
)
|
||||
if hasattr(ssl, "PROTOCOL_SSLv3"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_SSLv3] = (
|
||||
SecurityConst.kSSLProtocol3, SecurityConst.kSSLProtocol3
|
||||
)
|
||||
if hasattr(ssl, "PROTOCOL_TLSv1"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_TLSv1] = (
|
||||
SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol1
|
||||
)
|
||||
if hasattr(ssl, "PROTOCOL_TLSv1_1"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_TLSv1_1] = (
|
||||
SecurityConst.kTLSProtocol11, SecurityConst.kTLSProtocol11
|
||||
)
|
||||
if hasattr(ssl, "PROTOCOL_TLSv1_2"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_TLSv1_2] = (
|
||||
SecurityConst.kTLSProtocol12, SecurityConst.kTLSProtocol12
|
||||
)
|
||||
if hasattr(ssl, "PROTOCOL_TLS"):
|
||||
_protocol_to_min_max[ssl.PROTOCOL_TLS] = _protocol_to_min_max[ssl.PROTOCOL_SSLv23]
|
||||
|
||||
|
||||
def inject_into_urllib3():
|
||||
"""
|
||||
Monkey-patch urllib3 with SecureTransport-backed SSL-support.
|
||||
"""
|
||||
util.ssl_.SSLContext = SecureTransportContext
|
||||
util.HAS_SNI = HAS_SNI
|
||||
util.ssl_.HAS_SNI = HAS_SNI
|
||||
util.IS_SECURETRANSPORT = True
|
||||
util.ssl_.IS_SECURETRANSPORT = True
|
||||
|
||||
|
||||
def extract_from_urllib3():
|
||||
"""
|
||||
Undo monkey-patching by :func:`inject_into_urllib3`.
|
||||
"""
|
||||
util.ssl_.SSLContext = orig_util_SSLContext
|
||||
util.HAS_SNI = orig_util_HAS_SNI
|
||||
util.ssl_.HAS_SNI = orig_util_HAS_SNI
|
||||
util.IS_SECURETRANSPORT = False
|
||||
util.ssl_.IS_SECURETRANSPORT = False
|
||||
|
||||
|
||||
def _read_callback(connection_id, data_buffer, data_length_pointer):
|
||||
"""
|
||||
SecureTransport read callback. This is called by ST to request that data
|
||||
be returned from the socket.
|
||||
"""
|
||||
wrapped_socket = None
|
||||
try:
|
||||
wrapped_socket = _connection_refs.get(connection_id)
|
||||
if wrapped_socket is None:
|
||||
return SecurityConst.errSSLInternal
|
||||
base_socket = wrapped_socket.socket
|
||||
|
||||
requested_length = data_length_pointer[0]
|
||||
|
||||
timeout = wrapped_socket.gettimeout()
|
||||
error = None
|
||||
read_count = 0
|
||||
buffer = (ctypes.c_char * requested_length).from_address(data_buffer)
|
||||
buffer_view = memoryview(buffer)
|
||||
|
||||
try:
|
||||
while read_count < requested_length:
|
||||
if timeout is None or timeout >= 0:
|
||||
readables = util.wait_for_read([base_socket], timeout)
|
||||
if not readables:
|
||||
raise socket.error(errno.EAGAIN, 'timed out')
|
||||
|
||||
# We need to tell ctypes that we have a buffer that can be
|
||||
# written to. Upsettingly, we do that like this:
|
||||
chunk_size = base_socket.recv_into(
|
||||
buffer_view[read_count:requested_length]
|
||||
)
|
||||
read_count += chunk_size
|
||||
if not chunk_size:
|
||||
if not read_count:
|
||||
return SecurityConst.errSSLClosedGraceful
|
||||
break
|
||||
except (socket.error) as e:
|
||||
error = e.errno
|
||||
|
||||
if error is not None and error != errno.EAGAIN:
|
||||
if error == errno.ECONNRESET:
|
||||
return SecurityConst.errSSLClosedAbort
|
||||
raise
|
||||
|
||||
data_length_pointer[0] = read_count
|
||||
|
||||
if read_count != requested_length:
|
||||
return SecurityConst.errSSLWouldBlock
|
||||
|
||||
return 0
|
||||
except Exception as e:
|
||||
if wrapped_socket is not None:
|
||||
wrapped_socket._exception = e
|
||||
return SecurityConst.errSSLInternal
|
||||
|
||||
|
||||
def _write_callback(connection_id, data_buffer, data_length_pointer):
|
||||
"""
|
||||
SecureTransport write callback. This is called by ST to request that data
|
||||
actually be sent on the network.
|
||||
"""
|
||||
wrapped_socket = None
|
||||
try:
|
||||
wrapped_socket = _connection_refs.get(connection_id)
|
||||
if wrapped_socket is None:
|
||||
return SecurityConst.errSSLInternal
|
||||
base_socket = wrapped_socket.socket
|
||||
|
||||
bytes_to_write = data_length_pointer[0]
|
||||
data = ctypes.string_at(data_buffer, bytes_to_write)
|
||||
|
||||
timeout = wrapped_socket.gettimeout()
|
||||
error = None
|
||||
sent = 0
|
||||
|
||||
try:
|
||||
while sent < bytes_to_write:
|
||||
if timeout is None or timeout >= 0:
|
||||
writables = util.wait_for_write([base_socket], timeout)
|
||||
if not writables:
|
||||
raise socket.error(errno.EAGAIN, 'timed out')
|
||||
chunk_sent = base_socket.send(data)
|
||||
sent += chunk_sent
|
||||
|
||||
# This has some needless copying here, but I'm not sure there's
|
||||
# much value in optimising this data path.
|
||||
data = data[chunk_sent:]
|
||||
except (socket.error) as e:
|
||||
error = e.errno
|
||||
|
||||
if error is not None and error != errno.EAGAIN:
|
||||
if error == errno.ECONNRESET:
|
||||
return SecurityConst.errSSLClosedAbort
|
||||
raise
|
||||
|
||||
data_length_pointer[0] = sent
|
||||
if sent != bytes_to_write:
|
||||
return SecurityConst.errSSLWouldBlock
|
||||
|
||||
return 0
|
||||
except Exception as e:
|
||||
if wrapped_socket is not None:
|
||||
wrapped_socket._exception = e
|
||||
return SecurityConst.errSSLInternal
|
||||
|
||||
|
||||
# We need to keep these two objects references alive: if they get GC'd while
|
||||
# in use then SecureTransport could attempt to call a function that is in freed
|
||||
# memory. That would be...uh...bad. Yeah, that's the word. Bad.
|
||||
_read_callback_pointer = Security.SSLReadFunc(_read_callback)
|
||||
_write_callback_pointer = Security.SSLWriteFunc(_write_callback)
|
||||
|
||||
|
||||
class WrappedSocket(object):
|
||||
"""
|
||||
API-compatibility wrapper for Python's OpenSSL wrapped socket object.
|
||||
|
||||
Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage
|
||||
collector of PyPy.
|
||||
"""
|
||||
def __init__(self, socket):
|
||||
self.socket = socket
|
||||
self.context = None
|
||||
self._makefile_refs = 0
|
||||
self._closed = False
|
||||
self._exception = None
|
||||
self._keychain = None
|
||||
self._keychain_dir = None
|
||||
self._client_cert_chain = None
|
||||
|
||||
# We save off the previously-configured timeout and then set it to
|
||||
# zero. This is done because we use select and friends to handle the
|
||||
# timeouts, but if we leave the timeout set on the lower socket then
|
||||
# Python will "kindly" call select on that socket again for us. Avoid
|
||||
# that by forcing the timeout to zero.
|
||||
self._timeout = self.socket.gettimeout()
|
||||
self.socket.settimeout(0)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _raise_on_error(self):
|
||||
"""
|
||||
A context manager that can be used to wrap calls that do I/O from
|
||||
SecureTransport. If any of the I/O callbacks hit an exception, this
|
||||
context manager will correctly propagate the exception after the fact.
|
||||
This avoids silently swallowing those exceptions.
|
||||
|
||||
It also correctly forces the socket closed.
|
||||
"""
|
||||
self._exception = None
|
||||
|
||||
# We explicitly don't catch around this yield because in the unlikely
|
||||
# event that an exception was hit in the block we don't want to swallow
|
||||
# it.
|
||||
yield
|
||||
if self._exception is not None:
|
||||
exception, self._exception = self._exception, None
|
||||
self.close()
|
||||
raise exception
|
||||
|
||||
def _set_ciphers(self):
|
||||
"""
|
||||
Sets up the allowed ciphers. By default this matches the set in
|
||||
util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done
|
||||
custom and doesn't allow changing at this time, mostly because parsing
|
||||
OpenSSL cipher strings is going to be a freaking nightmare.
|
||||
"""
|
||||
ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(*CIPHER_SUITES)
|
||||
result = Security.SSLSetEnabledCiphers(
|
||||
self.context, ciphers, len(CIPHER_SUITES)
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
def _custom_validate(self, verify, trust_bundle):
|
||||
"""
|
||||
Called when we have set custom validation. We do this in two cases:
|
||||
first, when cert validation is entirely disabled; and second, when
|
||||
using a custom trust DB.
|
||||
"""
|
||||
# If we disabled cert validation, just say: cool.
|
||||
if not verify:
|
||||
return
|
||||
|
||||
# We want data in memory, so load it up.
|
||||
if os.path.isfile(trust_bundle):
|
||||
with open(trust_bundle, 'rb') as f:
|
||||
trust_bundle = f.read()
|
||||
|
||||
cert_array = None
|
||||
trust = Security.SecTrustRef()
|
||||
|
||||
try:
|
||||
# Get a CFArray that contains the certs we want.
|
||||
cert_array = _cert_array_from_pem(trust_bundle)
|
||||
|
||||
# Ok, now the hard part. We want to get the SecTrustRef that ST has
|
||||
# created for this connection, shove our CAs into it, tell ST to
|
||||
# ignore everything else it knows, and then ask if it can build a
|
||||
# chain. This is a buuuunch of code.
|
||||
result = Security.SSLCopyPeerTrust(
|
||||
self.context, ctypes.byref(trust)
|
||||
)
|
||||
_assert_no_error(result)
|
||||
if not trust:
|
||||
raise ssl.SSLError("Failed to copy trust reference")
|
||||
|
||||
result = Security.SecTrustSetAnchorCertificates(trust, cert_array)
|
||||
_assert_no_error(result)
|
||||
|
||||
result = Security.SecTrustSetAnchorCertificatesOnly(trust, True)
|
||||
_assert_no_error(result)
|
||||
|
||||
trust_result = Security.SecTrustResultType()
|
||||
result = Security.SecTrustEvaluate(
|
||||
trust, ctypes.byref(trust_result)
|
||||
)
|
||||
_assert_no_error(result)
|
||||
finally:
|
||||
if trust:
|
||||
CoreFoundation.CFRelease(trust)
|
||||
|
||||
if cert_array is None:
|
||||
CoreFoundation.CFRelease(cert_array)
|
||||
|
||||
# Ok, now we can look at what the result was.
|
||||
successes = (
|
||||
SecurityConst.kSecTrustResultUnspecified,
|
||||
SecurityConst.kSecTrustResultProceed
|
||||
)
|
||||
if trust_result.value not in successes:
|
||||
raise ssl.SSLError(
|
||||
"certificate verify failed, error code: %d" %
|
||||
trust_result.value
|
||||
)
|
||||
|
||||
def handshake(self,
|
||||
server_hostname,
|
||||
verify,
|
||||
trust_bundle,
|
||||
min_version,
|
||||
max_version,
|
||||
client_cert,
|
||||
client_key,
|
||||
client_key_passphrase):
|
||||
"""
|
||||
Actually performs the TLS handshake. This is run automatically by
|
||||
wrapped socket, and shouldn't be needed in user code.
|
||||
"""
|
||||
# First, we do the initial bits of connection setup. We need to create
|
||||
# a context, set its I/O funcs, and set the connection reference.
|
||||
self.context = Security.SSLCreateContext(
|
||||
None, SecurityConst.kSSLClientSide, SecurityConst.kSSLStreamType
|
||||
)
|
||||
result = Security.SSLSetIOFuncs(
|
||||
self.context, _read_callback_pointer, _write_callback_pointer
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
# Here we need to compute the handle to use. We do this by taking the
|
||||
# id of self modulo 2**31 - 1. If this is already in the dictionary, we
|
||||
# just keep incrementing by one until we find a free space.
|
||||
with _connection_ref_lock:
|
||||
handle = id(self) % 2147483647
|
||||
while handle in _connection_refs:
|
||||
handle = (handle + 1) % 2147483647
|
||||
_connection_refs[handle] = self
|
||||
|
||||
result = Security.SSLSetConnection(self.context, handle)
|
||||
_assert_no_error(result)
|
||||
|
||||
# If we have a server hostname, we should set that too.
|
||||
if server_hostname:
|
||||
if not isinstance(server_hostname, bytes):
|
||||
server_hostname = server_hostname.encode('utf-8')
|
||||
|
||||
result = Security.SSLSetPeerDomainName(
|
||||
self.context, server_hostname, len(server_hostname)
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
# Setup the ciphers.
|
||||
self._set_ciphers()
|
||||
|
||||
# Set the minimum and maximum TLS versions.
|
||||
result = Security.SSLSetProtocolVersionMin(self.context, min_version)
|
||||
_assert_no_error(result)
|
||||
result = Security.SSLSetProtocolVersionMax(self.context, max_version)
|
||||
_assert_no_error(result)
|
||||
|
||||
# If there's a trust DB, we need to use it. We do that by telling
|
||||
# SecureTransport to break on server auth. We also do that if we don't
|
||||
# want to validate the certs at all: we just won't actually do any
|
||||
# authing in that case.
|
||||
if not verify or trust_bundle is not None:
|
||||
result = Security.SSLSetSessionOption(
|
||||
self.context,
|
||||
SecurityConst.kSSLSessionOptionBreakOnServerAuth,
|
||||
True
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
# If there's a client cert, we need to use it.
|
||||
if client_cert:
|
||||
self._keychain, self._keychain_dir = _temporary_keychain()
|
||||
self._client_cert_chain = _load_client_cert_chain(
|
||||
self._keychain, client_cert, client_key
|
||||
)
|
||||
result = Security.SSLSetCertificate(
|
||||
self.context, self._client_cert_chain
|
||||
)
|
||||
_assert_no_error(result)
|
||||
|
||||
while True:
|
||||
with self._raise_on_error():
|
||||
result = Security.SSLHandshake(self.context)
|
||||
|
||||
if result == SecurityConst.errSSLWouldBlock:
|
||||
raise socket.timeout("handshake timed out")
|
||||
elif result == SecurityConst.errSSLServerAuthCompleted:
|
||||
self._custom_validate(verify, trust_bundle)
|
||||
continue
|
||||
else:
|
||||
_assert_no_error(result)
|
||||
break
|
||||
|
||||
def fileno(self):
|
||||
return self.socket.fileno()
|
||||
|
||||
# Copy-pasted from Python 3.5 source code
|
||||
def _decref_socketios(self):
|
||||
if self._makefile_refs > 0:
|
||||
self._makefile_refs -= 1
|
||||
if self._closed:
|
||||
self.close()
|
||||
|
||||
def recv(self, bufsiz):
|
||||
buffer = ctypes.create_string_buffer(bufsiz)
|
||||
bytes_read = self.recv_into(buffer, bufsiz)
|
||||
data = buffer[:bytes_read]
|
||||
return data
|
||||
|
||||
def recv_into(self, buffer, nbytes=None):
|
||||
# Read short on EOF.
|
||||
if self._closed:
|
||||
return 0
|
||||
|
||||
if nbytes is None:
|
||||
nbytes = len(buffer)
|
||||
|
||||
buffer = (ctypes.c_char * nbytes).from_buffer(buffer)
|
||||
processed_bytes = ctypes.c_size_t(0)
|
||||
|
||||
with self._raise_on_error():
|
||||
result = Security.SSLRead(
|
||||
self.context, buffer, nbytes, ctypes.byref(processed_bytes)
|
||||
)
|
||||
|
||||
# There are some result codes that we want to treat as "not always
|
||||
# errors". Specifically, those are errSSLWouldBlock,
|
||||
# errSSLClosedGraceful, and errSSLClosedNoNotify.
|
||||
if (result == SecurityConst.errSSLWouldBlock):
|
||||
# If we didn't process any bytes, then this was just a time out.
|
||||
# However, we can get errSSLWouldBlock in situations when we *did*
|
||||
# read some data, and in those cases we should just read "short"
|
||||
# and return.
|
||||
if processed_bytes.value == 0:
|
||||
# Timed out, no data read.
|
||||
raise socket.timeout("recv timed out")
|
||||
elif result in (SecurityConst.errSSLClosedGraceful, SecurityConst.errSSLClosedNoNotify):
|
||||
# The remote peer has closed this connection. We should do so as
|
||||
# well. Note that we don't actually return here because in
|
||||
# principle this could actually be fired along with return data.
|
||||
# It's unlikely though.
|
||||
self.close()
|
||||
else:
|
||||
_assert_no_error(result)
|
||||
|
||||
# Ok, we read and probably succeeded. We should return whatever data
|
||||
# was actually read.
|
||||
return processed_bytes.value
|
||||
|
||||
def settimeout(self, timeout):
|
||||
self._timeout = timeout
|
||||
|
||||
def gettimeout(self):
|
||||
return self._timeout
|
||||
|
||||
def send(self, data):
|
||||
processed_bytes = ctypes.c_size_t(0)
|
||||
|
||||
with self._raise_on_error():
|
||||
result = Security.SSLWrite(
|
||||
self.context, data, len(data), ctypes.byref(processed_bytes)
|
||||
)
|
||||
|
||||
if result == SecurityConst.errSSLWouldBlock and processed_bytes.value == 0:
|
||||
# Timed out
|
||||
raise socket.timeout("send timed out")
|
||||
else:
|
||||
_assert_no_error(result)
|
||||
|
||||
# We sent, and probably succeeded. Tell them how much we sent.
|
||||
return processed_bytes.value
|
||||
|
||||
def sendall(self, data):
|
||||
total_sent = 0
|
||||
while total_sent < len(data):
|
||||
sent = self.send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
|
||||
total_sent += sent
|
||||
|
||||
def shutdown(self):
|
||||
with self._raise_on_error():
|
||||
Security.SSLClose(self.context)
|
||||
|
||||
def close(self):
|
||||
# TODO: should I do clean shutdown here? Do I have to?
|
||||
if self._makefile_refs < 1:
|
||||
self._closed = True
|
||||
if self.context:
|
||||
CoreFoundation.CFRelease(self.context)
|
||||
self.context = None
|
||||
if self._client_cert_chain:
|
||||
CoreFoundation.CFRelease(self._client_cert_chain)
|
||||
self._client_cert_chain = None
|
||||
if self._keychain:
|
||||
Security.SecKeychainDelete(self._keychain)
|
||||
CoreFoundation.CFRelease(self._keychain)
|
||||
shutil.rmtree(self._keychain_dir)
|
||||
self._keychain = self._keychain_dir = None
|
||||
return self.socket.close()
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
def getpeercert(self, binary_form=False):
|
||||
# Urgh, annoying.
|
||||
#
|
||||
# Here's how we do this:
|
||||
#
|
||||
# 1. Call SSLCopyPeerTrust to get hold of the trust object for this
|
||||
# connection.
|
||||
# 2. Call SecTrustGetCertificateAtIndex for index 0 to get the leaf.
|
||||
# 3. To get the CN, call SecCertificateCopyCommonName and process that
|
||||
# string so that it's of the appropriate type.
|
||||
# 4. To get the SAN, we need to do something a bit more complex:
|
||||
# a. Call SecCertificateCopyValues to get the data, requesting
|
||||
# kSecOIDSubjectAltName.
|
||||
# b. Mess about with this dictionary to try to get the SANs out.
|
||||
#
|
||||
# This is gross. Really gross. It's going to be a few hundred LoC extra
|
||||
# just to repeat something that SecureTransport can *already do*. So my
|
||||
# operating assumption at this time is that what we want to do is
|
||||
# instead to just flag to urllib3 that it shouldn't do its own hostname
|
||||
# validation when using SecureTransport.
|
||||
if not binary_form:
|
||||
raise ValueError(
|
||||
"SecureTransport only supports dumping binary certs"
|
||||
)
|
||||
trust = Security.SecTrustRef()
|
||||
certdata = None
|
||||
der_bytes = None
|
||||
|
||||
try:
|
||||
# Grab the trust store.
|
||||
result = Security.SSLCopyPeerTrust(
|
||||
self.context, ctypes.byref(trust)
|
||||
)
|
||||
_assert_no_error(result)
|
||||
if not trust:
|
||||
# Probably we haven't done the handshake yet. No biggie.
|
||||
return None
|
||||
|
||||
cert_count = Security.SecTrustGetCertificateCount(trust)
|
||||
if not cert_count:
|
||||
# Also a case that might happen if we haven't handshaked.
|
||||
# Handshook? Handshaken?
|
||||
return None
|
||||
|
||||
leaf = Security.SecTrustGetCertificateAtIndex(trust, 0)
|
||||
assert leaf
|
||||
|
||||
# Ok, now we want the DER bytes.
|
||||
certdata = Security.SecCertificateCopyData(leaf)
|
||||
assert certdata
|
||||
|
||||
data_length = CoreFoundation.CFDataGetLength(certdata)
|
||||
data_buffer = CoreFoundation.CFDataGetBytePtr(certdata)
|
||||
der_bytes = ctypes.string_at(data_buffer, data_length)
|
||||
finally:
|
||||
if certdata:
|
||||
CoreFoundation.CFRelease(certdata)
|
||||
if trust:
|
||||
CoreFoundation.CFRelease(trust)
|
||||
|
||||
return der_bytes
|
||||
|
||||
def _reuse(self):
|
||||
self._makefile_refs += 1
|
||||
|
||||
def _drop(self):
|
||||
if self._makefile_refs < 1:
|
||||
self.close()
|
||||
else:
|
||||
self._makefile_refs -= 1
|
||||
|
||||
|
||||
if _fileobject: # Platform-specific: Python 2
|
||||
def makefile(self, mode, bufsize=-1):
|
||||
self._makefile_refs += 1
|
||||
return _fileobject(self, mode, bufsize, close=True)
|
||||
else: # Platform-specific: Python 3
|
||||
def makefile(self, mode="r", buffering=None, *args, **kwargs):
|
||||
# We disable buffering with SecureTransport because it conflicts with
|
||||
# the buffering that ST does internally (see issue #1153 for more).
|
||||
buffering = 0
|
||||
return backport_makefile(self, mode, buffering, *args, **kwargs)
|
||||
|
||||
WrappedSocket.makefile = makefile
|
||||
|
||||
|
||||
class SecureTransportContext(object):
|
||||
"""
|
||||
I am a wrapper class for the SecureTransport library, to translate the
|
||||
interface of the standard library ``SSLContext`` object to calls into
|
||||
SecureTransport.
|
||||
"""
|
||||
def __init__(self, protocol):
|
||||
self._min_version, self._max_version = _protocol_to_min_max[protocol]
|
||||
self._options = 0
|
||||
self._verify = False
|
||||
self._trust_bundle = None
|
||||
self._client_cert = None
|
||||
self._client_key = None
|
||||
self._client_key_passphrase = None
|
||||
|
||||
@property
|
||||
def check_hostname(self):
|
||||
"""
|
||||
SecureTransport cannot have its hostname checking disabled. For more,
|
||||
see the comment on getpeercert() in this file.
|
||||
"""
|
||||
return True
|
||||
|
||||
@check_hostname.setter
|
||||
def check_hostname(self, value):
|
||||
"""
|
||||
SecureTransport cannot have its hostname checking disabled. For more,
|
||||
see the comment on getpeercert() in this file.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def options(self):
|
||||
# TODO: Well, crap.
|
||||
#
|
||||
# So this is the bit of the code that is the most likely to cause us
|
||||
# trouble. Essentially we need to enumerate all of the SSL options that
|
||||
# users might want to use and try to see if we can sensibly translate
|
||||
# them, or whether we should just ignore them.
|
||||
return self._options
|
||||
|
||||
@options.setter
|
||||
def options(self, value):
|
||||
# TODO: Update in line with above.
|
||||
self._options = value
|
||||
|
||||
@property
|
||||
def verify_mode(self):
|
||||
return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE
|
||||
|
||||
@verify_mode.setter
|
||||
def verify_mode(self, value):
|
||||
self._verify = True if value == ssl.CERT_REQUIRED else False
|
||||
|
||||
def set_default_verify_paths(self):
|
||||
# So, this has to do something a bit weird. Specifically, what it does
|
||||
# is nothing.
|
||||
#
|
||||
# This means that, if we had previously had load_verify_locations
|
||||
# called, this does not undo that. We need to do that because it turns
|
||||
# out that the rest of the urllib3 code will attempt to load the
|
||||
# default verify paths if it hasn't been told about any paths, even if
|
||||
# the context itself was sometime earlier. We resolve that by just
|
||||
# ignoring it.
|
||||
pass
|
||||
|
||||
def load_default_certs(self):
|
||||
return self.set_default_verify_paths()
|
||||
|
||||
def set_ciphers(self, ciphers):
|
||||
# For now, we just require the default cipher string.
|
||||
if ciphers != util.ssl_.DEFAULT_CIPHERS:
|
||||
raise ValueError(
|
||||
"SecureTransport doesn't support custom cipher strings"
|
||||
)
|
||||
|
||||
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
|
||||
# OK, we only really support cadata and cafile.
|
||||
if capath is not None:
|
||||
raise ValueError(
|
||||
"SecureTransport does not support cert directories"
|
||||
)
|
||||
|
||||
self._trust_bundle = cafile or cadata
|
||||
|
||||
def load_cert_chain(self, certfile, keyfile=None, password=None):
|
||||
self._client_cert = certfile
|
||||
self._client_key = keyfile
|
||||
self._client_cert_passphrase = password
|
||||
|
||||
def wrap_socket(self, sock, server_side=False,
|
||||
do_handshake_on_connect=True, suppress_ragged_eofs=True,
|
||||
server_hostname=None):
|
||||
# So, what do we do here? Firstly, we assert some properties. This is a
|
||||
# stripped down shim, so there is some functionality we don't support.
|
||||
# See PEP 543 for the real deal.
|
||||
assert not server_side
|
||||
assert do_handshake_on_connect
|
||||
assert suppress_ragged_eofs
|
||||
|
||||
# Ok, we're good to go. Now we want to create the wrapped socket object
|
||||
# and store it in the appropriate place.
|
||||
wrapped_socket = WrappedSocket(sock)
|
||||
|
||||
# Now we can handshake
|
||||
wrapped_socket.handshake(
|
||||
server_hostname, self._verify, self._trust_bundle,
|
||||
self._min_version, self._max_version, self._client_cert,
|
||||
self._client_key, self._client_key_passphrase
|
||||
)
|
||||
return wrapped_socket
|
||||
@@ -1,188 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This module contains provisional support for SOCKS proxies from within
|
||||
urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and
|
||||
SOCKS5. To enable its functionality, either install PySocks or install this
|
||||
module with the ``socks`` extra.
|
||||
|
||||
The SOCKS implementation supports the full range of urllib3 features. It also
|
||||
supports the following SOCKS features:
|
||||
|
||||
- SOCKS4
|
||||
- SOCKS4a
|
||||
- SOCKS5
|
||||
- Usernames and passwords for the SOCKS proxy
|
||||
|
||||
Known Limitations:
|
||||
|
||||
- Currently PySocks does not support contacting remote websites via literal
|
||||
IPv6 addresses. Any such connection attempt will fail. You must use a domain
|
||||
name.
|
||||
- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any
|
||||
such connection attempt will fail.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
try:
|
||||
import socks
|
||||
except ImportError:
|
||||
import warnings
|
||||
from ..exceptions import DependencyWarning
|
||||
|
||||
warnings.warn((
|
||||
'SOCKS support in urllib3 requires the installation of optional '
|
||||
'dependencies: specifically, PySocks. For more information, see '
|
||||
'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies'
|
||||
),
|
||||
DependencyWarning
|
||||
)
|
||||
raise
|
||||
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
|
||||
from ..connection import (
|
||||
HTTPConnection, HTTPSConnection
|
||||
)
|
||||
from ..connectionpool import (
|
||||
HTTPConnectionPool, HTTPSConnectionPool
|
||||
)
|
||||
from ..exceptions import ConnectTimeoutError, NewConnectionError
|
||||
from ..poolmanager import PoolManager
|
||||
from ..util.url import parse_url
|
||||
|
||||
try:
|
||||
import ssl
|
||||
except ImportError:
|
||||
ssl = None
|
||||
|
||||
|
||||
class SOCKSConnection(HTTPConnection):
|
||||
"""
|
||||
A plain-text HTTP connection that connects via a SOCKS proxy.
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._socks_options = kwargs.pop('_socks_options')
|
||||
super(SOCKSConnection, self).__init__(*args, **kwargs)
|
||||
|
||||
def _new_conn(self):
|
||||
"""
|
||||
Establish a new connection via the SOCKS proxy.
|
||||
"""
|
||||
extra_kw = {}
|
||||
if self.source_address:
|
||||
extra_kw['source_address'] = self.source_address
|
||||
|
||||
if self.socket_options:
|
||||
extra_kw['socket_options'] = self.socket_options
|
||||
|
||||
try:
|
||||
conn = socks.create_connection(
|
||||
(self.host, self.port),
|
||||
proxy_type=self._socks_options['socks_version'],
|
||||
proxy_addr=self._socks_options['proxy_host'],
|
||||
proxy_port=self._socks_options['proxy_port'],
|
||||
proxy_username=self._socks_options['username'],
|
||||
proxy_password=self._socks_options['password'],
|
||||
proxy_rdns=self._socks_options['rdns'],
|
||||
timeout=self.timeout,
|
||||
**extra_kw
|
||||
)
|
||||
|
||||
except SocketTimeout as e:
|
||||
raise ConnectTimeoutError(
|
||||
self, "Connection to %s timed out. (connect timeout=%s)" %
|
||||
(self.host, self.timeout))
|
||||
|
||||
except socks.ProxyError as e:
|
||||
# This is fragile as hell, but it seems to be the only way to raise
|
||||
# useful errors here.
|
||||
if e.socket_err:
|
||||
error = e.socket_err
|
||||
if isinstance(error, SocketTimeout):
|
||||
raise ConnectTimeoutError(
|
||||
self,
|
||||
"Connection to %s timed out. (connect timeout=%s)" %
|
||||
(self.host, self.timeout)
|
||||
)
|
||||
else:
|
||||
raise NewConnectionError(
|
||||
self,
|
||||
"Failed to establish a new connection: %s" % error
|
||||
)
|
||||
else:
|
||||
raise NewConnectionError(
|
||||
self,
|
||||
"Failed to establish a new connection: %s" % e
|
||||
)
|
||||
|
||||
except SocketError as e: # Defensive: PySocks should catch all these.
|
||||
raise NewConnectionError(
|
||||
self, "Failed to establish a new connection: %s" % e)
|
||||
|
||||
return conn
|
||||
|
||||
|
||||
# We don't need to duplicate the Verified/Unverified distinction from
|
||||
# urllib3/connection.py here because the HTTPSConnection will already have been
|
||||
# correctly set to either the Verified or Unverified form by that module. This
|
||||
# means the SOCKSHTTPSConnection will automatically be the correct type.
|
||||
class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
|
||||
pass
|
||||
|
||||
|
||||
class SOCKSHTTPConnectionPool(HTTPConnectionPool):
|
||||
ConnectionCls = SOCKSConnection
|
||||
|
||||
|
||||
class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
|
||||
ConnectionCls = SOCKSHTTPSConnection
|
||||
|
||||
|
||||
class SOCKSProxyManager(PoolManager):
|
||||
"""
|
||||
A version of the urllib3 ProxyManager that routes connections via the
|
||||
defined SOCKS proxy.
|
||||
"""
|
||||
pool_classes_by_scheme = {
|
||||
'http': SOCKSHTTPConnectionPool,
|
||||
'https': SOCKSHTTPSConnectionPool,
|
||||
}
|
||||
|
||||
def __init__(self, proxy_url, username=None, password=None,
|
||||
num_pools=10, headers=None, **connection_pool_kw):
|
||||
parsed = parse_url(proxy_url)
|
||||
|
||||
if parsed.scheme == 'socks5':
|
||||
socks_version = socks.PROXY_TYPE_SOCKS5
|
||||
rdns = False
|
||||
elif parsed.scheme == 'socks5h':
|
||||
socks_version = socks.PROXY_TYPE_SOCKS5
|
||||
rdns = True
|
||||
elif parsed.scheme == 'socks4':
|
||||
socks_version = socks.PROXY_TYPE_SOCKS4
|
||||
rdns = False
|
||||
elif parsed.scheme == 'socks4a':
|
||||
socks_version = socks.PROXY_TYPE_SOCKS4
|
||||
rdns = True
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to determine SOCKS version from %s" % proxy_url
|
||||
)
|
||||
|
||||
self.proxy_url = proxy_url
|
||||
|
||||
socks_options = {
|
||||
'socks_version': socks_version,
|
||||
'proxy_host': parsed.host,
|
||||
'proxy_port': parsed.port,
|
||||
'username': username,
|
||||
'password': password,
|
||||
'rdns': rdns
|
||||
}
|
||||
connection_pool_kw['_socks_options'] = socks_options
|
||||
|
||||
super(SOCKSProxyManager, self).__init__(
|
||||
num_pools, headers, **connection_pool_kw
|
||||
)
|
||||
|
||||
self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme
|
||||
@@ -1,246 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from .packages.six.moves.http_client import (
|
||||
IncompleteRead as httplib_IncompleteRead
|
||||
)
|
||||
# Base Exceptions
|
||||
|
||||
|
||||
class HTTPError(Exception):
|
||||
"Base exception used by this module."
|
||||
pass
|
||||
|
||||
|
||||
class HTTPWarning(Warning):
|
||||
"Base warning used by this module."
|
||||
pass
|
||||
|
||||
|
||||
class PoolError(HTTPError):
|
||||
"Base exception for errors caused within a pool."
|
||||
def __init__(self, pool, message):
|
||||
self.pool = pool
|
||||
HTTPError.__init__(self, "%s: %s" % (pool, message))
|
||||
|
||||
def __reduce__(self):
|
||||
# For pickling purposes.
|
||||
return self.__class__, (None, None)
|
||||
|
||||
|
||||
class RequestError(PoolError):
|
||||
"Base exception for PoolErrors that have associated URLs."
|
||||
def __init__(self, pool, url, message):
|
||||
self.url = url
|
||||
PoolError.__init__(self, pool, message)
|
||||
|
||||
def __reduce__(self):
|
||||
# For pickling purposes.
|
||||
return self.__class__, (None, self.url, None)
|
||||
|
||||
|
||||
class SSLError(HTTPError):
|
||||
"Raised when SSL certificate fails in an HTTPS connection."
|
||||
pass
|
||||
|
||||
|
||||
class ProxyError(HTTPError):
|
||||
"Raised when the connection to a proxy fails."
|
||||
pass
|
||||
|
||||
|
||||
class DecodeError(HTTPError):
|
||||
"Raised when automatic decoding based on Content-Type fails."
|
||||
pass
|
||||
|
||||
|
||||
class ProtocolError(HTTPError):
|
||||
"Raised when something unexpected happens mid-request/response."
|
||||
pass
|
||||
|
||||
|
||||
#: Renamed to ProtocolError but aliased for backwards compatibility.
|
||||
ConnectionError = ProtocolError
|
||||
|
||||
|
||||
# Leaf Exceptions
|
||||
|
||||
class MaxRetryError(RequestError):
|
||||
"""Raised when the maximum number of retries is exceeded.
|
||||
|
||||
:param pool: The connection pool
|
||||
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
|
||||
:param string url: The requested Url
|
||||
:param exceptions.Exception reason: The underlying error
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, pool, url, reason=None):
|
||||
self.reason = reason
|
||||
|
||||
message = "Max retries exceeded with url: %s (Caused by %r)" % (
|
||||
url, reason)
|
||||
|
||||
RequestError.__init__(self, pool, url, message)
|
||||
|
||||
|
||||
class HostChangedError(RequestError):
|
||||
"Raised when an existing pool gets a request for a foreign host."
|
||||
|
||||
def __init__(self, pool, url, retries=3):
|
||||
message = "Tried to open a foreign host with url: %s" % url
|
||||
RequestError.__init__(self, pool, url, message)
|
||||
self.retries = retries
|
||||
|
||||
|
||||
class TimeoutStateError(HTTPError):
|
||||
""" Raised when passing an invalid state to a timeout """
|
||||
pass
|
||||
|
||||
|
||||
class TimeoutError(HTTPError):
|
||||
""" Raised when a socket timeout error occurs.
|
||||
|
||||
Catching this error will catch both :exc:`ReadTimeoutErrors
|
||||
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class ReadTimeoutError(TimeoutError, RequestError):
|
||||
"Raised when a socket timeout occurs while receiving data from a server"
|
||||
pass
|
||||
|
||||
|
||||
# This timeout error does not have a URL attached and needs to inherit from the
|
||||
# base HTTPError
|
||||
class ConnectTimeoutError(TimeoutError):
|
||||
"Raised when a socket timeout occurs while connecting to a server"
|
||||
pass
|
||||
|
||||
|
||||
class NewConnectionError(ConnectTimeoutError, PoolError):
|
||||
"Raised when we fail to establish a new connection. Usually ECONNREFUSED."
|
||||
pass
|
||||
|
||||
|
||||
class EmptyPoolError(PoolError):
|
||||
"Raised when a pool runs out of connections and no more are allowed."
|
||||
pass
|
||||
|
||||
|
||||
class ClosedPoolError(PoolError):
|
||||
"Raised when a request enters a pool after the pool has been closed."
|
||||
pass
|
||||
|
||||
|
||||
class LocationValueError(ValueError, HTTPError):
|
||||
"Raised when there is something wrong with a given URL input."
|
||||
pass
|
||||
|
||||
|
||||
class LocationParseError(LocationValueError):
|
||||
"Raised when get_host or similar fails to parse the URL input."
|
||||
|
||||
def __init__(self, location):
|
||||
message = "Failed to parse: %s" % location
|
||||
HTTPError.__init__(self, message)
|
||||
|
||||
self.location = location
|
||||
|
||||
|
||||
class ResponseError(HTTPError):
|
||||
"Used as a container for an error reason supplied in a MaxRetryError."
|
||||
GENERIC_ERROR = 'too many error responses'
|
||||
SPECIFIC_ERROR = 'too many {status_code} error responses'
|
||||
|
||||
|
||||
class SecurityWarning(HTTPWarning):
|
||||
"Warned when perfoming security reducing actions"
|
||||
pass
|
||||
|
||||
|
||||
class SubjectAltNameWarning(SecurityWarning):
|
||||
"Warned when connecting to a host with a certificate missing a SAN."
|
||||
pass
|
||||
|
||||
|
||||
class InsecureRequestWarning(SecurityWarning):
|
||||
"Warned when making an unverified HTTPS request."
|
||||
pass
|
||||
|
||||
|
||||
class SystemTimeWarning(SecurityWarning):
|
||||
"Warned when system time is suspected to be wrong"
|
||||
pass
|
||||
|
||||
|
||||
class InsecurePlatformWarning(SecurityWarning):
|
||||
"Warned when certain SSL configuration is not available on a platform."
|
||||
pass
|
||||
|
||||
|
||||
class SNIMissingWarning(HTTPWarning):
|
||||
"Warned when making a HTTPS request without SNI available."
|
||||
pass
|
||||
|
||||
|
||||
class DependencyWarning(HTTPWarning):
|
||||
"""
|
||||
Warned when an attempt is made to import a module with missing optional
|
||||
dependencies.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class ResponseNotChunked(ProtocolError, ValueError):
|
||||
"Response needs to be chunked in order to read it as chunks."
|
||||
pass
|
||||
|
||||
|
||||
class BodyNotHttplibCompatible(HTTPError):
|
||||
"""
|
||||
Body should be httplib.HTTPResponse like (have an fp attribute which
|
||||
returns raw chunks) for read_chunked().
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class IncompleteRead(HTTPError, httplib_IncompleteRead):
|
||||
"""
|
||||
Response length doesn't match expected Content-Length
|
||||
|
||||
Subclass of http_client.IncompleteRead to allow int value
|
||||
for `partial` to avoid creating large objects on streamed
|
||||
reads.
|
||||
"""
|
||||
def __init__(self, partial, expected):
|
||||
super(IncompleteRead, self).__init__(partial, expected)
|
||||
|
||||
def __repr__(self):
|
||||
return ('IncompleteRead(%i bytes read, '
|
||||
'%i more expected)' % (self.partial, self.expected))
|
||||
|
||||
|
||||
class InvalidHeader(HTTPError):
|
||||
"The header provided was somehow invalid."
|
||||
pass
|
||||
|
||||
|
||||
class ProxySchemeUnknown(AssertionError, ValueError):
|
||||
"ProxyManager does not support the supplied scheme"
|
||||
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
|
||||
|
||||
def __init__(self, scheme):
|
||||
message = "Not supported proxy scheme %s" % scheme
|
||||
super(ProxySchemeUnknown, self).__init__(message)
|
||||
|
||||
|
||||
class HeaderParsingError(HTTPError):
|
||||
"Raised by assert_header_parsing, but we convert it to a log.warning statement."
|
||||
def __init__(self, defects, unparsed_data):
|
||||
message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
|
||||
super(HeaderParsingError, self).__init__(message)
|
||||
|
||||
|
||||
class UnrewindableBodyError(HTTPError):
|
||||
"urllib3 encountered an error when trying to rewind a body"
|
||||
pass
|
||||
@@ -1,178 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import email.utils
|
||||
import mimetypes
|
||||
|
||||
from .packages import six
|
||||
|
||||
|
||||
def guess_content_type(filename, default='application/octet-stream'):
|
||||
"""
|
||||
Guess the "Content-Type" of a file.
|
||||
|
||||
:param filename:
|
||||
The filename to guess the "Content-Type" of using :mod:`mimetypes`.
|
||||
:param default:
|
||||
If no "Content-Type" can be guessed, default to `default`.
|
||||
"""
|
||||
if filename:
|
||||
return mimetypes.guess_type(filename)[0] or default
|
||||
return default
|
||||
|
||||
|
||||
def format_header_param(name, value):
|
||||
"""
|
||||
Helper function to format and quote a single header parameter.
|
||||
|
||||
Particularly useful for header parameters which might contain
|
||||
non-ASCII values, like file names. This follows RFC 2231, as
|
||||
suggested by RFC 2388 Section 4.4.
|
||||
|
||||
:param name:
|
||||
The name of the parameter, a string expected to be ASCII only.
|
||||
:param value:
|
||||
The value of the parameter, provided as a unicode string.
|
||||
"""
|
||||
if not any(ch in value for ch in '"\\\r\n'):
|
||||
result = '%s="%s"' % (name, value)
|
||||
try:
|
||||
result.encode('ascii')
|
||||
except (UnicodeEncodeError, UnicodeDecodeError):
|
||||
pass
|
||||
else:
|
||||
return result
|
||||
if not six.PY3 and isinstance(value, six.text_type): # Python 2:
|
||||
value = value.encode('utf-8')
|
||||
value = email.utils.encode_rfc2231(value, 'utf-8')
|
||||
value = '%s*=%s' % (name, value)
|
||||
return value
|
||||
|
||||
|
||||
class RequestField(object):
|
||||
"""
|
||||
A data container for request body parameters.
|
||||
|
||||
:param name:
|
||||
The name of this request field.
|
||||
:param data:
|
||||
The data/value body.
|
||||
:param filename:
|
||||
An optional filename of the request field.
|
||||
:param headers:
|
||||
An optional dict-like object of headers to initially use for the field.
|
||||
"""
|
||||
def __init__(self, name, data, filename=None, headers=None):
|
||||
self._name = name
|
||||
self._filename = filename
|
||||
self.data = data
|
||||
self.headers = {}
|
||||
if headers:
|
||||
self.headers = dict(headers)
|
||||
|
||||
@classmethod
|
||||
def from_tuples(cls, fieldname, value):
|
||||
"""
|
||||
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
|
||||
|
||||
Supports constructing :class:`~urllib3.fields.RequestField` from
|
||||
parameter of key/value strings AND key/filetuple. A filetuple is a
|
||||
(filename, data, MIME type) tuple where the MIME type is optional.
|
||||
For example::
|
||||
|
||||
'foo': 'bar',
|
||||
'fakefile': ('foofile.txt', 'contents of foofile'),
|
||||
'realfile': ('barfile.txt', open('realfile').read()),
|
||||
'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
|
||||
'nonamefile': 'contents of nonamefile field',
|
||||
|
||||
Field names and filenames must be unicode.
|
||||
"""
|
||||
if isinstance(value, tuple):
|
||||
if len(value) == 3:
|
||||
filename, data, content_type = value
|
||||
else:
|
||||
filename, data = value
|
||||
content_type = guess_content_type(filename)
|
||||
else:
|
||||
filename = None
|
||||
content_type = None
|
||||
data = value
|
||||
|
||||
request_param = cls(fieldname, data, filename=filename)
|
||||
request_param.make_multipart(content_type=content_type)
|
||||
|
||||
return request_param
|
||||
|
||||
def _render_part(self, name, value):
|
||||
"""
|
||||
Overridable helper function to format a single header parameter.
|
||||
|
||||
:param name:
|
||||
The name of the parameter, a string expected to be ASCII only.
|
||||
:param value:
|
||||
The value of the parameter, provided as a unicode string.
|
||||
"""
|
||||
return format_header_param(name, value)
|
||||
|
||||
def _render_parts(self, header_parts):
|
||||
"""
|
||||
Helper function to format and quote a single header.
|
||||
|
||||
Useful for single headers that are composed of multiple items. E.g.,
|
||||
'Content-Disposition' fields.
|
||||
|
||||
:param header_parts:
|
||||
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format
|
||||
as `k1="v1"; k2="v2"; ...`.
|
||||
"""
|
||||
parts = []
|
||||
iterable = header_parts
|
||||
if isinstance(header_parts, dict):
|
||||
iterable = header_parts.items()
|
||||
|
||||
for name, value in iterable:
|
||||
if value is not None:
|
||||
parts.append(self._render_part(name, value))
|
||||
|
||||
return '; '.join(parts)
|
||||
|
||||
def render_headers(self):
|
||||
"""
|
||||
Renders the headers for this request field.
|
||||
"""
|
||||
lines = []
|
||||
|
||||
sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
|
||||
for sort_key in sort_keys:
|
||||
if self.headers.get(sort_key, False):
|
||||
lines.append('%s: %s' % (sort_key, self.headers[sort_key]))
|
||||
|
||||
for header_name, header_value in self.headers.items():
|
||||
if header_name not in sort_keys:
|
||||
if header_value:
|
||||
lines.append('%s: %s' % (header_name, header_value))
|
||||
|
||||
lines.append('\r\n')
|
||||
return '\r\n'.join(lines)
|
||||
|
||||
def make_multipart(self, content_disposition=None, content_type=None,
|
||||
content_location=None):
|
||||
"""
|
||||
Makes this request field into a multipart request field.
|
||||
|
||||
This method overrides "Content-Disposition", "Content-Type" and
|
||||
"Content-Location" headers to the request parameter.
|
||||
|
||||
:param content_type:
|
||||
The 'Content-Type' of the request body.
|
||||
:param content_location:
|
||||
The 'Content-Location' of the request body.
|
||||
|
||||
"""
|
||||
self.headers['Content-Disposition'] = content_disposition or 'form-data'
|
||||
self.headers['Content-Disposition'] += '; '.join([
|
||||
'', self._render_parts(
|
||||
(('name', self._name), ('filename', self._filename))
|
||||
)
|
||||
])
|
||||
self.headers['Content-Type'] = content_type
|
||||
self.headers['Content-Location'] = content_location
|
||||
@@ -1,94 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import codecs
|
||||
|
||||
from uuid import uuid4
|
||||
from io import BytesIO
|
||||
|
||||
from .packages import six
|
||||
from .packages.six import b
|
||||
from .fields import RequestField
|
||||
|
||||
writer = codecs.lookup('utf-8')[3]
|
||||
|
||||
|
||||
def choose_boundary():
|
||||
"""
|
||||
Our embarrassingly-simple replacement for mimetools.choose_boundary.
|
||||
"""
|
||||
return uuid4().hex
|
||||
|
||||
|
||||
def iter_field_objects(fields):
|
||||
"""
|
||||
Iterate over fields.
|
||||
|
||||
Supports list of (k, v) tuples and dicts, and lists of
|
||||
:class:`~urllib3.fields.RequestField`.
|
||||
|
||||
"""
|
||||
if isinstance(fields, dict):
|
||||
i = six.iteritems(fields)
|
||||
else:
|
||||
i = iter(fields)
|
||||
|
||||
for field in i:
|
||||
if isinstance(field, RequestField):
|
||||
yield field
|
||||
else:
|
||||
yield RequestField.from_tuples(*field)
|
||||
|
||||
|
||||
def iter_fields(fields):
|
||||
"""
|
||||
.. deprecated:: 1.6
|
||||
|
||||
Iterate over fields.
|
||||
|
||||
The addition of :class:`~urllib3.fields.RequestField` makes this function
|
||||
obsolete. Instead, use :func:`iter_field_objects`, which returns
|
||||
:class:`~urllib3.fields.RequestField` objects.
|
||||
|
||||
Supports list of (k, v) tuples and dicts.
|
||||
"""
|
||||
if isinstance(fields, dict):
|
||||
return ((k, v) for k, v in six.iteritems(fields))
|
||||
|
||||
return ((k, v) for k, v in fields)
|
||||
|
||||
|
||||
def encode_multipart_formdata(fields, boundary=None):
|
||||
"""
|
||||
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
|
||||
|
||||
:param fields:
|
||||
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
|
||||
|
||||
:param boundary:
|
||||
If not specified, then a random boundary will be generated using
|
||||
:func:`mimetools.choose_boundary`.
|
||||
"""
|
||||
body = BytesIO()
|
||||
if boundary is None:
|
||||
boundary = choose_boundary()
|
||||
|
||||
for field in iter_field_objects(fields):
|
||||
body.write(b('--%s\r\n' % (boundary)))
|
||||
|
||||
writer(body).write(field.render_headers())
|
||||
data = field.data
|
||||
|
||||
if isinstance(data, int):
|
||||
data = str(data) # Backwards compatibility
|
||||
|
||||
if isinstance(data, six.text_type):
|
||||
writer(body).write(data)
|
||||
else:
|
||||
body.write(data)
|
||||
|
||||
body.write(b'\r\n')
|
||||
|
||||
body.write(b('--%s--\r\n' % (boundary)))
|
||||
|
||||
content_type = str('multipart/form-data; boundary=%s' % boundary)
|
||||
|
||||
return body.getvalue(), content_type
|
||||
@@ -1,5 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from . import ssl_match_hostname
|
||||
|
||||
__all__ = ('ssl_match_hostname', )
|
||||
@@ -1,53 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
backports.makefile
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Backports the Python 3 ``socket.makefile`` method for use with anything that
|
||||
wants to create a "fake" socket object.
|
||||
"""
|
||||
import io
|
||||
|
||||
from socket import SocketIO
|
||||
|
||||
|
||||
def backport_makefile(self, mode="r", buffering=None, encoding=None,
|
||||
errors=None, newline=None):
|
||||
"""
|
||||
Backport of ``socket.makefile`` from Python 3.5.
|
||||
"""
|
||||
if not set(mode) <= set(["r", "w", "b"]):
|
||||
raise ValueError(
|
||||
"invalid mode %r (only r, w, b allowed)" % (mode,)
|
||||
)
|
||||
writing = "w" in mode
|
||||
reading = "r" in mode or not writing
|
||||
assert reading or writing
|
||||
binary = "b" in mode
|
||||
rawmode = ""
|
||||
if reading:
|
||||
rawmode += "r"
|
||||
if writing:
|
||||
rawmode += "w"
|
||||
raw = SocketIO(self, rawmode)
|
||||
self._makefile_refs += 1
|
||||
if buffering is None:
|
||||
buffering = -1
|
||||
if buffering < 0:
|
||||
buffering = io.DEFAULT_BUFFER_SIZE
|
||||
if buffering == 0:
|
||||
if not binary:
|
||||
raise ValueError("unbuffered streams must be binary")
|
||||
return raw
|
||||
if reading and writing:
|
||||
buffer = io.BufferedRWPair(raw, raw, buffering)
|
||||
elif reading:
|
||||
buffer = io.BufferedReader(raw, buffering)
|
||||
else:
|
||||
assert writing
|
||||
buffer = io.BufferedWriter(raw, buffering)
|
||||
if binary:
|
||||
return buffer
|
||||
text = io.TextIOWrapper(buffer, encoding, errors, newline)
|
||||
text.mode = mode
|
||||
return text
|
||||
@@ -1,259 +0,0 @@
|
||||
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
|
||||
# Passes Python2.7's test suite and incorporates all the latest updates.
|
||||
# Copyright 2009 Raymond Hettinger, released under the MIT License.
|
||||
# http://code.activestate.com/recipes/576693/
|
||||
try:
|
||||
from thread import get_ident as _get_ident
|
||||
except ImportError:
|
||||
from dummy_thread import get_ident as _get_ident
|
||||
|
||||
try:
|
||||
from _abcoll import KeysView, ValuesView, ItemsView
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class OrderedDict(dict):
|
||||
'Dictionary that remembers insertion order'
|
||||
# An inherited dict maps keys to values.
|
||||
# The inherited dict provides __getitem__, __len__, __contains__, and get.
|
||||
# The remaining methods are order-aware.
|
||||
# Big-O running times for all methods are the same as for regular dictionaries.
|
||||
|
||||
# The internal self.__map dictionary maps keys to links in a doubly linked list.
|
||||
# The circular doubly linked list starts and ends with a sentinel element.
|
||||
# The sentinel element never gets deleted (this simplifies the algorithm).
|
||||
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
|
||||
|
||||
def __init__(self, *args, **kwds):
|
||||
'''Initialize an ordered dictionary. Signature is the same as for
|
||||
regular dictionaries, but keyword arguments are not recommended
|
||||
because their insertion order is arbitrary.
|
||||
|
||||
'''
|
||||
if len(args) > 1:
|
||||
raise TypeError('expected at most 1 arguments, got %d' % len(args))
|
||||
try:
|
||||
self.__root
|
||||
except AttributeError:
|
||||
self.__root = root = [] # sentinel node
|
||||
root[:] = [root, root, None]
|
||||
self.__map = {}
|
||||
self.__update(*args, **kwds)
|
||||
|
||||
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
|
||||
'od.__setitem__(i, y) <==> od[i]=y'
|
||||
# Setting a new item creates a new link which goes at the end of the linked
|
||||
# list, and the inherited dictionary is updated with the new key/value pair.
|
||||
if key not in self:
|
||||
root = self.__root
|
||||
last = root[0]
|
||||
last[1] = root[0] = self.__map[key] = [last, root, key]
|
||||
dict_setitem(self, key, value)
|
||||
|
||||
def __delitem__(self, key, dict_delitem=dict.__delitem__):
|
||||
'od.__delitem__(y) <==> del od[y]'
|
||||
# Deleting an existing item uses self.__map to find the link which is
|
||||
# then removed by updating the links in the predecessor and successor nodes.
|
||||
dict_delitem(self, key)
|
||||
link_prev, link_next, key = self.__map.pop(key)
|
||||
link_prev[1] = link_next
|
||||
link_next[0] = link_prev
|
||||
|
||||
def __iter__(self):
|
||||
'od.__iter__() <==> iter(od)'
|
||||
root = self.__root
|
||||
curr = root[1]
|
||||
while curr is not root:
|
||||
yield curr[2]
|
||||
curr = curr[1]
|
||||
|
||||
def __reversed__(self):
|
||||
'od.__reversed__() <==> reversed(od)'
|
||||
root = self.__root
|
||||
curr = root[0]
|
||||
while curr is not root:
|
||||
yield curr[2]
|
||||
curr = curr[0]
|
||||
|
||||
def clear(self):
|
||||
'od.clear() -> None. Remove all items from od.'
|
||||
try:
|
||||
for node in self.__map.itervalues():
|
||||
del node[:]
|
||||
root = self.__root
|
||||
root[:] = [root, root, None]
|
||||
self.__map.clear()
|
||||
except AttributeError:
|
||||
pass
|
||||
dict.clear(self)
|
||||
|
||||
def popitem(self, last=True):
|
||||
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
|
||||
Pairs are returned in LIFO order if last is true or FIFO order if false.
|
||||
|
||||
'''
|
||||
if not self:
|
||||
raise KeyError('dictionary is empty')
|
||||
root = self.__root
|
||||
if last:
|
||||
link = root[0]
|
||||
link_prev = link[0]
|
||||
link_prev[1] = root
|
||||
root[0] = link_prev
|
||||
else:
|
||||
link = root[1]
|
||||
link_next = link[1]
|
||||
root[1] = link_next
|
||||
link_next[0] = root
|
||||
key = link[2]
|
||||
del self.__map[key]
|
||||
value = dict.pop(self, key)
|
||||
return key, value
|
||||
|
||||
# -- the following methods do not depend on the internal structure --
|
||||
|
||||
def keys(self):
|
||||
'od.keys() -> list of keys in od'
|
||||
return list(self)
|
||||
|
||||
def values(self):
|
||||
'od.values() -> list of values in od'
|
||||
return [self[key] for key in self]
|
||||
|
||||
def items(self):
|
||||
'od.items() -> list of (key, value) pairs in od'
|
||||
return [(key, self[key]) for key in self]
|
||||
|
||||
def iterkeys(self):
|
||||
'od.iterkeys() -> an iterator over the keys in od'
|
||||
return iter(self)
|
||||
|
||||
def itervalues(self):
|
||||
'od.itervalues -> an iterator over the values in od'
|
||||
for k in self:
|
||||
yield self[k]
|
||||
|
||||
def iteritems(self):
|
||||
'od.iteritems -> an iterator over the (key, value) items in od'
|
||||
for k in self:
|
||||
yield (k, self[k])
|
||||
|
||||
def update(*args, **kwds):
|
||||
'''od.update(E, **F) -> None. Update od from dict/iterable E and F.
|
||||
|
||||
If E is a dict instance, does: for k in E: od[k] = E[k]
|
||||
If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
|
||||
Or if E is an iterable of items, does: for k, v in E: od[k] = v
|
||||
In either case, this is followed by: for k, v in F.items(): od[k] = v
|
||||
|
||||
'''
|
||||
if len(args) > 2:
|
||||
raise TypeError('update() takes at most 2 positional '
|
||||
'arguments (%d given)' % (len(args),))
|
||||
elif not args:
|
||||
raise TypeError('update() takes at least 1 argument (0 given)')
|
||||
self = args[0]
|
||||
# Make progressively weaker assumptions about "other"
|
||||
other = ()
|
||||
if len(args) == 2:
|
||||
other = args[1]
|
||||
if isinstance(other, dict):
|
||||
for key in other:
|
||||
self[key] = other[key]
|
||||
elif hasattr(other, 'keys'):
|
||||
for key in other.keys():
|
||||
self[key] = other[key]
|
||||
else:
|
||||
for key, value in other:
|
||||
self[key] = value
|
||||
for key, value in kwds.items():
|
||||
self[key] = value
|
||||
|
||||
__update = update # let subclasses override update without breaking __init__
|
||||
|
||||
__marker = object()
|
||||
|
||||
def pop(self, key, default=__marker):
|
||||
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
|
||||
If key is not found, d is returned if given, otherwise KeyError is raised.
|
||||
|
||||
'''
|
||||
if key in self:
|
||||
result = self[key]
|
||||
del self[key]
|
||||
return result
|
||||
if default is self.__marker:
|
||||
raise KeyError(key)
|
||||
return default
|
||||
|
||||
def setdefault(self, key, default=None):
|
||||
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
|
||||
if key in self:
|
||||
return self[key]
|
||||
self[key] = default
|
||||
return default
|
||||
|
||||
def __repr__(self, _repr_running={}):
|
||||
'od.__repr__() <==> repr(od)'
|
||||
call_key = id(self), _get_ident()
|
||||
if call_key in _repr_running:
|
||||
return '...'
|
||||
_repr_running[call_key] = 1
|
||||
try:
|
||||
if not self:
|
||||
return '%s()' % (self.__class__.__name__,)
|
||||
return '%s(%r)' % (self.__class__.__name__, self.items())
|
||||
finally:
|
||||
del _repr_running[call_key]
|
||||
|
||||
def __reduce__(self):
|
||||
'Return state information for pickling'
|
||||
items = [[k, self[k]] for k in self]
|
||||
inst_dict = vars(self).copy()
|
||||
for k in vars(OrderedDict()):
|
||||
inst_dict.pop(k, None)
|
||||
if inst_dict:
|
||||
return (self.__class__, (items,), inst_dict)
|
||||
return self.__class__, (items,)
|
||||
|
||||
def copy(self):
|
||||
'od.copy() -> a shallow copy of od'
|
||||
return self.__class__(self)
|
||||
|
||||
@classmethod
|
||||
def fromkeys(cls, iterable, value=None):
|
||||
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
|
||||
and values equal to v (which defaults to None).
|
||||
|
||||
'''
|
||||
d = cls()
|
||||
for key in iterable:
|
||||
d[key] = value
|
||||
return d
|
||||
|
||||
def __eq__(self, other):
|
||||
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
|
||||
while comparison to a regular mapping is order-insensitive.
|
||||
|
||||
'''
|
||||
if isinstance(other, OrderedDict):
|
||||
return len(self)==len(other) and self.items() == other.items()
|
||||
return dict.__eq__(self, other)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
# -- the following methods are only used in Python 2.7 --
|
||||
|
||||
def viewkeys(self):
|
||||
"od.viewkeys() -> a set-like object providing a view on od's keys"
|
||||
return KeysView(self)
|
||||
|
||||
def viewvalues(self):
|
||||
"od.viewvalues() -> an object providing a view on od's values"
|
||||
return ValuesView(self)
|
||||
|
||||
def viewitems(self):
|
||||
"od.viewitems() -> a set-like object providing a view on od's items"
|
||||
return ItemsView(self)
|
||||
@@ -1,868 +0,0 @@
|
||||
"""Utilities for writing code that runs on Python 2 and 3"""
|
||||
|
||||
# Copyright (c) 2010-2015 Benjamin Peterson
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import functools
|
||||
import itertools
|
||||
import operator
|
||||
import sys
|
||||
import types
|
||||
|
||||
__author__ = "Benjamin Peterson <benjamin@python.org>"
|
||||
__version__ = "1.10.0"
|
||||
|
||||
|
||||
# Useful for very coarse version differentiation.
|
||||
PY2 = sys.version_info[0] == 2
|
||||
PY3 = sys.version_info[0] == 3
|
||||
PY34 = sys.version_info[0:2] >= (3, 4)
|
||||
|
||||
if PY3:
|
||||
string_types = str,
|
||||
integer_types = int,
|
||||
class_types = type,
|
||||
text_type = str
|
||||
binary_type = bytes
|
||||
|
||||
MAXSIZE = sys.maxsize
|
||||
else:
|
||||
string_types = basestring,
|
||||
integer_types = (int, long)
|
||||
class_types = (type, types.ClassType)
|
||||
text_type = unicode
|
||||
binary_type = str
|
||||
|
||||
if sys.platform.startswith("java"):
|
||||
# Jython always uses 32 bits.
|
||||
MAXSIZE = int((1 << 31) - 1)
|
||||
else:
|
||||
# It's possible to have sizeof(long) != sizeof(Py_ssize_t).
|
||||
class X(object):
|
||||
|
||||
def __len__(self):
|
||||
return 1 << 31
|
||||
try:
|
||||
len(X())
|
||||
except OverflowError:
|
||||
# 32-bit
|
||||
MAXSIZE = int((1 << 31) - 1)
|
||||
else:
|
||||
# 64-bit
|
||||
MAXSIZE = int((1 << 63) - 1)
|
||||
del X
|
||||
|
||||
|
||||
def _add_doc(func, doc):
|
||||
"""Add documentation to a function."""
|
||||
func.__doc__ = doc
|
||||
|
||||
|
||||
def _import_module(name):
|
||||
"""Import module, returning the module after the last dot."""
|
||||
__import__(name)
|
||||
return sys.modules[name]
|
||||
|
||||
|
||||
class _LazyDescr(object):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
def __get__(self, obj, tp):
|
||||
result = self._resolve()
|
||||
setattr(obj, self.name, result) # Invokes __set__.
|
||||
try:
|
||||
# This is a bit ugly, but it avoids running this again by
|
||||
# removing this descriptor.
|
||||
delattr(obj.__class__, self.name)
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class MovedModule(_LazyDescr):
|
||||
|
||||
def __init__(self, name, old, new=None):
|
||||
super(MovedModule, self).__init__(name)
|
||||
if PY3:
|
||||
if new is None:
|
||||
new = name
|
||||
self.mod = new
|
||||
else:
|
||||
self.mod = old
|
||||
|
||||
def _resolve(self):
|
||||
return _import_module(self.mod)
|
||||
|
||||
def __getattr__(self, attr):
|
||||
_module = self._resolve()
|
||||
value = getattr(_module, attr)
|
||||
setattr(self, attr, value)
|
||||
return value
|
||||
|
||||
|
||||
class _LazyModule(types.ModuleType):
|
||||
|
||||
def __init__(self, name):
|
||||
super(_LazyModule, self).__init__(name)
|
||||
self.__doc__ = self.__class__.__doc__
|
||||
|
||||
def __dir__(self):
|
||||
attrs = ["__doc__", "__name__"]
|
||||
attrs += [attr.name for attr in self._moved_attributes]
|
||||
return attrs
|
||||
|
||||
# Subclasses should override this
|
||||
_moved_attributes = []
|
||||
|
||||
|
||||
class MovedAttribute(_LazyDescr):
|
||||
|
||||
def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
|
||||
super(MovedAttribute, self).__init__(name)
|
||||
if PY3:
|
||||
if new_mod is None:
|
||||
new_mod = name
|
||||
self.mod = new_mod
|
||||
if new_attr is None:
|
||||
if old_attr is None:
|
||||
new_attr = name
|
||||
else:
|
||||
new_attr = old_attr
|
||||
self.attr = new_attr
|
||||
else:
|
||||
self.mod = old_mod
|
||||
if old_attr is None:
|
||||
old_attr = name
|
||||
self.attr = old_attr
|
||||
|
||||
def _resolve(self):
|
||||
module = _import_module(self.mod)
|
||||
return getattr(module, self.attr)
|
||||
|
||||
|
||||
class _SixMetaPathImporter(object):
|
||||
|
||||
"""
|
||||
A meta path importer to import six.moves and its submodules.
|
||||
|
||||
This class implements a PEP302 finder and loader. It should be compatible
|
||||
with Python 2.5 and all existing versions of Python3
|
||||
"""
|
||||
|
||||
def __init__(self, six_module_name):
|
||||
self.name = six_module_name
|
||||
self.known_modules = {}
|
||||
|
||||
def _add_module(self, mod, *fullnames):
|
||||
for fullname in fullnames:
|
||||
self.known_modules[self.name + "." + fullname] = mod
|
||||
|
||||
def _get_module(self, fullname):
|
||||
return self.known_modules[self.name + "." + fullname]
|
||||
|
||||
def find_module(self, fullname, path=None):
|
||||
if fullname in self.known_modules:
|
||||
return self
|
||||
return None
|
||||
|
||||
def __get_module(self, fullname):
|
||||
try:
|
||||
return self.known_modules[fullname]
|
||||
except KeyError:
|
||||
raise ImportError("This loader does not know module " + fullname)
|
||||
|
||||
def load_module(self, fullname):
|
||||
try:
|
||||
# in case of a reload
|
||||
return sys.modules[fullname]
|
||||
except KeyError:
|
||||
pass
|
||||
mod = self.__get_module(fullname)
|
||||
if isinstance(mod, MovedModule):
|
||||
mod = mod._resolve()
|
||||
else:
|
||||
mod.__loader__ = self
|
||||
sys.modules[fullname] = mod
|
||||
return mod
|
||||
|
||||
def is_package(self, fullname):
|
||||
"""
|
||||
Return true, if the named module is a package.
|
||||
|
||||
We need this method to get correct spec objects with
|
||||
Python 3.4 (see PEP451)
|
||||
"""
|
||||
return hasattr(self.__get_module(fullname), "__path__")
|
||||
|
||||
def get_code(self, fullname):
|
||||
"""Return None
|
||||
|
||||
Required, if is_package is implemented"""
|
||||
self.__get_module(fullname) # eventually raises ImportError
|
||||
return None
|
||||
get_source = get_code # same as get_code
|
||||
|
||||
_importer = _SixMetaPathImporter(__name__)
|
||||
|
||||
|
||||
class _MovedItems(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects"""
|
||||
__path__ = [] # mark as package
|
||||
|
||||
|
||||
_moved_attributes = [
|
||||
MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
|
||||
MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
|
||||
MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
|
||||
MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
|
||||
MovedAttribute("intern", "__builtin__", "sys"),
|
||||
MovedAttribute("map", "itertools", "builtins", "imap", "map"),
|
||||
MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
|
||||
MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
|
||||
MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
|
||||
MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
|
||||
MovedAttribute("reduce", "__builtin__", "functools"),
|
||||
MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
|
||||
MovedAttribute("StringIO", "StringIO", "io"),
|
||||
MovedAttribute("UserDict", "UserDict", "collections"),
|
||||
MovedAttribute("UserList", "UserList", "collections"),
|
||||
MovedAttribute("UserString", "UserString", "collections"),
|
||||
MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
|
||||
MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
|
||||
MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
|
||||
MovedModule("builtins", "__builtin__"),
|
||||
MovedModule("configparser", "ConfigParser"),
|
||||
MovedModule("copyreg", "copy_reg"),
|
||||
MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
|
||||
MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
|
||||
MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
|
||||
MovedModule("http_cookies", "Cookie", "http.cookies"),
|
||||
MovedModule("html_entities", "htmlentitydefs", "html.entities"),
|
||||
MovedModule("html_parser", "HTMLParser", "html.parser"),
|
||||
MovedModule("http_client", "httplib", "http.client"),
|
||||
MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
|
||||
MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
|
||||
MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
|
||||
MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
|
||||
MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
|
||||
MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
|
||||
MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
|
||||
MovedModule("cPickle", "cPickle", "pickle"),
|
||||
MovedModule("queue", "Queue"),
|
||||
MovedModule("reprlib", "repr"),
|
||||
MovedModule("socketserver", "SocketServer"),
|
||||
MovedModule("_thread", "thread", "_thread"),
|
||||
MovedModule("tkinter", "Tkinter"),
|
||||
MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
|
||||
MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
|
||||
MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
|
||||
MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
|
||||
MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
|
||||
MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"),
|
||||
MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
|
||||
MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
|
||||
MovedModule("tkinter_colorchooser", "tkColorChooser",
|
||||
"tkinter.colorchooser"),
|
||||
MovedModule("tkinter_commondialog", "tkCommonDialog",
|
||||
"tkinter.commondialog"),
|
||||
MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
|
||||
MovedModule("tkinter_font", "tkFont", "tkinter.font"),
|
||||
MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
|
||||
MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
|
||||
"tkinter.simpledialog"),
|
||||
MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
|
||||
MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
|
||||
MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
|
||||
MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
|
||||
MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
|
||||
MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
|
||||
]
|
||||
# Add windows specific modules.
|
||||
if sys.platform == "win32":
|
||||
_moved_attributes += [
|
||||
MovedModule("winreg", "_winreg"),
|
||||
]
|
||||
|
||||
for attr in _moved_attributes:
|
||||
setattr(_MovedItems, attr.name, attr)
|
||||
if isinstance(attr, MovedModule):
|
||||
_importer._add_module(attr, "moves." + attr.name)
|
||||
del attr
|
||||
|
||||
_MovedItems._moved_attributes = _moved_attributes
|
||||
|
||||
moves = _MovedItems(__name__ + ".moves")
|
||||
_importer._add_module(moves, "moves")
|
||||
|
||||
|
||||
class Module_six_moves_urllib_parse(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects in six.moves.urllib_parse"""
|
||||
|
||||
|
||||
_urllib_parse_moved_attributes = [
|
||||
MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urljoin", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urlparse", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("quote", "urllib", "urllib.parse"),
|
||||
MovedAttribute("quote_plus", "urllib", "urllib.parse"),
|
||||
MovedAttribute("unquote", "urllib", "urllib.parse"),
|
||||
MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
|
||||
MovedAttribute("urlencode", "urllib", "urllib.parse"),
|
||||
MovedAttribute("splitquery", "urllib", "urllib.parse"),
|
||||
MovedAttribute("splittag", "urllib", "urllib.parse"),
|
||||
MovedAttribute("splituser", "urllib", "urllib.parse"),
|
||||
MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("uses_params", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("uses_query", "urlparse", "urllib.parse"),
|
||||
MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
|
||||
]
|
||||
for attr in _urllib_parse_moved_attributes:
|
||||
setattr(Module_six_moves_urllib_parse, attr.name, attr)
|
||||
del attr
|
||||
|
||||
Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
|
||||
"moves.urllib_parse", "moves.urllib.parse")
|
||||
|
||||
|
||||
class Module_six_moves_urllib_error(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects in six.moves.urllib_error"""
|
||||
|
||||
|
||||
_urllib_error_moved_attributes = [
|
||||
MovedAttribute("URLError", "urllib2", "urllib.error"),
|
||||
MovedAttribute("HTTPError", "urllib2", "urllib.error"),
|
||||
MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
|
||||
]
|
||||
for attr in _urllib_error_moved_attributes:
|
||||
setattr(Module_six_moves_urllib_error, attr.name, attr)
|
||||
del attr
|
||||
|
||||
Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
|
||||
"moves.urllib_error", "moves.urllib.error")
|
||||
|
||||
|
||||
class Module_six_moves_urllib_request(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects in six.moves.urllib_request"""
|
||||
|
||||
|
||||
_urllib_request_moved_attributes = [
|
||||
MovedAttribute("urlopen", "urllib2", "urllib.request"),
|
||||
MovedAttribute("install_opener", "urllib2", "urllib.request"),
|
||||
MovedAttribute("build_opener", "urllib2", "urllib.request"),
|
||||
MovedAttribute("pathname2url", "urllib", "urllib.request"),
|
||||
MovedAttribute("url2pathname", "urllib", "urllib.request"),
|
||||
MovedAttribute("getproxies", "urllib", "urllib.request"),
|
||||
MovedAttribute("Request", "urllib2", "urllib.request"),
|
||||
MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
|
||||
MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
|
||||
MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("FileHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
|
||||
MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
|
||||
MovedAttribute("urlretrieve", "urllib", "urllib.request"),
|
||||
MovedAttribute("urlcleanup", "urllib", "urllib.request"),
|
||||
MovedAttribute("URLopener", "urllib", "urllib.request"),
|
||||
MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
|
||||
MovedAttribute("proxy_bypass", "urllib", "urllib.request"),
|
||||
]
|
||||
for attr in _urllib_request_moved_attributes:
|
||||
setattr(Module_six_moves_urllib_request, attr.name, attr)
|
||||
del attr
|
||||
|
||||
Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
|
||||
"moves.urllib_request", "moves.urllib.request")
|
||||
|
||||
|
||||
class Module_six_moves_urllib_response(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects in six.moves.urllib_response"""
|
||||
|
||||
|
||||
_urllib_response_moved_attributes = [
|
||||
MovedAttribute("addbase", "urllib", "urllib.response"),
|
||||
MovedAttribute("addclosehook", "urllib", "urllib.response"),
|
||||
MovedAttribute("addinfo", "urllib", "urllib.response"),
|
||||
MovedAttribute("addinfourl", "urllib", "urllib.response"),
|
||||
]
|
||||
for attr in _urllib_response_moved_attributes:
|
||||
setattr(Module_six_moves_urllib_response, attr.name, attr)
|
||||
del attr
|
||||
|
||||
Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
|
||||
"moves.urllib_response", "moves.urllib.response")
|
||||
|
||||
|
||||
class Module_six_moves_urllib_robotparser(_LazyModule):
|
||||
|
||||
"""Lazy loading of moved objects in six.moves.urllib_robotparser"""
|
||||
|
||||
|
||||
_urllib_robotparser_moved_attributes = [
|
||||
MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
|
||||
]
|
||||
for attr in _urllib_robotparser_moved_attributes:
|
||||
setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
|
||||
del attr
|
||||
|
||||
Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
|
||||
"moves.urllib_robotparser", "moves.urllib.robotparser")
|
||||
|
||||
|
||||
class Module_six_moves_urllib(types.ModuleType):
|
||||
|
||||
"""Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
|
||||
__path__ = [] # mark as package
|
||||
parse = _importer._get_module("moves.urllib_parse")
|
||||
error = _importer._get_module("moves.urllib_error")
|
||||
request = _importer._get_module("moves.urllib_request")
|
||||
response = _importer._get_module("moves.urllib_response")
|
||||
robotparser = _importer._get_module("moves.urllib_robotparser")
|
||||
|
||||
def __dir__(self):
|
||||
return ['parse', 'error', 'request', 'response', 'robotparser']
|
||||
|
||||
_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
|
||||
"moves.urllib")
|
||||
|
||||
|
||||
def add_move(move):
|
||||
"""Add an item to six.moves."""
|
||||
setattr(_MovedItems, move.name, move)
|
||||
|
||||
|
||||
def remove_move(name):
|
||||
"""Remove item from six.moves."""
|
||||
try:
|
||||
delattr(_MovedItems, name)
|
||||
except AttributeError:
|
||||
try:
|
||||
del moves.__dict__[name]
|
||||
except KeyError:
|
||||
raise AttributeError("no such move, %r" % (name,))
|
||||
|
||||
|
||||
if PY3:
|
||||
_meth_func = "__func__"
|
||||
_meth_self = "__self__"
|
||||
|
||||
_func_closure = "__closure__"
|
||||
_func_code = "__code__"
|
||||
_func_defaults = "__defaults__"
|
||||
_func_globals = "__globals__"
|
||||
else:
|
||||
_meth_func = "im_func"
|
||||
_meth_self = "im_self"
|
||||
|
||||
_func_closure = "func_closure"
|
||||
_func_code = "func_code"
|
||||
_func_defaults = "func_defaults"
|
||||
_func_globals = "func_globals"
|
||||
|
||||
|
||||
try:
|
||||
advance_iterator = next
|
||||
except NameError:
|
||||
def advance_iterator(it):
|
||||
return it.next()
|
||||
next = advance_iterator
|
||||
|
||||
|
||||
try:
|
||||
callable = callable
|
||||
except NameError:
|
||||
def callable(obj):
|
||||
return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
|
||||
|
||||
|
||||
if PY3:
|
||||
def get_unbound_function(unbound):
|
||||
return unbound
|
||||
|
||||
create_bound_method = types.MethodType
|
||||
|
||||
def create_unbound_method(func, cls):
|
||||
return func
|
||||
|
||||
Iterator = object
|
||||
else:
|
||||
def get_unbound_function(unbound):
|
||||
return unbound.im_func
|
||||
|
||||
def create_bound_method(func, obj):
|
||||
return types.MethodType(func, obj, obj.__class__)
|
||||
|
||||
def create_unbound_method(func, cls):
|
||||
return types.MethodType(func, None, cls)
|
||||
|
||||
class Iterator(object):
|
||||
|
||||
def next(self):
|
||||
return type(self).__next__(self)
|
||||
|
||||
callable = callable
|
||||
_add_doc(get_unbound_function,
|
||||
"""Get the function out of a possibly unbound function""")
|
||||
|
||||
|
||||
get_method_function = operator.attrgetter(_meth_func)
|
||||
get_method_self = operator.attrgetter(_meth_self)
|
||||
get_function_closure = operator.attrgetter(_func_closure)
|
||||
get_function_code = operator.attrgetter(_func_code)
|
||||
get_function_defaults = operator.attrgetter(_func_defaults)
|
||||
get_function_globals = operator.attrgetter(_func_globals)
|
||||
|
||||
|
||||
if PY3:
|
||||
def iterkeys(d, **kw):
|
||||
return iter(d.keys(**kw))
|
||||
|
||||
def itervalues(d, **kw):
|
||||
return iter(d.values(**kw))
|
||||
|
||||
def iteritems(d, **kw):
|
||||
return iter(d.items(**kw))
|
||||
|
||||
def iterlists(d, **kw):
|
||||
return iter(d.lists(**kw))
|
||||
|
||||
viewkeys = operator.methodcaller("keys")
|
||||
|
||||
viewvalues = operator.methodcaller("values")
|
||||
|
||||
viewitems = operator.methodcaller("items")
|
||||
else:
|
||||
def iterkeys(d, **kw):
|
||||
return d.iterkeys(**kw)
|
||||
|
||||
def itervalues(d, **kw):
|
||||
return d.itervalues(**kw)
|
||||
|
||||
def iteritems(d, **kw):
|
||||
return d.iteritems(**kw)
|
||||
|
||||
def iterlists(d, **kw):
|
||||
return d.iterlists(**kw)
|
||||
|
||||
viewkeys = operator.methodcaller("viewkeys")
|
||||
|
||||
viewvalues = operator.methodcaller("viewvalues")
|
||||
|
||||
viewitems = operator.methodcaller("viewitems")
|
||||
|
||||
_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
|
||||
_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
|
||||
_add_doc(iteritems,
|
||||
"Return an iterator over the (key, value) pairs of a dictionary.")
|
||||
_add_doc(iterlists,
|
||||
"Return an iterator over the (key, [values]) pairs of a dictionary.")
|
||||
|
||||
|
||||
if PY3:
|
||||
def b(s):
|
||||
return s.encode("latin-1")
|
||||
|
||||
def u(s):
|
||||
return s
|
||||
unichr = chr
|
||||
import struct
|
||||
int2byte = struct.Struct(">B").pack
|
||||
del struct
|
||||
byte2int = operator.itemgetter(0)
|
||||
indexbytes = operator.getitem
|
||||
iterbytes = iter
|
||||
import io
|
||||
StringIO = io.StringIO
|
||||
BytesIO = io.BytesIO
|
||||
_assertCountEqual = "assertCountEqual"
|
||||
if sys.version_info[1] <= 1:
|
||||
_assertRaisesRegex = "assertRaisesRegexp"
|
||||
_assertRegex = "assertRegexpMatches"
|
||||
else:
|
||||
_assertRaisesRegex = "assertRaisesRegex"
|
||||
_assertRegex = "assertRegex"
|
||||
else:
|
||||
def b(s):
|
||||
return s
|
||||
# Workaround for standalone backslash
|
||||
|
||||
def u(s):
|
||||
return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
|
||||
unichr = unichr
|
||||
int2byte = chr
|
||||
|
||||
def byte2int(bs):
|
||||
return ord(bs[0])
|
||||
|
||||
def indexbytes(buf, i):
|
||||
return ord(buf[i])
|
||||
iterbytes = functools.partial(itertools.imap, ord)
|
||||
import StringIO
|
||||
StringIO = BytesIO = StringIO.StringIO
|
||||
_assertCountEqual = "assertItemsEqual"
|
||||
_assertRaisesRegex = "assertRaisesRegexp"
|
||||
_assertRegex = "assertRegexpMatches"
|
||||
_add_doc(b, """Byte literal""")
|
||||
_add_doc(u, """Text literal""")
|
||||
|
||||
|
||||
def assertCountEqual(self, *args, **kwargs):
|
||||
return getattr(self, _assertCountEqual)(*args, **kwargs)
|
||||
|
||||
|
||||
def assertRaisesRegex(self, *args, **kwargs):
|
||||
return getattr(self, _assertRaisesRegex)(*args, **kwargs)
|
||||
|
||||
|
||||
def assertRegex(self, *args, **kwargs):
|
||||
return getattr(self, _assertRegex)(*args, **kwargs)
|
||||
|
||||
|
||||
if PY3:
|
||||
exec_ = getattr(moves.builtins, "exec")
|
||||
|
||||
def reraise(tp, value, tb=None):
|
||||
if value is None:
|
||||
value = tp()
|
||||
if value.__traceback__ is not tb:
|
||||
raise value.with_traceback(tb)
|
||||
raise value
|
||||
|
||||
else:
|
||||
def exec_(_code_, _globs_=None, _locs_=None):
|
||||
"""Execute code in a namespace."""
|
||||
if _globs_ is None:
|
||||
frame = sys._getframe(1)
|
||||
_globs_ = frame.f_globals
|
||||
if _locs_ is None:
|
||||
_locs_ = frame.f_locals
|
||||
del frame
|
||||
elif _locs_ is None:
|
||||
_locs_ = _globs_
|
||||
exec("""exec _code_ in _globs_, _locs_""")
|
||||
|
||||
exec_("""def reraise(tp, value, tb=None):
|
||||
raise tp, value, tb
|
||||
""")
|
||||
|
||||
|
||||
if sys.version_info[:2] == (3, 2):
|
||||
exec_("""def raise_from(value, from_value):
|
||||
if from_value is None:
|
||||
raise value
|
||||
raise value from from_value
|
||||
""")
|
||||
elif sys.version_info[:2] > (3, 2):
|
||||
exec_("""def raise_from(value, from_value):
|
||||
raise value from from_value
|
||||
""")
|
||||
else:
|
||||
def raise_from(value, from_value):
|
||||
raise value
|
||||
|
||||
|
||||
print_ = getattr(moves.builtins, "print", None)
|
||||
if print_ is None:
|
||||
def print_(*args, **kwargs):
|
||||
"""The new-style print function for Python 2.4 and 2.5."""
|
||||
fp = kwargs.pop("file", sys.stdout)
|
||||
if fp is None:
|
||||
return
|
||||
|
||||
def write(data):
|
||||
if not isinstance(data, basestring):
|
||||
data = str(data)
|
||||
# If the file has an encoding, encode unicode with it.
|
||||
if (isinstance(fp, file) and
|
||||
isinstance(data, unicode) and
|
||||
fp.encoding is not None):
|
||||
errors = getattr(fp, "errors", None)
|
||||
if errors is None:
|
||||
errors = "strict"
|
||||
data = data.encode(fp.encoding, errors)
|
||||
fp.write(data)
|
||||
want_unicode = False
|
||||
sep = kwargs.pop("sep", None)
|
||||
if sep is not None:
|
||||
if isinstance(sep, unicode):
|
||||
want_unicode = True
|
||||
elif not isinstance(sep, str):
|
||||
raise TypeError("sep must be None or a string")
|
||||
end = kwargs.pop("end", None)
|
||||
if end is not None:
|
||||
if isinstance(end, unicode):
|
||||
want_unicode = True
|
||||
elif not isinstance(end, str):
|
||||
raise TypeError("end must be None or a string")
|
||||
if kwargs:
|
||||
raise TypeError("invalid keyword arguments to print()")
|
||||
if not want_unicode:
|
||||
for arg in args:
|
||||
if isinstance(arg, unicode):
|
||||
want_unicode = True
|
||||
break
|
||||
if want_unicode:
|
||||
newline = unicode("\n")
|
||||
space = unicode(" ")
|
||||
else:
|
||||
newline = "\n"
|
||||
space = " "
|
||||
if sep is None:
|
||||
sep = space
|
||||
if end is None:
|
||||
end = newline
|
||||
for i, arg in enumerate(args):
|
||||
if i:
|
||||
write(sep)
|
||||
write(arg)
|
||||
write(end)
|
||||
if sys.version_info[:2] < (3, 3):
|
||||
_print = print_
|
||||
|
||||
def print_(*args, **kwargs):
|
||||
fp = kwargs.get("file", sys.stdout)
|
||||
flush = kwargs.pop("flush", False)
|
||||
_print(*args, **kwargs)
|
||||
if flush and fp is not None:
|
||||
fp.flush()
|
||||
|
||||
_add_doc(reraise, """Reraise an exception.""")
|
||||
|
||||
if sys.version_info[0:2] < (3, 4):
|
||||
def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
|
||||
updated=functools.WRAPPER_UPDATES):
|
||||
def wrapper(f):
|
||||
f = functools.wraps(wrapped, assigned, updated)(f)
|
||||
f.__wrapped__ = wrapped
|
||||
return f
|
||||
return wrapper
|
||||
else:
|
||||
wraps = functools.wraps
|
||||
|
||||
|
||||
def with_metaclass(meta, *bases):
|
||||
"""Create a base class with a metaclass."""
|
||||
# This requires a bit of explanation: the basic idea is to make a dummy
|
||||
# metaclass for one level of class instantiation that replaces itself with
|
||||
# the actual metaclass.
|
||||
class metaclass(meta):
|
||||
|
||||
def __new__(cls, name, this_bases, d):
|
||||
return meta(name, bases, d)
|
||||
return type.__new__(metaclass, 'temporary_class', (), {})
|
||||
|
||||
|
||||
def add_metaclass(metaclass):
|
||||
"""Class decorator for creating a class with a metaclass."""
|
||||
def wrapper(cls):
|
||||
orig_vars = cls.__dict__.copy()
|
||||
slots = orig_vars.get('__slots__')
|
||||
if slots is not None:
|
||||
if isinstance(slots, str):
|
||||
slots = [slots]
|
||||
for slots_var in slots:
|
||||
orig_vars.pop(slots_var)
|
||||
orig_vars.pop('__dict__', None)
|
||||
orig_vars.pop('__weakref__', None)
|
||||
return metaclass(cls.__name__, cls.__bases__, orig_vars)
|
||||
return wrapper
|
||||
|
||||
|
||||
def python_2_unicode_compatible(klass):
|
||||
"""
|
||||
A decorator that defines __unicode__ and __str__ methods under Python 2.
|
||||
Under Python 3 it does nothing.
|
||||
|
||||
To support Python 2 and 3 with a single code base, define a __str__ method
|
||||
returning text and apply this decorator to the class.
|
||||
"""
|
||||
if PY2:
|
||||
if '__str__' not in klass.__dict__:
|
||||
raise ValueError("@python_2_unicode_compatible cannot be applied "
|
||||
"to %s because it doesn't define __str__()." %
|
||||
klass.__name__)
|
||||
klass.__unicode__ = klass.__str__
|
||||
klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
|
||||
return klass
|
||||
|
||||
|
||||
# Complete the moves implementation.
|
||||
# This code is at the end of this module to speed up module loading.
|
||||
# Turn this module into a package.
|
||||
__path__ = [] # required for PEP 302 and PEP 451
|
||||
__package__ = __name__ # see PEP 366 @ReservedAssignment
|
||||
if globals().get("__spec__") is not None:
|
||||
__spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable
|
||||
# Remove other six meta path importers, since they cause problems. This can
|
||||
# happen if six is removed from sys.modules and then reloaded. (Setuptools does
|
||||
# this for some reason.)
|
||||
if sys.meta_path:
|
||||
for i, importer in enumerate(sys.meta_path):
|
||||
# Here's some real nastiness: Another "instance" of the six module might
|
||||
# be floating around. Therefore, we can't use isinstance() to check for
|
||||
# the six meta path importer, since the other six instance will have
|
||||
# inserted an importer with different class.
|
||||
if (type(importer).__name__ == "_SixMetaPathImporter" and
|
||||
importer.name == __name__):
|
||||
del sys.meta_path[i]
|
||||
break
|
||||
del i, importer
|
||||
# Finally, add the importer to the meta path import hook.
|
||||
sys.meta_path.append(_importer)
|
||||
@@ -1,19 +0,0 @@
|
||||
import sys
|
||||
|
||||
try:
|
||||
# Our match_hostname function is the same as 3.5's, so we only want to
|
||||
# import the match_hostname function if it's at least that good.
|
||||
if sys.version_info < (3, 5):
|
||||
raise ImportError("Fallback to vendored code")
|
||||
|
||||
from ssl import CertificateError, match_hostname
|
||||
except ImportError:
|
||||
try:
|
||||
# Backport of the function from a pypi module
|
||||
from backports.ssl_match_hostname import CertificateError, match_hostname
|
||||
except ImportError:
|
||||
# Our vendored copy
|
||||
from ._implementation import CertificateError, match_hostname
|
||||
|
||||
# Not needed, but documenting what we provide.
|
||||
__all__ = ('CertificateError', 'match_hostname')
|
||||
@@ -1,157 +0,0 @@
|
||||
"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
|
||||
|
||||
# Note: This file is under the PSF license as the code comes from the python
|
||||
# stdlib. http://docs.python.org/3/license.html
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
# ipaddress has been backported to 2.6+ in pypi. If it is installed on the
|
||||
# system, use it to handle IPAddress ServerAltnames (this was added in
|
||||
# python-3.5) otherwise only do DNS matching. This allows
|
||||
# backports.ssl_match_hostname to continue to be used all the way back to
|
||||
# python-2.4.
|
||||
try:
|
||||
import ipaddress
|
||||
except ImportError:
|
||||
ipaddress = None
|
||||
|
||||
__version__ = '3.5.0.1'
|
||||
|
||||
|
||||
class CertificateError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def _dnsname_match(dn, hostname, max_wildcards=1):
|
||||
"""Matching according to RFC 6125, section 6.4.3
|
||||
|
||||
http://tools.ietf.org/html/rfc6125#section-6.4.3
|
||||
"""
|
||||
pats = []
|
||||
if not dn:
|
||||
return False
|
||||
|
||||
# Ported from python3-syntax:
|
||||
# leftmost, *remainder = dn.split(r'.')
|
||||
parts = dn.split(r'.')
|
||||
leftmost = parts[0]
|
||||
remainder = parts[1:]
|
||||
|
||||
wildcards = leftmost.count('*')
|
||||
if wildcards > max_wildcards:
|
||||
# Issue #17980: avoid denials of service by refusing more
|
||||
# than one wildcard per fragment. A survey of established
|
||||
# policy among SSL implementations showed it to be a
|
||||
# reasonable choice.
|
||||
raise CertificateError(
|
||||
"too many wildcards in certificate DNS name: " + repr(dn))
|
||||
|
||||
# speed up common case w/o wildcards
|
||||
if not wildcards:
|
||||
return dn.lower() == hostname.lower()
|
||||
|
||||
# RFC 6125, section 6.4.3, subitem 1.
|
||||
# The client SHOULD NOT attempt to match a presented identifier in which
|
||||
# the wildcard character comprises a label other than the left-most label.
|
||||
if leftmost == '*':
|
||||
# When '*' is a fragment by itself, it matches a non-empty dotless
|
||||
# fragment.
|
||||
pats.append('[^.]+')
|
||||
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
|
||||
# RFC 6125, section 6.4.3, subitem 3.
|
||||
# The client SHOULD NOT attempt to match a presented identifier
|
||||
# where the wildcard character is embedded within an A-label or
|
||||
# U-label of an internationalized domain name.
|
||||
pats.append(re.escape(leftmost))
|
||||
else:
|
||||
# Otherwise, '*' matches any dotless string, e.g. www*
|
||||
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
|
||||
|
||||
# add the remaining fragments, ignore any wildcards
|
||||
for frag in remainder:
|
||||
pats.append(re.escape(frag))
|
||||
|
||||
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
|
||||
return pat.match(hostname)
|
||||
|
||||
|
||||
def _to_unicode(obj):
|
||||
if isinstance(obj, str) and sys.version_info < (3,):
|
||||
obj = unicode(obj, encoding='ascii', errors='strict')
|
||||
return obj
|
||||
|
||||
def _ipaddress_match(ipname, host_ip):
|
||||
"""Exact matching of IP addresses.
|
||||
|
||||
RFC 6125 explicitly doesn't define an algorithm for this
|
||||
(section 1.7.2 - "Out of Scope").
|
||||
"""
|
||||
# OpenSSL may add a trailing newline to a subjectAltName's IP address
|
||||
# Divergence from upstream: ipaddress can't handle byte str
|
||||
ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
|
||||
return ip == host_ip
|
||||
|
||||
|
||||
def match_hostname(cert, hostname):
|
||||
"""Verify that *cert* (in decoded format as returned by
|
||||
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
|
||||
rules are followed, but IP addresses are not accepted for *hostname*.
|
||||
|
||||
CertificateError is raised on failure. On success, the function
|
||||
returns nothing.
|
||||
"""
|
||||
if not cert:
|
||||
raise ValueError("empty or no certificate, match_hostname needs a "
|
||||
"SSL socket or SSL context with either "
|
||||
"CERT_OPTIONAL or CERT_REQUIRED")
|
||||
try:
|
||||
# Divergence from upstream: ipaddress can't handle byte str
|
||||
host_ip = ipaddress.ip_address(_to_unicode(hostname))
|
||||
except ValueError:
|
||||
# Not an IP address (common case)
|
||||
host_ip = None
|
||||
except UnicodeError:
|
||||
# Divergence from upstream: Have to deal with ipaddress not taking
|
||||
# byte strings. addresses should be all ascii, so we consider it not
|
||||
# an ipaddress in this case
|
||||
host_ip = None
|
||||
except AttributeError:
|
||||
# Divergence from upstream: Make ipaddress library optional
|
||||
if ipaddress is None:
|
||||
host_ip = None
|
||||
else:
|
||||
raise
|
||||
dnsnames = []
|
||||
san = cert.get('subjectAltName', ())
|
||||
for key, value in san:
|
||||
if key == 'DNS':
|
||||
if host_ip is None and _dnsname_match(value, hostname):
|
||||
return
|
||||
dnsnames.append(value)
|
||||
elif key == 'IP Address':
|
||||
if host_ip is not None and _ipaddress_match(value, host_ip):
|
||||
return
|
||||
dnsnames.append(value)
|
||||
if not dnsnames:
|
||||
# The subject is only checked when there is no dNSName entry
|
||||
# in subjectAltName
|
||||
for sub in cert.get('subject', ()):
|
||||
for key, value in sub:
|
||||
# XXX according to RFC 2818, the most specific Common Name
|
||||
# must be used.
|
||||
if key == 'commonName':
|
||||
if _dnsname_match(value, hostname):
|
||||
return
|
||||
dnsnames.append(value)
|
||||
if len(dnsnames) > 1:
|
||||
raise CertificateError("hostname %r "
|
||||
"doesn't match either of %s"
|
||||
% (hostname, ', '.join(map(repr, dnsnames))))
|
||||
elif len(dnsnames) == 1:
|
||||
raise CertificateError("hostname %r "
|
||||
"doesn't match %r"
|
||||
% (hostname, dnsnames[0]))
|
||||
else:
|
||||
raise CertificateError("no appropriate commonName or "
|
||||
"subjectAltName fields were found")
|
||||
@@ -1,440 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import collections
|
||||
import functools
|
||||
import logging
|
||||
|
||||
from ._collections import RecentlyUsedContainer
|
||||
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
|
||||
from .connectionpool import port_by_scheme
|
||||
from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
|
||||
from .packages.six.moves.urllib.parse import urljoin
|
||||
from .request import RequestMethods
|
||||
from .util.url import parse_url
|
||||
from .util.retry import Retry
|
||||
|
||||
|
||||
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
|
||||
'ssl_version', 'ca_cert_dir', 'ssl_context')
|
||||
|
||||
# All known keyword arguments that could be provided to the pool manager, its
|
||||
# pools, or the underlying connections. This is used to construct a pool key.
|
||||
_key_fields = (
|
||||
'key_scheme', # str
|
||||
'key_host', # str
|
||||
'key_port', # int
|
||||
'key_timeout', # int or float or Timeout
|
||||
'key_retries', # int or Retry
|
||||
'key_strict', # bool
|
||||
'key_block', # bool
|
||||
'key_source_address', # str
|
||||
'key_key_file', # str
|
||||
'key_cert_file', # str
|
||||
'key_cert_reqs', # str
|
||||
'key_ca_certs', # str
|
||||
'key_ssl_version', # str
|
||||
'key_ca_cert_dir', # str
|
||||
'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
|
||||
'key_maxsize', # int
|
||||
'key_headers', # dict
|
||||
'key__proxy', # parsed proxy url
|
||||
'key__proxy_headers', # dict
|
||||
'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples
|
||||
'key__socks_options', # dict
|
||||
'key_assert_hostname', # bool or string
|
||||
'key_assert_fingerprint', # str
|
||||
)
|
||||
|
||||
#: The namedtuple class used to construct keys for the connection pool.
|
||||
#: All custom key schemes should include the fields in this key at a minimum.
|
||||
PoolKey = collections.namedtuple('PoolKey', _key_fields)
|
||||
|
||||
|
||||
def _default_key_normalizer(key_class, request_context):
|
||||
"""
|
||||
Create a pool key out of a request context dictionary.
|
||||
|
||||
According to RFC 3986, both the scheme and host are case-insensitive.
|
||||
Therefore, this function normalizes both before constructing the pool
|
||||
key for an HTTPS request. If you wish to change this behaviour, provide
|
||||
alternate callables to ``key_fn_by_scheme``.
|
||||
|
||||
:param key_class:
|
||||
The class to use when constructing the key. This should be a namedtuple
|
||||
with the ``scheme`` and ``host`` keys at a minimum.
|
||||
:type key_class: namedtuple
|
||||
:param request_context:
|
||||
A dictionary-like object that contain the context for a request.
|
||||
:type request_context: dict
|
||||
|
||||
:return: A namedtuple that can be used as a connection pool key.
|
||||
:rtype: PoolKey
|
||||
"""
|
||||
# Since we mutate the dictionary, make a copy first
|
||||
context = request_context.copy()
|
||||
context['scheme'] = context['scheme'].lower()
|
||||
context['host'] = context['host'].lower()
|
||||
|
||||
# These are both dictionaries and need to be transformed into frozensets
|
||||
for key in ('headers', '_proxy_headers', '_socks_options'):
|
||||
if key in context and context[key] is not None:
|
||||
context[key] = frozenset(context[key].items())
|
||||
|
||||
# The socket_options key may be a list and needs to be transformed into a
|
||||
# tuple.
|
||||
socket_opts = context.get('socket_options')
|
||||
if socket_opts is not None:
|
||||
context['socket_options'] = tuple(socket_opts)
|
||||
|
||||
# Map the kwargs to the names in the namedtuple - this is necessary since
|
||||
# namedtuples can't have fields starting with '_'.
|
||||
for key in list(context.keys()):
|
||||
context['key_' + key] = context.pop(key)
|
||||
|
||||
# Default to ``None`` for keys missing from the context
|
||||
for field in key_class._fields:
|
||||
if field not in context:
|
||||
context[field] = None
|
||||
|
||||
return key_class(**context)
|
||||
|
||||
|
||||
#: A dictionary that maps a scheme to a callable that creates a pool key.
|
||||
#: This can be used to alter the way pool keys are constructed, if desired.
|
||||
#: Each PoolManager makes a copy of this dictionary so they can be configured
|
||||
#: globally here, or individually on the instance.
|
||||
key_fn_by_scheme = {
|
||||
'http': functools.partial(_default_key_normalizer, PoolKey),
|
||||
'https': functools.partial(_default_key_normalizer, PoolKey),
|
||||
}
|
||||
|
||||
pool_classes_by_scheme = {
|
||||
'http': HTTPConnectionPool,
|
||||
'https': HTTPSConnectionPool,
|
||||
}
|
||||
|
||||
|
||||
class PoolManager(RequestMethods):
|
||||
"""
|
||||
Allows for arbitrary requests while transparently keeping track of
|
||||
necessary connection pools for you.
|
||||
|
||||
:param num_pools:
|
||||
Number of connection pools to cache before discarding the least
|
||||
recently used pool.
|
||||
|
||||
:param headers:
|
||||
Headers to include with all requests, unless other headers are given
|
||||
explicitly.
|
||||
|
||||
:param \\**connection_pool_kw:
|
||||
Additional parameters are used to create fresh
|
||||
:class:`urllib3.connectionpool.ConnectionPool` instances.
|
||||
|
||||
Example::
|
||||
|
||||
>>> manager = PoolManager(num_pools=2)
|
||||
>>> r = manager.request('GET', 'http://google.com/')
|
||||
>>> r = manager.request('GET', 'http://google.com/mail')
|
||||
>>> r = manager.request('GET', 'http://yahoo.com/')
|
||||
>>> len(manager.pools)
|
||||
2
|
||||
|
||||
"""
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
|
||||
RequestMethods.__init__(self, headers)
|
||||
self.connection_pool_kw = connection_pool_kw
|
||||
self.pools = RecentlyUsedContainer(num_pools,
|
||||
dispose_func=lambda p: p.close())
|
||||
|
||||
# Locally set the pool classes and keys so other PoolManagers can
|
||||
# override them.
|
||||
self.pool_classes_by_scheme = pool_classes_by_scheme
|
||||
self.key_fn_by_scheme = key_fn_by_scheme.copy()
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.clear()
|
||||
# Return False to re-raise any potential exceptions
|
||||
return False
|
||||
|
||||
def _new_pool(self, scheme, host, port, request_context=None):
|
||||
"""
|
||||
Create a new :class:`ConnectionPool` based on host, port, scheme, and
|
||||
any additional pool keyword arguments.
|
||||
|
||||
If ``request_context`` is provided, it is provided as keyword arguments
|
||||
to the pool class used. This method is used to actually create the
|
||||
connection pools handed out by :meth:`connection_from_url` and
|
||||
companion methods. It is intended to be overridden for customization.
|
||||
"""
|
||||
pool_cls = self.pool_classes_by_scheme[scheme]
|
||||
if request_context is None:
|
||||
request_context = self.connection_pool_kw.copy()
|
||||
|
||||
# Although the context has everything necessary to create the pool,
|
||||
# this function has historically only used the scheme, host, and port
|
||||
# in the positional args. When an API change is acceptable these can
|
||||
# be removed.
|
||||
for key in ('scheme', 'host', 'port'):
|
||||
request_context.pop(key, None)
|
||||
|
||||
if scheme == 'http':
|
||||
for kw in SSL_KEYWORDS:
|
||||
request_context.pop(kw, None)
|
||||
|
||||
return pool_cls(host, port, **request_context)
|
||||
|
||||
def clear(self):
|
||||
"""
|
||||
Empty our store of pools and direct them all to close.
|
||||
|
||||
This will not affect in-flight connections, but they will not be
|
||||
re-used after completion.
|
||||
"""
|
||||
self.pools.clear()
|
||||
|
||||
def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
|
||||
"""
|
||||
Get a :class:`ConnectionPool` based on the host, port, and scheme.
|
||||
|
||||
If ``port`` isn't given, it will be derived from the ``scheme`` using
|
||||
``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
|
||||
provided, it is merged with the instance's ``connection_pool_kw``
|
||||
variable and used to create the new connection pool, if one is
|
||||
needed.
|
||||
"""
|
||||
|
||||
if not host:
|
||||
raise LocationValueError("No host specified.")
|
||||
|
||||
request_context = self._merge_pool_kwargs(pool_kwargs)
|
||||
request_context['scheme'] = scheme or 'http'
|
||||
if not port:
|
||||
port = port_by_scheme.get(request_context['scheme'].lower(), 80)
|
||||
request_context['port'] = port
|
||||
request_context['host'] = host
|
||||
|
||||
return self.connection_from_context(request_context)
|
||||
|
||||
def connection_from_context(self, request_context):
|
||||
"""
|
||||
Get a :class:`ConnectionPool` based on the request context.
|
||||
|
||||
``request_context`` must at least contain the ``scheme`` key and its
|
||||
value must be a key in ``key_fn_by_scheme`` instance variable.
|
||||
"""
|
||||
scheme = request_context['scheme'].lower()
|
||||
pool_key_constructor = self.key_fn_by_scheme[scheme]
|
||||
pool_key = pool_key_constructor(request_context)
|
||||
|
||||
return self.connection_from_pool_key(pool_key, request_context=request_context)
|
||||
|
||||
def connection_from_pool_key(self, pool_key, request_context=None):
|
||||
"""
|
||||
Get a :class:`ConnectionPool` based on the provided pool key.
|
||||
|
||||
``pool_key`` should be a namedtuple that only contains immutable
|
||||
objects. At a minimum it must have the ``scheme``, ``host``, and
|
||||
``port`` fields.
|
||||
"""
|
||||
with self.pools.lock:
|
||||
# If the scheme, host, or port doesn't match existing open
|
||||
# connections, open a new ConnectionPool.
|
||||
pool = self.pools.get(pool_key)
|
||||
if pool:
|
||||
return pool
|
||||
|
||||
# Make a fresh ConnectionPool of the desired type
|
||||
scheme = request_context['scheme']
|
||||
host = request_context['host']
|
||||
port = request_context['port']
|
||||
pool = self._new_pool(scheme, host, port, request_context=request_context)
|
||||
self.pools[pool_key] = pool
|
||||
|
||||
return pool
|
||||
|
||||
def connection_from_url(self, url, pool_kwargs=None):
|
||||
"""
|
||||
Similar to :func:`urllib3.connectionpool.connection_from_url`.
|
||||
|
||||
If ``pool_kwargs`` is not provided and a new pool needs to be
|
||||
constructed, ``self.connection_pool_kw`` is used to initialize
|
||||
the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
|
||||
is provided, it is used instead. Note that if a new pool does not
|
||||
need to be created for the request, the provided ``pool_kwargs`` are
|
||||
not used.
|
||||
"""
|
||||
u = parse_url(url)
|
||||
return self.connection_from_host(u.host, port=u.port, scheme=u.scheme,
|
||||
pool_kwargs=pool_kwargs)
|
||||
|
||||
def _merge_pool_kwargs(self, override):
|
||||
"""
|
||||
Merge a dictionary of override values for self.connection_pool_kw.
|
||||
|
||||
This does not modify self.connection_pool_kw and returns a new dict.
|
||||
Any keys in the override dictionary with a value of ``None`` are
|
||||
removed from the merged dictionary.
|
||||
"""
|
||||
base_pool_kwargs = self.connection_pool_kw.copy()
|
||||
if override:
|
||||
for key, value in override.items():
|
||||
if value is None:
|
||||
try:
|
||||
del base_pool_kwargs[key]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
base_pool_kwargs[key] = value
|
||||
return base_pool_kwargs
|
||||
|
||||
def urlopen(self, method, url, redirect=True, **kw):
|
||||
"""
|
||||
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
|
||||
with custom cross-host redirect logic and only sends the request-uri
|
||||
portion of the ``url``.
|
||||
|
||||
The given ``url`` parameter must be absolute, such that an appropriate
|
||||
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
|
||||
"""
|
||||
u = parse_url(url)
|
||||
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
|
||||
|
||||
kw['assert_same_host'] = False
|
||||
kw['redirect'] = False
|
||||
if 'headers' not in kw:
|
||||
kw['headers'] = self.headers
|
||||
|
||||
if self.proxy is not None and u.scheme == "http":
|
||||
response = conn.urlopen(method, url, **kw)
|
||||
else:
|
||||
response = conn.urlopen(method, u.request_uri, **kw)
|
||||
|
||||
redirect_location = redirect and response.get_redirect_location()
|
||||
if not redirect_location:
|
||||
return response
|
||||
|
||||
# Support relative URLs for redirecting.
|
||||
redirect_location = urljoin(url, redirect_location)
|
||||
|
||||
# RFC 7231, Section 6.4.4
|
||||
if response.status == 303:
|
||||
method = 'GET'
|
||||
|
||||
retries = kw.get('retries')
|
||||
if not isinstance(retries, Retry):
|
||||
retries = Retry.from_int(retries, redirect=redirect)
|
||||
|
||||
try:
|
||||
retries = retries.increment(method, url, response=response, _pool=conn)
|
||||
except MaxRetryError:
|
||||
if retries.raise_on_redirect:
|
||||
raise
|
||||
return response
|
||||
|
||||
kw['retries'] = retries
|
||||
kw['redirect'] = redirect
|
||||
|
||||
log.info("Redirecting %s -> %s", url, redirect_location)
|
||||
return self.urlopen(method, redirect_location, **kw)
|
||||
|
||||
|
||||
class ProxyManager(PoolManager):
|
||||
"""
|
||||
Behaves just like :class:`PoolManager`, but sends all requests through
|
||||
the defined proxy, using the CONNECT method for HTTPS URLs.
|
||||
|
||||
:param proxy_url:
|
||||
The URL of the proxy to be used.
|
||||
|
||||
:param proxy_headers:
|
||||
A dictionary contaning headers that will be sent to the proxy. In case
|
||||
of HTTP they are being sent with each request, while in the
|
||||
HTTPS/CONNECT case they are sent only once. Could be used for proxy
|
||||
authentication.
|
||||
|
||||
Example:
|
||||
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
|
||||
>>> r1 = proxy.request('GET', 'http://google.com/')
|
||||
>>> r2 = proxy.request('GET', 'http://httpbin.org/')
|
||||
>>> len(proxy.pools)
|
||||
1
|
||||
>>> r3 = proxy.request('GET', 'https://httpbin.org/')
|
||||
>>> r4 = proxy.request('GET', 'https://twitter.com/')
|
||||
>>> len(proxy.pools)
|
||||
3
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, proxy_url, num_pools=10, headers=None,
|
||||
proxy_headers=None, **connection_pool_kw):
|
||||
|
||||
if isinstance(proxy_url, HTTPConnectionPool):
|
||||
proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
|
||||
proxy_url.port)
|
||||
proxy = parse_url(proxy_url)
|
||||
if not proxy.port:
|
||||
port = port_by_scheme.get(proxy.scheme, 80)
|
||||
proxy = proxy._replace(port=port)
|
||||
|
||||
if proxy.scheme not in ("http", "https"):
|
||||
raise ProxySchemeUnknown(proxy.scheme)
|
||||
|
||||
self.proxy = proxy
|
||||
self.proxy_headers = proxy_headers or {}
|
||||
|
||||
connection_pool_kw['_proxy'] = self.proxy
|
||||
connection_pool_kw['_proxy_headers'] = self.proxy_headers
|
||||
|
||||
super(ProxyManager, self).__init__(
|
||||
num_pools, headers, **connection_pool_kw)
|
||||
|
||||
def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
|
||||
if scheme == "https":
|
||||
return super(ProxyManager, self).connection_from_host(
|
||||
host, port, scheme, pool_kwargs=pool_kwargs)
|
||||
|
||||
return super(ProxyManager, self).connection_from_host(
|
||||
self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs)
|
||||
|
||||
def _set_proxy_headers(self, url, headers=None):
|
||||
"""
|
||||
Sets headers needed by proxies: specifically, the Accept and Host
|
||||
headers. Only sets headers not provided by the user.
|
||||
"""
|
||||
headers_ = {'Accept': '*/*'}
|
||||
|
||||
netloc = parse_url(url).netloc
|
||||
if netloc:
|
||||
headers_['Host'] = netloc
|
||||
|
||||
if headers:
|
||||
headers_.update(headers)
|
||||
return headers_
|
||||
|
||||
def urlopen(self, method, url, redirect=True, **kw):
|
||||
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
|
||||
u = parse_url(url)
|
||||
|
||||
if u.scheme == "http":
|
||||
# For proxied HTTPS requests, httplib sets the necessary headers
|
||||
# on the CONNECT to the proxy. For HTTP, we'll definitely
|
||||
# need to set 'Host' at the very least.
|
||||
headers = kw.get('headers', self.headers)
|
||||
kw['headers'] = self._set_proxy_headers(url, headers)
|
||||
|
||||
return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
|
||||
|
||||
|
||||
def proxy_from_url(url, **kw):
|
||||
return ProxyManager(proxy_url=url, **kw)
|
||||
@@ -1,148 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .filepost import encode_multipart_formdata
|
||||
from .packages.six.moves.urllib.parse import urlencode
|
||||
|
||||
|
||||
__all__ = ['RequestMethods']
|
||||
|
||||
|
||||
class RequestMethods(object):
|
||||
"""
|
||||
Convenience mixin for classes who implement a :meth:`urlopen` method, such
|
||||
as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
|
||||
:class:`~urllib3.poolmanager.PoolManager`.
|
||||
|
||||
Provides behavior for making common types of HTTP request methods and
|
||||
decides which type of request field encoding to use.
|
||||
|
||||
Specifically,
|
||||
|
||||
:meth:`.request_encode_url` is for sending requests whose fields are
|
||||
encoded in the URL (such as GET, HEAD, DELETE).
|
||||
|
||||
:meth:`.request_encode_body` is for sending requests whose fields are
|
||||
encoded in the *body* of the request using multipart or www-form-urlencoded
|
||||
(such as for POST, PUT, PATCH).
|
||||
|
||||
:meth:`.request` is for making any kind of request, it will look up the
|
||||
appropriate encoding format and use one of the above two methods to make
|
||||
the request.
|
||||
|
||||
Initializer parameters:
|
||||
|
||||
:param headers:
|
||||
Headers to include with all requests, unless other headers are given
|
||||
explicitly.
|
||||
"""
|
||||
|
||||
_encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
|
||||
|
||||
def __init__(self, headers=None):
|
||||
self.headers = headers or {}
|
||||
|
||||
def urlopen(self, method, url, body=None, headers=None,
|
||||
encode_multipart=True, multipart_boundary=None,
|
||||
**kw): # Abstract
|
||||
raise NotImplemented("Classes extending RequestMethods must implement "
|
||||
"their own ``urlopen`` method.")
|
||||
|
||||
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
|
||||
"""
|
||||
Make a request using :meth:`urlopen` with the appropriate encoding of
|
||||
``fields`` based on the ``method`` used.
|
||||
|
||||
This is a convenience method that requires the least amount of manual
|
||||
effort. It can be used in most situations, while still having the
|
||||
option to drop down to more specific methods when necessary, such as
|
||||
:meth:`request_encode_url`, :meth:`request_encode_body`,
|
||||
or even the lowest level :meth:`urlopen`.
|
||||
"""
|
||||
method = method.upper()
|
||||
|
||||
if method in self._encode_url_methods:
|
||||
return self.request_encode_url(method, url, fields=fields,
|
||||
headers=headers,
|
||||
**urlopen_kw)
|
||||
else:
|
||||
return self.request_encode_body(method, url, fields=fields,
|
||||
headers=headers,
|
||||
**urlopen_kw)
|
||||
|
||||
def request_encode_url(self, method, url, fields=None, headers=None,
|
||||
**urlopen_kw):
|
||||
"""
|
||||
Make a request using :meth:`urlopen` with the ``fields`` encoded in
|
||||
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
|
||||
"""
|
||||
if headers is None:
|
||||
headers = self.headers
|
||||
|
||||
extra_kw = {'headers': headers}
|
||||
extra_kw.update(urlopen_kw)
|
||||
|
||||
if fields:
|
||||
url += '?' + urlencode(fields)
|
||||
|
||||
return self.urlopen(method, url, **extra_kw)
|
||||
|
||||
def request_encode_body(self, method, url, fields=None, headers=None,
|
||||
encode_multipart=True, multipart_boundary=None,
|
||||
**urlopen_kw):
|
||||
"""
|
||||
Make a request using :meth:`urlopen` with the ``fields`` encoded in
|
||||
the body. This is useful for request methods like POST, PUT, PATCH, etc.
|
||||
|
||||
When ``encode_multipart=True`` (default), then
|
||||
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
|
||||
the payload with the appropriate content type. Otherwise
|
||||
:meth:`urllib.urlencode` is used with the
|
||||
'application/x-www-form-urlencoded' content type.
|
||||
|
||||
Multipart encoding must be used when posting files, and it's reasonably
|
||||
safe to use it in other times too. However, it may break request
|
||||
signing, such as with OAuth.
|
||||
|
||||
Supports an optional ``fields`` parameter of key/value strings AND
|
||||
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
|
||||
the MIME type is optional. For example::
|
||||
|
||||
fields = {
|
||||
'foo': 'bar',
|
||||
'fakefile': ('foofile.txt', 'contents of foofile'),
|
||||
'realfile': ('barfile.txt', open('realfile').read()),
|
||||
'typedfile': ('bazfile.bin', open('bazfile').read(),
|
||||
'image/jpeg'),
|
||||
'nonamefile': 'contents of nonamefile field',
|
||||
}
|
||||
|
||||
When uploading a file, providing a filename (the first parameter of the
|
||||
tuple) is optional but recommended to best mimick behavior of browsers.
|
||||
|
||||
Note that if ``headers`` are supplied, the 'Content-Type' header will
|
||||
be overwritten because it depends on the dynamic random boundary string
|
||||
which is used to compose the body of the request. The random boundary
|
||||
string can be explicitly set with the ``multipart_boundary`` parameter.
|
||||
"""
|
||||
if headers is None:
|
||||
headers = self.headers
|
||||
|
||||
extra_kw = {'headers': {}}
|
||||
|
||||
if fields:
|
||||
if 'body' in urlopen_kw:
|
||||
raise TypeError(
|
||||
"request got values for both 'fields' and 'body', can only specify one.")
|
||||
|
||||
if encode_multipart:
|
||||
body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
|
||||
else:
|
||||
body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'
|
||||
|
||||
extra_kw['body'] = body
|
||||
extra_kw['headers'] = {'Content-Type': content_type}
|
||||
|
||||
extra_kw['headers'].update(headers)
|
||||
extra_kw.update(urlopen_kw)
|
||||
|
||||
return self.urlopen(method, url, **extra_kw)
|
||||
@@ -1,622 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from contextlib import contextmanager
|
||||
import zlib
|
||||
import io
|
||||
import logging
|
||||
from socket import timeout as SocketTimeout
|
||||
from socket import error as SocketError
|
||||
|
||||
from ._collections import HTTPHeaderDict
|
||||
from .exceptions import (
|
||||
BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError,
|
||||
ResponseNotChunked, IncompleteRead, InvalidHeader
|
||||
)
|
||||
from .packages.six import string_types as basestring, binary_type, PY3
|
||||
from .packages.six.moves import http_client as httplib
|
||||
from .connection import HTTPException, BaseSSLError
|
||||
from .util.response import is_fp_closed, is_response_to_head
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DeflateDecoder(object):
|
||||
|
||||
def __init__(self):
|
||||
self._first_try = True
|
||||
self._data = binary_type()
|
||||
self._obj = zlib.decompressobj()
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._obj, name)
|
||||
|
||||
def decompress(self, data):
|
||||
if not data:
|
||||
return data
|
||||
|
||||
if not self._first_try:
|
||||
return self._obj.decompress(data)
|
||||
|
||||
self._data += data
|
||||
try:
|
||||
decompressed = self._obj.decompress(data)
|
||||
if decompressed:
|
||||
self._first_try = False
|
||||
self._data = None
|
||||
return decompressed
|
||||
except zlib.error:
|
||||
self._first_try = False
|
||||
self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
|
||||
try:
|
||||
return self.decompress(self._data)
|
||||
finally:
|
||||
self._data = None
|
||||
|
||||
|
||||
class GzipDecoder(object):
|
||||
|
||||
def __init__(self):
|
||||
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._obj, name)
|
||||
|
||||
def decompress(self, data):
|
||||
if not data:
|
||||
return data
|
||||
return self._obj.decompress(data)
|
||||
|
||||
|
||||
def _get_decoder(mode):
|
||||
if mode == 'gzip':
|
||||
return GzipDecoder()
|
||||
|
||||
return DeflateDecoder()
|
||||
|
||||
|
||||
class HTTPResponse(io.IOBase):
|
||||
"""
|
||||
HTTP Response container.
|
||||
|
||||
Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
|
||||
loaded and decoded on-demand when the ``data`` property is accessed. This
|
||||
class is also compatible with the Python standard library's :mod:`io`
|
||||
module, and can hence be treated as a readable object in the context of that
|
||||
framework.
|
||||
|
||||
Extra parameters for behaviour not present in httplib.HTTPResponse:
|
||||
|
||||
:param preload_content:
|
||||
If True, the response's body will be preloaded during construction.
|
||||
|
||||
:param decode_content:
|
||||
If True, attempts to decode specific content-encoding's based on headers
|
||||
(like 'gzip' and 'deflate') will be skipped and raw data will be used
|
||||
instead.
|
||||
|
||||
:param original_response:
|
||||
When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
|
||||
object, it's convenient to include the original for debug purposes. It's
|
||||
otherwise unused.
|
||||
|
||||
:param retries:
|
||||
The retries contains the last :class:`~urllib3.util.retry.Retry` that
|
||||
was used during the request.
|
||||
|
||||
:param enforce_content_length:
|
||||
Enforce content length checking. Body returned by server must match
|
||||
value of Content-Length header, if present. Otherwise, raise error.
|
||||
"""
|
||||
|
||||
CONTENT_DECODERS = ['gzip', 'deflate']
|
||||
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
|
||||
|
||||
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
|
||||
strict=0, preload_content=True, decode_content=True,
|
||||
original_response=None, pool=None, connection=None,
|
||||
retries=None, enforce_content_length=False, request_method=None):
|
||||
|
||||
if isinstance(headers, HTTPHeaderDict):
|
||||
self.headers = headers
|
||||
else:
|
||||
self.headers = HTTPHeaderDict(headers)
|
||||
self.status = status
|
||||
self.version = version
|
||||
self.reason = reason
|
||||
self.strict = strict
|
||||
self.decode_content = decode_content
|
||||
self.retries = retries
|
||||
self.enforce_content_length = enforce_content_length
|
||||
|
||||
self._decoder = None
|
||||
self._body = None
|
||||
self._fp = None
|
||||
self._original_response = original_response
|
||||
self._fp_bytes_read = 0
|
||||
|
||||
if body and isinstance(body, (basestring, binary_type)):
|
||||
self._body = body
|
||||
|
||||
self._pool = pool
|
||||
self._connection = connection
|
||||
|
||||
if hasattr(body, 'read'):
|
||||
self._fp = body
|
||||
|
||||
# Are we using the chunked-style of transfer encoding?
|
||||
self.chunked = False
|
||||
self.chunk_left = None
|
||||
tr_enc = self.headers.get('transfer-encoding', '').lower()
|
||||
# Don't incur the penalty of creating a list and then discarding it
|
||||
encodings = (enc.strip() for enc in tr_enc.split(","))
|
||||
if "chunked" in encodings:
|
||||
self.chunked = True
|
||||
|
||||
# Determine length of response
|
||||
self.length_remaining = self._init_length(request_method)
|
||||
|
||||
# If requested, preload the body.
|
||||
if preload_content and not self._body:
|
||||
self._body = self.read(decode_content=decode_content)
|
||||
|
||||
def get_redirect_location(self):
|
||||
"""
|
||||
Should we redirect and where to?
|
||||
|
||||
:returns: Truthy redirect location string if we got a redirect status
|
||||
code and valid location. ``None`` if redirect status and no
|
||||
location. ``False`` if not a redirect status code.
|
||||
"""
|
||||
if self.status in self.REDIRECT_STATUSES:
|
||||
return self.headers.get('location')
|
||||
|
||||
return False
|
||||
|
||||
def release_conn(self):
|
||||
if not self._pool or not self._connection:
|
||||
return
|
||||
|
||||
self._pool._put_conn(self._connection)
|
||||
self._connection = None
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
# For backwords-compat with earlier urllib3 0.4 and earlier.
|
||||
if self._body:
|
||||
return self._body
|
||||
|
||||
if self._fp:
|
||||
return self.read(cache_content=True)
|
||||
|
||||
@property
|
||||
def connection(self):
|
||||
return self._connection
|
||||
|
||||
def tell(self):
|
||||
"""
|
||||
Obtain the number of bytes pulled over the wire so far. May differ from
|
||||
the amount of content returned by :meth:``HTTPResponse.read`` if bytes
|
||||
are encoded on the wire (e.g, compressed).
|
||||
"""
|
||||
return self._fp_bytes_read
|
||||
|
||||
def _init_length(self, request_method):
|
||||
"""
|
||||
Set initial length value for Response content if available.
|
||||
"""
|
||||
length = self.headers.get('content-length')
|
||||
|
||||
if length is not None and self.chunked:
|
||||
# This Response will fail with an IncompleteRead if it can't be
|
||||
# received as chunked. This method falls back to attempt reading
|
||||
# the response before raising an exception.
|
||||
log.warning("Received response with both Content-Length and "
|
||||
"Transfer-Encoding set. This is expressly forbidden "
|
||||
"by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
|
||||
"attempting to process response as Transfer-Encoding: "
|
||||
"chunked.")
|
||||
return None
|
||||
|
||||
elif length is not None:
|
||||
try:
|
||||
# RFC 7230 section 3.3.2 specifies multiple content lengths can
|
||||
# be sent in a single Content-Length header
|
||||
# (e.g. Content-Length: 42, 42). This line ensures the values
|
||||
# are all valid ints and that as long as the `set` length is 1,
|
||||
# all values are the same. Otherwise, the header is invalid.
|
||||
lengths = set([int(val) for val in length.split(',')])
|
||||
if len(lengths) > 1:
|
||||
raise InvalidHeader("Content-Length contained multiple "
|
||||
"unmatching values (%s)" % length)
|
||||
length = lengths.pop()
|
||||
except ValueError:
|
||||
length = None
|
||||
else:
|
||||
if length < 0:
|
||||
length = None
|
||||
|
||||
# Convert status to int for comparison
|
||||
# In some cases, httplib returns a status of "_UNKNOWN"
|
||||
try:
|
||||
status = int(self.status)
|
||||
except ValueError:
|
||||
status = 0
|
||||
|
||||
# Check for responses that shouldn't include a body
|
||||
if status in (204, 304) or 100 <= status < 200 or request_method == 'HEAD':
|
||||
length = 0
|
||||
|
||||
return length
|
||||
|
||||
def _init_decoder(self):
|
||||
"""
|
||||
Set-up the _decoder attribute if necessary.
|
||||
"""
|
||||
# Note: content-encoding value should be case-insensitive, per RFC 7230
|
||||
# Section 3.2
|
||||
content_encoding = self.headers.get('content-encoding', '').lower()
|
||||
if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
|
||||
self._decoder = _get_decoder(content_encoding)
|
||||
|
||||
def _decode(self, data, decode_content, flush_decoder):
|
||||
"""
|
||||
Decode the data passed in and potentially flush the decoder.
|
||||
"""
|
||||
try:
|
||||
if decode_content and self._decoder:
|
||||
data = self._decoder.decompress(data)
|
||||
except (IOError, zlib.error) as e:
|
||||
content_encoding = self.headers.get('content-encoding', '').lower()
|
||||
raise DecodeError(
|
||||
"Received response with content-encoding: %s, but "
|
||||
"failed to decode it." % content_encoding, e)
|
||||
|
||||
if flush_decoder and decode_content:
|
||||
data += self._flush_decoder()
|
||||
|
||||
return data
|
||||
|
||||
def _flush_decoder(self):
|
||||
"""
|
||||
Flushes the decoder. Should only be called if the decoder is actually
|
||||
being used.
|
||||
"""
|
||||
if self._decoder:
|
||||
buf = self._decoder.decompress(b'')
|
||||
return buf + self._decoder.flush()
|
||||
|
||||
return b''
|
||||
|
||||
@contextmanager
|
||||
def _error_catcher(self):
|
||||
"""
|
||||
Catch low-level python exceptions, instead re-raising urllib3
|
||||
variants, so that low-level exceptions are not leaked in the
|
||||
high-level api.
|
||||
|
||||
On exit, release the connection back to the pool.
|
||||
"""
|
||||
clean_exit = False
|
||||
|
||||
try:
|
||||
try:
|
||||
yield
|
||||
|
||||
except SocketTimeout:
|
||||
# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
|
||||
# there is yet no clean way to get at it from this context.
|
||||
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
|
||||
|
||||
except BaseSSLError as e:
|
||||
# FIXME: Is there a better way to differentiate between SSLErrors?
|
||||
if 'read operation timed out' not in str(e): # Defensive:
|
||||
# This shouldn't happen but just in case we're missing an edge
|
||||
# case, let's avoid swallowing SSL errors.
|
||||
raise
|
||||
|
||||
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
|
||||
|
||||
except (HTTPException, SocketError) as e:
|
||||
# This includes IncompleteRead.
|
||||
raise ProtocolError('Connection broken: %r' % e, e)
|
||||
|
||||
# If no exception is thrown, we should avoid cleaning up
|
||||
# unnecessarily.
|
||||
clean_exit = True
|
||||
finally:
|
||||
# If we didn't terminate cleanly, we need to throw away our
|
||||
# connection.
|
||||
if not clean_exit:
|
||||
# The response may not be closed but we're not going to use it
|
||||
# anymore so close it now to ensure that the connection is
|
||||
# released back to the pool.
|
||||
if self._original_response:
|
||||
self._original_response.close()
|
||||
|
||||
# Closing the response may not actually be sufficient to close
|
||||
# everything, so if we have a hold of the connection close that
|
||||
# too.
|
||||
if self._connection:
|
||||
self._connection.close()
|
||||
|
||||
# If we hold the original response but it's closed now, we should
|
||||
# return the connection back to the pool.
|
||||
if self._original_response and self._original_response.isclosed():
|
||||
self.release_conn()
|
||||
|
||||
def read(self, amt=None, decode_content=None, cache_content=False):
|
||||
"""
|
||||
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
|
||||
parameters: ``decode_content`` and ``cache_content``.
|
||||
|
||||
:param amt:
|
||||
How much of the content to read. If specified, caching is skipped
|
||||
because it doesn't make sense to cache partial content as the full
|
||||
response.
|
||||
|
||||
:param decode_content:
|
||||
If True, will attempt to decode the body based on the
|
||||
'content-encoding' header.
|
||||
|
||||
:param cache_content:
|
||||
If True, will save the returned data such that the same result is
|
||||
returned despite of the state of the underlying file object. This
|
||||
is useful if you want the ``.data`` property to continue working
|
||||
after having ``.read()`` the file object. (Overridden if ``amt`` is
|
||||
set.)
|
||||
"""
|
||||
self._init_decoder()
|
||||
if decode_content is None:
|
||||
decode_content = self.decode_content
|
||||
|
||||
if self._fp is None:
|
||||
return
|
||||
|
||||
flush_decoder = False
|
||||
data = None
|
||||
|
||||
with self._error_catcher():
|
||||
if amt is None:
|
||||
# cStringIO doesn't like amt=None
|
||||
data = self._fp.read()
|
||||
flush_decoder = True
|
||||
else:
|
||||
cache_content = False
|
||||
data = self._fp.read(amt)
|
||||
if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
|
||||
# Close the connection when no data is returned
|
||||
#
|
||||
# This is redundant to what httplib/http.client _should_
|
||||
# already do. However, versions of python released before
|
||||
# December 15, 2012 (http://bugs.python.org/issue16298) do
|
||||
# not properly close the connection in all cases. There is
|
||||
# no harm in redundantly calling close.
|
||||
self._fp.close()
|
||||
flush_decoder = True
|
||||
if self.enforce_content_length and self.length_remaining not in (0, None):
|
||||
# This is an edge case that httplib failed to cover due
|
||||
# to concerns of backward compatibility. We're
|
||||
# addressing it here to make sure IncompleteRead is
|
||||
# raised during streaming, so all calls with incorrect
|
||||
# Content-Length are caught.
|
||||
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
|
||||
|
||||
if data:
|
||||
self._fp_bytes_read += len(data)
|
||||
if self.length_remaining is not None:
|
||||
self.length_remaining -= len(data)
|
||||
|
||||
data = self._decode(data, decode_content, flush_decoder)
|
||||
|
||||
if cache_content:
|
||||
self._body = data
|
||||
|
||||
return data
|
||||
|
||||
def stream(self, amt=2**16, decode_content=None):
|
||||
"""
|
||||
A generator wrapper for the read() method. A call will block until
|
||||
``amt`` bytes have been read from the connection or until the
|
||||
connection is closed.
|
||||
|
||||
:param amt:
|
||||
How much of the content to read. The generator will return up to
|
||||
much data per iteration, but may return less. This is particularly
|
||||
likely when using compressed data. However, the empty string will
|
||||
never be returned.
|
||||
|
||||
:param decode_content:
|
||||
If True, will attempt to decode the body based on the
|
||||
'content-encoding' header.
|
||||
"""
|
||||
if self.chunked and self.supports_chunked_reads():
|
||||
for line in self.read_chunked(amt, decode_content=decode_content):
|
||||
yield line
|
||||
else:
|
||||
while not is_fp_closed(self._fp):
|
||||
data = self.read(amt=amt, decode_content=decode_content)
|
||||
|
||||
if data:
|
||||
yield data
|
||||
|
||||
@classmethod
|
||||
def from_httplib(ResponseCls, r, **response_kw):
|
||||
"""
|
||||
Given an :class:`httplib.HTTPResponse` instance ``r``, return a
|
||||
corresponding :class:`urllib3.response.HTTPResponse` object.
|
||||
|
||||
Remaining parameters are passed to the HTTPResponse constructor, along
|
||||
with ``original_response=r``.
|
||||
"""
|
||||
headers = r.msg
|
||||
|
||||
if not isinstance(headers, HTTPHeaderDict):
|
||||
if PY3: # Python 3
|
||||
headers = HTTPHeaderDict(headers.items())
|
||||
else: # Python 2
|
||||
headers = HTTPHeaderDict.from_httplib(headers)
|
||||
|
||||
# HTTPResponse objects in Python 3 don't have a .strict attribute
|
||||
strict = getattr(r, 'strict', 0)
|
||||
resp = ResponseCls(body=r,
|
||||
headers=headers,
|
||||
status=r.status,
|
||||
version=r.version,
|
||||
reason=r.reason,
|
||||
strict=strict,
|
||||
original_response=r,
|
||||
**response_kw)
|
||||
return resp
|
||||
|
||||
# Backwards-compatibility methods for httplib.HTTPResponse
|
||||
def getheaders(self):
|
||||
return self.headers
|
||||
|
||||
def getheader(self, name, default=None):
|
||||
return self.headers.get(name, default)
|
||||
|
||||
# Overrides from io.IOBase
|
||||
def close(self):
|
||||
if not self.closed:
|
||||
self._fp.close()
|
||||
|
||||
if self._connection:
|
||||
self._connection.close()
|
||||
|
||||
@property
|
||||
def closed(self):
|
||||
if self._fp is None:
|
||||
return True
|
||||
elif hasattr(self._fp, 'isclosed'):
|
||||
return self._fp.isclosed()
|
||||
elif hasattr(self._fp, 'closed'):
|
||||
return self._fp.closed
|
||||
else:
|
||||
return True
|
||||
|
||||
def fileno(self):
|
||||
if self._fp is None:
|
||||
raise IOError("HTTPResponse has no file to get a fileno from")
|
||||
elif hasattr(self._fp, "fileno"):
|
||||
return self._fp.fileno()
|
||||
else:
|
||||
raise IOError("The file-like object this HTTPResponse is wrapped "
|
||||
"around has no file descriptor")
|
||||
|
||||
def flush(self):
|
||||
if self._fp is not None and hasattr(self._fp, 'flush'):
|
||||
return self._fp.flush()
|
||||
|
||||
def readable(self):
|
||||
# This method is required for `io` module compatibility.
|
||||
return True
|
||||
|
||||
def readinto(self, b):
|
||||
# This method is required for `io` module compatibility.
|
||||
temp = self.read(len(b))
|
||||
if len(temp) == 0:
|
||||
return 0
|
||||
else:
|
||||
b[:len(temp)] = temp
|
||||
return len(temp)
|
||||
|
||||
def supports_chunked_reads(self):
|
||||
"""
|
||||
Checks if the underlying file-like object looks like a
|
||||
httplib.HTTPResponse object. We do this by testing for the fp
|
||||
attribute. If it is present we assume it returns raw chunks as
|
||||
processed by read_chunked().
|
||||
"""
|
||||
return hasattr(self._fp, 'fp')
|
||||
|
||||
def _update_chunk_length(self):
|
||||
# First, we'll figure out length of a chunk and then
|
||||
# we'll try to read it from socket.
|
||||
if self.chunk_left is not None:
|
||||
return
|
||||
line = self._fp.fp.readline()
|
||||
line = line.split(b';', 1)[0]
|
||||
try:
|
||||
self.chunk_left = int(line, 16)
|
||||
except ValueError:
|
||||
# Invalid chunked protocol response, abort.
|
||||
self.close()
|
||||
raise httplib.IncompleteRead(line)
|
||||
|
||||
def _handle_chunk(self, amt):
|
||||
returned_chunk = None
|
||||
if amt is None:
|
||||
chunk = self._fp._safe_read(self.chunk_left)
|
||||
returned_chunk = chunk
|
||||
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
||||
self.chunk_left = None
|
||||
elif amt < self.chunk_left:
|
||||
value = self._fp._safe_read(amt)
|
||||
self.chunk_left = self.chunk_left - amt
|
||||
returned_chunk = value
|
||||
elif amt == self.chunk_left:
|
||||
value = self._fp._safe_read(amt)
|
||||
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
||||
self.chunk_left = None
|
||||
returned_chunk = value
|
||||
else: # amt > self.chunk_left
|
||||
returned_chunk = self._fp._safe_read(self.chunk_left)
|
||||
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
||||
self.chunk_left = None
|
||||
return returned_chunk
|
||||
|
||||
def read_chunked(self, amt=None, decode_content=None):
|
||||
"""
|
||||
Similar to :meth:`HTTPResponse.read`, but with an additional
|
||||
parameter: ``decode_content``.
|
||||
|
||||
:param decode_content:
|
||||
If True, will attempt to decode the body based on the
|
||||
'content-encoding' header.
|
||||
"""
|
||||
self._init_decoder()
|
||||
# FIXME: Rewrite this method and make it a class with a better structured logic.
|
||||
if not self.chunked:
|
||||
raise ResponseNotChunked(
|
||||
"Response is not chunked. "
|
||||
"Header 'transfer-encoding: chunked' is missing.")
|
||||
if not self.supports_chunked_reads():
|
||||
raise BodyNotHttplibCompatible(
|
||||
"Body should be httplib.HTTPResponse like. "
|
||||
"It should have have an fp attribute which returns raw chunks.")
|
||||
|
||||
# Don't bother reading the body of a HEAD request.
|
||||
if self._original_response and is_response_to_head(self._original_response):
|
||||
self._original_response.close()
|
||||
return
|
||||
|
||||
with self._error_catcher():
|
||||
while True:
|
||||
self._update_chunk_length()
|
||||
if self.chunk_left == 0:
|
||||
break
|
||||
chunk = self._handle_chunk(amt)
|
||||
decoded = self._decode(chunk, decode_content=decode_content,
|
||||
flush_decoder=False)
|
||||
if decoded:
|
||||
yield decoded
|
||||
|
||||
if decode_content:
|
||||
# On CPython and PyPy, we should never need to flush the
|
||||
# decoder. However, on Jython we *might* need to, so
|
||||
# lets defensively do it anyway.
|
||||
decoded = self._flush_decoder()
|
||||
if decoded: # Platform-specific: Jython.
|
||||
yield decoded
|
||||
|
||||
# Chunk content ends with \r\n: discard it.
|
||||
while True:
|
||||
line = self._fp.fp.readline()
|
||||
if not line:
|
||||
# Some sites may not end with '\r\n'.
|
||||
break
|
||||
if line == b'\r\n':
|
||||
break
|
||||
|
||||
# We read everything; close the "file".
|
||||
if self._original_response:
|
||||
self._original_response.close()
|
||||
@@ -1,54 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
# For backwards compatibility, provide imports that used to be here.
|
||||
from .connection import is_connection_dropped
|
||||
from .request import make_headers
|
||||
from .response import is_fp_closed
|
||||
from .ssl_ import (
|
||||
SSLContext,
|
||||
HAS_SNI,
|
||||
IS_PYOPENSSL,
|
||||
IS_SECURETRANSPORT,
|
||||
assert_fingerprint,
|
||||
resolve_cert_reqs,
|
||||
resolve_ssl_version,
|
||||
ssl_wrap_socket,
|
||||
)
|
||||
from .timeout import (
|
||||
current_time,
|
||||
Timeout,
|
||||
)
|
||||
|
||||
from .retry import Retry
|
||||
from .url import (
|
||||
get_host,
|
||||
parse_url,
|
||||
split_first,
|
||||
Url,
|
||||
)
|
||||
from .wait import (
|
||||
wait_for_read,
|
||||
wait_for_write
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
'HAS_SNI',
|
||||
'IS_PYOPENSSL',
|
||||
'IS_SECURETRANSPORT',
|
||||
'SSLContext',
|
||||
'Retry',
|
||||
'Timeout',
|
||||
'Url',
|
||||
'assert_fingerprint',
|
||||
'current_time',
|
||||
'is_connection_dropped',
|
||||
'is_fp_closed',
|
||||
'get_host',
|
||||
'parse_url',
|
||||
'make_headers',
|
||||
'resolve_cert_reqs',
|
||||
'resolve_ssl_version',
|
||||
'split_first',
|
||||
'ssl_wrap_socket',
|
||||
'wait_for_read',
|
||||
'wait_for_write'
|
||||
)
|
||||
@@ -1,130 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import socket
|
||||
from .wait import wait_for_read
|
||||
from .selectors import HAS_SELECT, SelectorError
|
||||
|
||||
|
||||
def is_connection_dropped(conn): # Platform-specific
|
||||
"""
|
||||
Returns True if the connection is dropped and should be closed.
|
||||
|
||||
:param conn:
|
||||
:class:`httplib.HTTPConnection` object.
|
||||
|
||||
Note: For platforms like AppEngine, this will always return ``False`` to
|
||||
let the platform handle connection recycling transparently for us.
|
||||
"""
|
||||
sock = getattr(conn, 'sock', False)
|
||||
if sock is False: # Platform-specific: AppEngine
|
||||
return False
|
||||
if sock is None: # Connection already closed (such as by httplib).
|
||||
return True
|
||||
|
||||
if not HAS_SELECT:
|
||||
return False
|
||||
|
||||
try:
|
||||
return bool(wait_for_read(sock, timeout=0.0))
|
||||
except SelectorError:
|
||||
return True
|
||||
|
||||
|
||||
# This function is copied from socket.py in the Python 2.7 standard
|
||||
# library test suite. Added to its signature is only `socket_options`.
|
||||
# One additional modification is that we avoid binding to IPv6 servers
|
||||
# discovered in DNS if the system doesn't have IPv6 functionality.
|
||||
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
source_address=None, socket_options=None):
|
||||
"""Connect to *address* and return the socket object.
|
||||
|
||||
Convenience function. Connect to *address* (a 2-tuple ``(host,
|
||||
port)``) and return the socket object. Passing the optional
|
||||
*timeout* parameter will set the timeout on the socket instance
|
||||
before attempting to connect. If no *timeout* is supplied, the
|
||||
global default timeout setting returned by :func:`getdefaulttimeout`
|
||||
is used. If *source_address* is set it must be a tuple of (host, port)
|
||||
for the socket to bind as a source address before making the connection.
|
||||
An host of '' or port 0 tells the OS to use the default.
|
||||
"""
|
||||
|
||||
host, port = address
|
||||
if host.startswith('['):
|
||||
host = host.strip('[]')
|
||||
err = None
|
||||
|
||||
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
|
||||
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
|
||||
# The original create_connection function always returns all records.
|
||||
family = allowed_gai_family()
|
||||
|
||||
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
|
||||
af, socktype, proto, canonname, sa = res
|
||||
sock = None
|
||||
try:
|
||||
sock = socket.socket(af, socktype, proto)
|
||||
|
||||
# If provided, set socket level options before connecting.
|
||||
_set_socket_options(sock, socket_options)
|
||||
|
||||
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
|
||||
sock.settimeout(timeout)
|
||||
if source_address:
|
||||
sock.bind(source_address)
|
||||
sock.connect(sa)
|
||||
return sock
|
||||
|
||||
except socket.error as e:
|
||||
err = e
|
||||
if sock is not None:
|
||||
sock.close()
|
||||
sock = None
|
||||
|
||||
if err is not None:
|
||||
raise err
|
||||
|
||||
raise socket.error("getaddrinfo returns an empty list")
|
||||
|
||||
|
||||
def _set_socket_options(sock, options):
|
||||
if options is None:
|
||||
return
|
||||
|
||||
for opt in options:
|
||||
sock.setsockopt(*opt)
|
||||
|
||||
|
||||
def allowed_gai_family():
|
||||
"""This function is designed to work in the context of
|
||||
getaddrinfo, where family=socket.AF_UNSPEC is the default and
|
||||
will perform a DNS search for both IPv6 and IPv4 records."""
|
||||
|
||||
family = socket.AF_INET
|
||||
if HAS_IPV6:
|
||||
family = socket.AF_UNSPEC
|
||||
return family
|
||||
|
||||
|
||||
def _has_ipv6(host):
|
||||
""" Returns True if the system can bind an IPv6 address. """
|
||||
sock = None
|
||||
has_ipv6 = False
|
||||
|
||||
if socket.has_ipv6:
|
||||
# has_ipv6 returns true if cPython was compiled with IPv6 support.
|
||||
# It does not tell us if the system has IPv6 support enabled. To
|
||||
# determine that we must bind to an IPv6 address.
|
||||
# https://github.com/shazow/urllib3/pull/611
|
||||
# https://bugs.python.org/issue658327
|
||||
try:
|
||||
sock = socket.socket(socket.AF_INET6)
|
||||
sock.bind((host, 0))
|
||||
has_ipv6 = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if sock:
|
||||
sock.close()
|
||||
return has_ipv6
|
||||
|
||||
|
||||
HAS_IPV6 = _has_ipv6('::1')
|
||||
@@ -1,118 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from base64 import b64encode
|
||||
|
||||
from ..packages.six import b, integer_types
|
||||
from ..exceptions import UnrewindableBodyError
|
||||
|
||||
ACCEPT_ENCODING = 'gzip,deflate'
|
||||
_FAILEDTELL = object()
|
||||
|
||||
|
||||
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
|
||||
basic_auth=None, proxy_basic_auth=None, disable_cache=None):
|
||||
"""
|
||||
Shortcuts for generating request headers.
|
||||
|
||||
:param keep_alive:
|
||||
If ``True``, adds 'connection: keep-alive' header.
|
||||
|
||||
:param accept_encoding:
|
||||
Can be a boolean, list, or string.
|
||||
``True`` translates to 'gzip,deflate'.
|
||||
List will get joined by comma.
|
||||
String will be used as provided.
|
||||
|
||||
:param user_agent:
|
||||
String representing the user-agent you want, such as
|
||||
"python-urllib3/0.6"
|
||||
|
||||
:param basic_auth:
|
||||
Colon-separated username:password string for 'authorization: basic ...'
|
||||
auth header.
|
||||
|
||||
:param proxy_basic_auth:
|
||||
Colon-separated username:password string for 'proxy-authorization: basic ...'
|
||||
auth header.
|
||||
|
||||
:param disable_cache:
|
||||
If ``True``, adds 'cache-control: no-cache' header.
|
||||
|
||||
Example::
|
||||
|
||||
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
|
||||
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
|
||||
>>> make_headers(accept_encoding=True)
|
||||
{'accept-encoding': 'gzip,deflate'}
|
||||
"""
|
||||
headers = {}
|
||||
if accept_encoding:
|
||||
if isinstance(accept_encoding, str):
|
||||
pass
|
||||
elif isinstance(accept_encoding, list):
|
||||
accept_encoding = ','.join(accept_encoding)
|
||||
else:
|
||||
accept_encoding = ACCEPT_ENCODING
|
||||
headers['accept-encoding'] = accept_encoding
|
||||
|
||||
if user_agent:
|
||||
headers['user-agent'] = user_agent
|
||||
|
||||
if keep_alive:
|
||||
headers['connection'] = 'keep-alive'
|
||||
|
||||
if basic_auth:
|
||||
headers['authorization'] = 'Basic ' + \
|
||||
b64encode(b(basic_auth)).decode('utf-8')
|
||||
|
||||
if proxy_basic_auth:
|
||||
headers['proxy-authorization'] = 'Basic ' + \
|
||||
b64encode(b(proxy_basic_auth)).decode('utf-8')
|
||||
|
||||
if disable_cache:
|
||||
headers['cache-control'] = 'no-cache'
|
||||
|
||||
return headers
|
||||
|
||||
|
||||
def set_file_position(body, pos):
|
||||
"""
|
||||
If a position is provided, move file to that point.
|
||||
Otherwise, we'll attempt to record a position for future use.
|
||||
"""
|
||||
if pos is not None:
|
||||
rewind_body(body, pos)
|
||||
elif getattr(body, 'tell', None) is not None:
|
||||
try:
|
||||
pos = body.tell()
|
||||
except (IOError, OSError):
|
||||
# This differentiates from None, allowing us to catch
|
||||
# a failed `tell()` later when trying to rewind the body.
|
||||
pos = _FAILEDTELL
|
||||
|
||||
return pos
|
||||
|
||||
|
||||
def rewind_body(body, body_pos):
|
||||
"""
|
||||
Attempt to rewind body to a certain position.
|
||||
Primarily used for request redirects and retries.
|
||||
|
||||
:param body:
|
||||
File-like object that supports seek.
|
||||
|
||||
:param int pos:
|
||||
Position to seek to in file.
|
||||
"""
|
||||
body_seek = getattr(body, 'seek', None)
|
||||
if body_seek is not None and isinstance(body_pos, integer_types):
|
||||
try:
|
||||
body_seek(body_pos)
|
||||
except (IOError, OSError):
|
||||
raise UnrewindableBodyError("An error occurred when rewinding request "
|
||||
"body for redirect/retry.")
|
||||
elif body_pos is _FAILEDTELL:
|
||||
raise UnrewindableBodyError("Unable to record file position for rewinding "
|
||||
"request body during a redirect/retry.")
|
||||
else:
|
||||
raise ValueError("body_pos must be of type integer, "
|
||||
"instead it was %s." % type(body_pos))
|
||||
@@ -1,81 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from ..packages.six.moves import http_client as httplib
|
||||
|
||||
from ..exceptions import HeaderParsingError
|
||||
|
||||
|
||||
def is_fp_closed(obj):
|
||||
"""
|
||||
Checks whether a given file-like object is closed.
|
||||
|
||||
:param obj:
|
||||
The file-like object to check.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Check `isclosed()` first, in case Python3 doesn't set `closed`.
|
||||
# GH Issue #928
|
||||
return obj.isclosed()
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Check via the official file-like-object way.
|
||||
return obj.closed
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Check if the object is a container for another file-like object that
|
||||
# gets released on exhaustion (e.g. HTTPResponse).
|
||||
return obj.fp is None
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
raise ValueError("Unable to determine whether fp is closed.")
|
||||
|
||||
|
||||
def assert_header_parsing(headers):
|
||||
"""
|
||||
Asserts whether all headers have been successfully parsed.
|
||||
Extracts encountered errors from the result of parsing headers.
|
||||
|
||||
Only works on Python 3.
|
||||
|
||||
:param headers: Headers to verify.
|
||||
:type headers: `httplib.HTTPMessage`.
|
||||
|
||||
:raises urllib3.exceptions.HeaderParsingError:
|
||||
If parsing errors are found.
|
||||
"""
|
||||
|
||||
# This will fail silently if we pass in the wrong kind of parameter.
|
||||
# To make debugging easier add an explicit check.
|
||||
if not isinstance(headers, httplib.HTTPMessage):
|
||||
raise TypeError('expected httplib.Message, got {0}.'.format(
|
||||
type(headers)))
|
||||
|
||||
defects = getattr(headers, 'defects', None)
|
||||
get_payload = getattr(headers, 'get_payload', None)
|
||||
|
||||
unparsed_data = None
|
||||
if get_payload: # Platform-specific: Python 3.
|
||||
unparsed_data = get_payload()
|
||||
|
||||
if defects or unparsed_data:
|
||||
raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
|
||||
|
||||
|
||||
def is_response_to_head(response):
|
||||
"""
|
||||
Checks whether the request of a response has been a HEAD-request.
|
||||
Handles the quirks of AppEngine.
|
||||
|
||||
:param conn:
|
||||
:type conn: :class:`httplib.HTTPResponse`
|
||||
"""
|
||||
# FIXME: Can we do this somehow without accessing private httplib _method?
|
||||
method = response._method
|
||||
if isinstance(method, int): # Platform-specific: Appengine
|
||||
return method == 3
|
||||
return method.upper() == 'HEAD'
|
||||
@@ -1,401 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import time
|
||||
import logging
|
||||
from collections import namedtuple
|
||||
from itertools import takewhile
|
||||
import email
|
||||
import re
|
||||
|
||||
from ..exceptions import (
|
||||
ConnectTimeoutError,
|
||||
MaxRetryError,
|
||||
ProtocolError,
|
||||
ReadTimeoutError,
|
||||
ResponseError,
|
||||
InvalidHeader,
|
||||
)
|
||||
from ..packages import six
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Data structure for representing the metadata of requests that result in a retry.
|
||||
RequestHistory = namedtuple('RequestHistory', ["method", "url", "error",
|
||||
"status", "redirect_location"])
|
||||
|
||||
|
||||
class Retry(object):
|
||||
""" Retry configuration.
|
||||
|
||||
Each retry attempt will create a new Retry object with updated values, so
|
||||
they can be safely reused.
|
||||
|
||||
Retries can be defined as a default for a pool::
|
||||
|
||||
retries = Retry(connect=5, read=2, redirect=5)
|
||||
http = PoolManager(retries=retries)
|
||||
response = http.request('GET', 'http://example.com/')
|
||||
|
||||
Or per-request (which overrides the default for the pool)::
|
||||
|
||||
response = http.request('GET', 'http://example.com/', retries=Retry(10))
|
||||
|
||||
Retries can be disabled by passing ``False``::
|
||||
|
||||
response = http.request('GET', 'http://example.com/', retries=False)
|
||||
|
||||
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
|
||||
retries are disabled, in which case the causing exception will be raised.
|
||||
|
||||
:param int total:
|
||||
Total number of retries to allow. Takes precedence over other counts.
|
||||
|
||||
Set to ``None`` to remove this constraint and fall back on other
|
||||
counts. It's a good idea to set this to some sensibly-high value to
|
||||
account for unexpected edge cases and avoid infinite retry loops.
|
||||
|
||||
Set to ``0`` to fail on the first retry.
|
||||
|
||||
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
|
||||
|
||||
:param int connect:
|
||||
How many connection-related errors to retry on.
|
||||
|
||||
These are errors raised before the request is sent to the remote server,
|
||||
which we assume has not triggered the server to process the request.
|
||||
|
||||
Set to ``0`` to fail on the first retry of this type.
|
||||
|
||||
:param int read:
|
||||
How many times to retry on read errors.
|
||||
|
||||
These errors are raised after the request was sent to the server, so the
|
||||
request may have side-effects.
|
||||
|
||||
Set to ``0`` to fail on the first retry of this type.
|
||||
|
||||
:param int redirect:
|
||||
How many redirects to perform. Limit this to avoid infinite redirect
|
||||
loops.
|
||||
|
||||
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
|
||||
308.
|
||||
|
||||
Set to ``0`` to fail on the first retry of this type.
|
||||
|
||||
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
|
||||
|
||||
:param int status:
|
||||
How many times to retry on bad status codes.
|
||||
|
||||
These are retries made on responses, where status code matches
|
||||
``status_forcelist``.
|
||||
|
||||
Set to ``0`` to fail on the first retry of this type.
|
||||
|
||||
:param iterable method_whitelist:
|
||||
Set of uppercased HTTP method verbs that we should retry on.
|
||||
|
||||
By default, we only retry on methods which are considered to be
|
||||
idempotent (multiple requests with the same parameters end with the
|
||||
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
|
||||
|
||||
Set to a ``False`` value to retry on any verb.
|
||||
|
||||
:param iterable status_forcelist:
|
||||
A set of integer HTTP status codes that we should force a retry on.
|
||||
A retry is initiated if the request method is in ``method_whitelist``
|
||||
and the response status code is in ``status_forcelist``.
|
||||
|
||||
By default, this is disabled with ``None``.
|
||||
|
||||
:param float backoff_factor:
|
||||
A backoff factor to apply between attempts after the second try
|
||||
(most errors are resolved immediately by a second try without a
|
||||
delay). urllib3 will sleep for::
|
||||
|
||||
{backoff factor} * (2 ^ ({number of total retries} - 1))
|
||||
|
||||
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
|
||||
for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
|
||||
than :attr:`Retry.BACKOFF_MAX`.
|
||||
|
||||
By default, backoff is disabled (set to 0).
|
||||
|
||||
:param bool raise_on_redirect: Whether, if the number of redirects is
|
||||
exhausted, to raise a MaxRetryError, or to return a response with a
|
||||
response code in the 3xx range.
|
||||
|
||||
:param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
|
||||
whether we should raise an exception, or return a response,
|
||||
if status falls in ``status_forcelist`` range and retries have
|
||||
been exhausted.
|
||||
|
||||
:param tuple history: The history of the request encountered during
|
||||
each call to :meth:`~Retry.increment`. The list is in the order
|
||||
the requests occurred. Each list item is of class :class:`RequestHistory`.
|
||||
|
||||
:param bool respect_retry_after_header:
|
||||
Whether to respect Retry-After header on status codes defined as
|
||||
:attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.
|
||||
|
||||
"""
|
||||
|
||||
DEFAULT_METHOD_WHITELIST = frozenset([
|
||||
'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
|
||||
|
||||
RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])
|
||||
|
||||
#: Maximum backoff time.
|
||||
BACKOFF_MAX = 120
|
||||
|
||||
def __init__(self, total=10, connect=None, read=None, redirect=None, status=None,
|
||||
method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
|
||||
backoff_factor=0, raise_on_redirect=True, raise_on_status=True,
|
||||
history=None, respect_retry_after_header=True):
|
||||
|
||||
self.total = total
|
||||
self.connect = connect
|
||||
self.read = read
|
||||
self.status = status
|
||||
|
||||
if redirect is False or total is False:
|
||||
redirect = 0
|
||||
raise_on_redirect = False
|
||||
|
||||
self.redirect = redirect
|
||||
self.status_forcelist = status_forcelist or set()
|
||||
self.method_whitelist = method_whitelist
|
||||
self.backoff_factor = backoff_factor
|
||||
self.raise_on_redirect = raise_on_redirect
|
||||
self.raise_on_status = raise_on_status
|
||||
self.history = history or tuple()
|
||||
self.respect_retry_after_header = respect_retry_after_header
|
||||
|
||||
def new(self, **kw):
|
||||
params = dict(
|
||||
total=self.total,
|
||||
connect=self.connect, read=self.read, redirect=self.redirect, status=self.status,
|
||||
method_whitelist=self.method_whitelist,
|
||||
status_forcelist=self.status_forcelist,
|
||||
backoff_factor=self.backoff_factor,
|
||||
raise_on_redirect=self.raise_on_redirect,
|
||||
raise_on_status=self.raise_on_status,
|
||||
history=self.history,
|
||||
)
|
||||
params.update(kw)
|
||||
return type(self)(**params)
|
||||
|
||||
@classmethod
|
||||
def from_int(cls, retries, redirect=True, default=None):
|
||||
""" Backwards-compatibility for the old retries format."""
|
||||
if retries is None:
|
||||
retries = default if default is not None else cls.DEFAULT
|
||||
|
||||
if isinstance(retries, Retry):
|
||||
return retries
|
||||
|
||||
redirect = bool(redirect) and None
|
||||
new_retries = cls(retries, redirect=redirect)
|
||||
log.debug("Converted retries value: %r -> %r", retries, new_retries)
|
||||
return new_retries
|
||||
|
||||
def get_backoff_time(self):
|
||||
""" Formula for computing the current backoff
|
||||
|
||||
:rtype: float
|
||||
"""
|
||||
# We want to consider only the last consecutive errors sequence (Ignore redirects).
|
||||
consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None,
|
||||
reversed(self.history))))
|
||||
if consecutive_errors_len <= 1:
|
||||
return 0
|
||||
|
||||
backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
|
||||
return min(self.BACKOFF_MAX, backoff_value)
|
||||
|
||||
def parse_retry_after(self, retry_after):
|
||||
# Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
|
||||
if re.match(r"^\s*[0-9]+\s*$", retry_after):
|
||||
seconds = int(retry_after)
|
||||
else:
|
||||
retry_date_tuple = email.utils.parsedate(retry_after)
|
||||
if retry_date_tuple is None:
|
||||
raise InvalidHeader("Invalid Retry-After header: %s" % retry_after)
|
||||
retry_date = time.mktime(retry_date_tuple)
|
||||
seconds = retry_date - time.time()
|
||||
|
||||
if seconds < 0:
|
||||
seconds = 0
|
||||
|
||||
return seconds
|
||||
|
||||
def get_retry_after(self, response):
|
||||
""" Get the value of Retry-After in seconds. """
|
||||
|
||||
retry_after = response.getheader("Retry-After")
|
||||
|
||||
if retry_after is None:
|
||||
return None
|
||||
|
||||
return self.parse_retry_after(retry_after)
|
||||
|
||||
def sleep_for_retry(self, response=None):
|
||||
retry_after = self.get_retry_after(response)
|
||||
if retry_after:
|
||||
time.sleep(retry_after)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _sleep_backoff(self):
|
||||
backoff = self.get_backoff_time()
|
||||
if backoff <= 0:
|
||||
return
|
||||
time.sleep(backoff)
|
||||
|
||||
def sleep(self, response=None):
|
||||
""" Sleep between retry attempts.
|
||||
|
||||
This method will respect a server's ``Retry-After`` response header
|
||||
and sleep the duration of the time requested. If that is not present, it
|
||||
will use an exponential backoff. By default, the backoff factor is 0 and
|
||||
this method will return immediately.
|
||||
"""
|
||||
|
||||
if response:
|
||||
slept = self.sleep_for_retry(response)
|
||||
if slept:
|
||||
return
|
||||
|
||||
self._sleep_backoff()
|
||||
|
||||
def _is_connection_error(self, err):
|
||||
""" Errors when we're fairly sure that the server did not receive the
|
||||
request, so it should be safe to retry.
|
||||
"""
|
||||
return isinstance(err, ConnectTimeoutError)
|
||||
|
||||
def _is_read_error(self, err):
|
||||
""" Errors that occur after the request has been started, so we should
|
||||
assume that the server began processing it.
|
||||
"""
|
||||
return isinstance(err, (ReadTimeoutError, ProtocolError))
|
||||
|
||||
def _is_method_retryable(self, method):
|
||||
""" Checks if a given HTTP method should be retried upon, depending if
|
||||
it is included on the method whitelist.
|
||||
"""
|
||||
if self.method_whitelist and method.upper() not in self.method_whitelist:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def is_retry(self, method, status_code, has_retry_after=False):
|
||||
""" Is this method/status code retryable? (Based on whitelists and control
|
||||
variables such as the number of total retries to allow, whether to
|
||||
respect the Retry-After header, whether this header is present, and
|
||||
whether the returned status code is on the list of status codes to
|
||||
be retried upon on the presence of the aforementioned header)
|
||||
"""
|
||||
if not self._is_method_retryable(method):
|
||||
return False
|
||||
|
||||
if self.status_forcelist and status_code in self.status_forcelist:
|
||||
return True
|
||||
|
||||
return (self.total and self.respect_retry_after_header and
|
||||
has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES))
|
||||
|
||||
def is_exhausted(self):
|
||||
""" Are we out of retries? """
|
||||
retry_counts = (self.total, self.connect, self.read, self.redirect, self.status)
|
||||
retry_counts = list(filter(None, retry_counts))
|
||||
if not retry_counts:
|
||||
return False
|
||||
|
||||
return min(retry_counts) < 0
|
||||
|
||||
def increment(self, method=None, url=None, response=None, error=None,
|
||||
_pool=None, _stacktrace=None):
|
||||
""" Return a new Retry object with incremented retry counters.
|
||||
|
||||
:param response: A response object, or None, if the server did not
|
||||
return a response.
|
||||
:type response: :class:`~urllib3.response.HTTPResponse`
|
||||
:param Exception error: An error encountered during the request, or
|
||||
None if the response was received successfully.
|
||||
|
||||
:return: A new ``Retry`` object.
|
||||
"""
|
||||
if self.total is False and error:
|
||||
# Disabled, indicate to re-raise the error.
|
||||
raise six.reraise(type(error), error, _stacktrace)
|
||||
|
||||
total = self.total
|
||||
if total is not None:
|
||||
total -= 1
|
||||
|
||||
connect = self.connect
|
||||
read = self.read
|
||||
redirect = self.redirect
|
||||
status_count = self.status
|
||||
cause = 'unknown'
|
||||
status = None
|
||||
redirect_location = None
|
||||
|
||||
if error and self._is_connection_error(error):
|
||||
# Connect retry?
|
||||
if connect is False:
|
||||
raise six.reraise(type(error), error, _stacktrace)
|
||||
elif connect is not None:
|
||||
connect -= 1
|
||||
|
||||
elif error and self._is_read_error(error):
|
||||
# Read retry?
|
||||
if read is False or not self._is_method_retryable(method):
|
||||
raise six.reraise(type(error), error, _stacktrace)
|
||||
elif read is not None:
|
||||
read -= 1
|
||||
|
||||
elif response and response.get_redirect_location():
|
||||
# Redirect retry?
|
||||
if redirect is not None:
|
||||
redirect -= 1
|
||||
cause = 'too many redirects'
|
||||
redirect_location = response.get_redirect_location()
|
||||
status = response.status
|
||||
|
||||
else:
|
||||
# Incrementing because of a server error like a 500 in
|
||||
# status_forcelist and a the given method is in the whitelist
|
||||
cause = ResponseError.GENERIC_ERROR
|
||||
if response and response.status:
|
||||
if status_count is not None:
|
||||
status_count -= 1
|
||||
cause = ResponseError.SPECIFIC_ERROR.format(
|
||||
status_code=response.status)
|
||||
status = response.status
|
||||
|
||||
history = self.history + (RequestHistory(method, url, error, status, redirect_location),)
|
||||
|
||||
new_retry = self.new(
|
||||
total=total,
|
||||
connect=connect, read=read, redirect=redirect, status=status_count,
|
||||
history=history)
|
||||
|
||||
if new_retry.is_exhausted():
|
||||
raise MaxRetryError(_pool, url, error or ResponseError(cause))
|
||||
|
||||
log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
|
||||
|
||||
return new_retry
|
||||
|
||||
def __repr__(self):
|
||||
return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
|
||||
'read={self.read}, redirect={self.redirect}, status={self.status})').format(
|
||||
cls=type(self), self=self)
|
||||
|
||||
|
||||
# For backwards compatibility (equivalent to pre-v1.9):
|
||||
Retry.DEFAULT = Retry(3)
|
||||
@@ -1,581 +0,0 @@
|
||||
# Backport of selectors.py from Python 3.5+ to support Python < 3.4
|
||||
# Also has the behavior specified in PEP 475 which is to retry syscalls
|
||||
# in the case of an EINTR error. This module is required because selectors34
|
||||
# does not follow this behavior and instead returns that no dile descriptor
|
||||
# events have occurred rather than retry the syscall. The decision to drop
|
||||
# support for select.devpoll is made to maintain 100% test coverage.
|
||||
|
||||
import errno
|
||||
import math
|
||||
import select
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
from collections import namedtuple, Mapping
|
||||
|
||||
try:
|
||||
monotonic = time.monotonic
|
||||
except (AttributeError, ImportError): # Python 3.3<
|
||||
monotonic = time.time
|
||||
|
||||
EVENT_READ = (1 << 0)
|
||||
EVENT_WRITE = (1 << 1)
|
||||
|
||||
HAS_SELECT = True # Variable that shows whether the platform has a selector.
|
||||
_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None.
|
||||
_DEFAULT_SELECTOR = None
|
||||
|
||||
|
||||
class SelectorError(Exception):
|
||||
def __init__(self, errcode):
|
||||
super(SelectorError, self).__init__()
|
||||
self.errno = errcode
|
||||
|
||||
def __repr__(self):
|
||||
return "<SelectorError errno={0}>".format(self.errno)
|
||||
|
||||
def __str__(self):
|
||||
return self.__repr__()
|
||||
|
||||
|
||||
def _fileobj_to_fd(fileobj):
|
||||
""" Return a file descriptor from a file object. If
|
||||
given an integer will simply return that integer back. """
|
||||
if isinstance(fileobj, int):
|
||||
fd = fileobj
|
||||
else:
|
||||
try:
|
||||
fd = int(fileobj.fileno())
|
||||
except (AttributeError, TypeError, ValueError):
|
||||
raise ValueError("Invalid file object: {0!r}".format(fileobj))
|
||||
if fd < 0:
|
||||
raise ValueError("Invalid file descriptor: {0}".format(fd))
|
||||
return fd
|
||||
|
||||
|
||||
# Determine which function to use to wrap system calls because Python 3.5+
|
||||
# already handles the case when system calls are interrupted.
|
||||
if sys.version_info >= (3, 5):
|
||||
def _syscall_wrapper(func, _, *args, **kwargs):
|
||||
""" This is the short-circuit version of the below logic
|
||||
because in Python 3.5+ all system calls automatically restart
|
||||
and recalculate their timeouts. """
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except (OSError, IOError, select.error) as e:
|
||||
errcode = None
|
||||
if hasattr(e, "errno"):
|
||||
errcode = e.errno
|
||||
raise SelectorError(errcode)
|
||||
else:
|
||||
def _syscall_wrapper(func, recalc_timeout, *args, **kwargs):
|
||||
""" Wrapper function for syscalls that could fail due to EINTR.
|
||||
All functions should be retried if there is time left in the timeout
|
||||
in accordance with PEP 475. """
|
||||
timeout = kwargs.get("timeout", None)
|
||||
if timeout is None:
|
||||
expires = None
|
||||
recalc_timeout = False
|
||||
else:
|
||||
timeout = float(timeout)
|
||||
if timeout < 0.0: # Timeout less than 0 treated as no timeout.
|
||||
expires = None
|
||||
else:
|
||||
expires = monotonic() + timeout
|
||||
|
||||
args = list(args)
|
||||
if recalc_timeout and "timeout" not in kwargs:
|
||||
raise ValueError(
|
||||
"Timeout must be in args or kwargs to be recalculated")
|
||||
|
||||
result = _SYSCALL_SENTINEL
|
||||
while result is _SYSCALL_SENTINEL:
|
||||
try:
|
||||
result = func(*args, **kwargs)
|
||||
# OSError is thrown by select.select
|
||||
# IOError is thrown by select.epoll.poll
|
||||
# select.error is thrown by select.poll.poll
|
||||
# Aren't we thankful for Python 3.x rework for exceptions?
|
||||
except (OSError, IOError, select.error) as e:
|
||||
# select.error wasn't a subclass of OSError in the past.
|
||||
errcode = None
|
||||
if hasattr(e, "errno"):
|
||||
errcode = e.errno
|
||||
elif hasattr(e, "args"):
|
||||
errcode = e.args[0]
|
||||
|
||||
# Also test for the Windows equivalent of EINTR.
|
||||
is_interrupt = (errcode == errno.EINTR or (hasattr(errno, "WSAEINTR") and
|
||||
errcode == errno.WSAEINTR))
|
||||
|
||||
if is_interrupt:
|
||||
if expires is not None:
|
||||
current_time = monotonic()
|
||||
if current_time > expires:
|
||||
raise OSError(errno=errno.ETIMEDOUT)
|
||||
if recalc_timeout:
|
||||
if "timeout" in kwargs:
|
||||
kwargs["timeout"] = expires - current_time
|
||||
continue
|
||||
if errcode:
|
||||
raise SelectorError(errcode)
|
||||
else:
|
||||
raise
|
||||
return result
|
||||
|
||||
|
||||
SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data'])
|
||||
|
||||
|
||||
class _SelectorMapping(Mapping):
|
||||
""" Mapping of file objects to selector keys """
|
||||
|
||||
def __init__(self, selector):
|
||||
self._selector = selector
|
||||
|
||||
def __len__(self):
|
||||
return len(self._selector._fd_to_key)
|
||||
|
||||
def __getitem__(self, fileobj):
|
||||
try:
|
||||
fd = self._selector._fileobj_lookup(fileobj)
|
||||
return self._selector._fd_to_key[fd]
|
||||
except KeyError:
|
||||
raise KeyError("{0!r} is not registered.".format(fileobj))
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._selector._fd_to_key)
|
||||
|
||||
|
||||
class BaseSelector(object):
|
||||
""" Abstract Selector class
|
||||
|
||||
A selector supports registering file objects to be monitored
|
||||
for specific I/O events.
|
||||
|
||||
A file object is a file descriptor or any object with a
|
||||
`fileno()` method. An arbitrary object can be attached to the
|
||||
file object which can be used for example to store context info,
|
||||
a callback, etc.
|
||||
|
||||
A selector can use various implementations (select(), poll(), epoll(),
|
||||
and kqueue()) depending on the platform. The 'DefaultSelector' class uses
|
||||
the most efficient implementation for the current platform.
|
||||
"""
|
||||
def __init__(self):
|
||||
# Maps file descriptors to keys.
|
||||
self._fd_to_key = {}
|
||||
|
||||
# Read-only mapping returned by get_map()
|
||||
self._map = _SelectorMapping(self)
|
||||
|
||||
def _fileobj_lookup(self, fileobj):
|
||||
""" Return a file descriptor from a file object.
|
||||
This wraps _fileobj_to_fd() to do an exhaustive
|
||||
search in case the object is invalid but we still
|
||||
have it in our map. Used by unregister() so we can
|
||||
unregister an object that was previously registered
|
||||
even if it is closed. It is also used by _SelectorMapping
|
||||
"""
|
||||
try:
|
||||
return _fileobj_to_fd(fileobj)
|
||||
except ValueError:
|
||||
|
||||
# Search through all our mapped keys.
|
||||
for key in self._fd_to_key.values():
|
||||
if key.fileobj is fileobj:
|
||||
return key.fd
|
||||
|
||||
# Raise ValueError after all.
|
||||
raise
|
||||
|
||||
def register(self, fileobj, events, data=None):
|
||||
""" Register a file object for a set of events to monitor. """
|
||||
if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)):
|
||||
raise ValueError("Invalid events: {0!r}".format(events))
|
||||
|
||||
key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data)
|
||||
|
||||
if key.fd in self._fd_to_key:
|
||||
raise KeyError("{0!r} (FD {1}) is already registered"
|
||||
.format(fileobj, key.fd))
|
||||
|
||||
self._fd_to_key[key.fd] = key
|
||||
return key
|
||||
|
||||
def unregister(self, fileobj):
|
||||
""" Unregister a file object from being monitored. """
|
||||
try:
|
||||
key = self._fd_to_key.pop(self._fileobj_lookup(fileobj))
|
||||
except KeyError:
|
||||
raise KeyError("{0!r} is not registered".format(fileobj))
|
||||
|
||||
# Getting the fileno of a closed socket on Windows errors with EBADF.
|
||||
except socket.error as e: # Platform-specific: Windows.
|
||||
if e.errno != errno.EBADF:
|
||||
raise
|
||||
else:
|
||||
for key in self._fd_to_key.values():
|
||||
if key.fileobj is fileobj:
|
||||
self._fd_to_key.pop(key.fd)
|
||||
break
|
||||
else:
|
||||
raise KeyError("{0!r} is not registered".format(fileobj))
|
||||
return key
|
||||
|
||||
def modify(self, fileobj, events, data=None):
|
||||
""" Change a registered file object monitored events and data. """
|
||||
# NOTE: Some subclasses optimize this operation even further.
|
||||
try:
|
||||
key = self._fd_to_key[self._fileobj_lookup(fileobj)]
|
||||
except KeyError:
|
||||
raise KeyError("{0!r} is not registered".format(fileobj))
|
||||
|
||||
if events != key.events:
|
||||
self.unregister(fileobj)
|
||||
key = self.register(fileobj, events, data)
|
||||
|
||||
elif data != key.data:
|
||||
# Use a shortcut to update the data.
|
||||
key = key._replace(data=data)
|
||||
self._fd_to_key[key.fd] = key
|
||||
|
||||
return key
|
||||
|
||||
def select(self, timeout=None):
|
||||
""" Perform the actual selection until some monitored file objects
|
||||
are ready or the timeout expires. """
|
||||
raise NotImplementedError()
|
||||
|
||||
def close(self):
|
||||
""" Close the selector. This must be called to ensure that all
|
||||
underlying resources are freed. """
|
||||
self._fd_to_key.clear()
|
||||
self._map = None
|
||||
|
||||
def get_key(self, fileobj):
|
||||
""" Return the key associated with a registered file object. """
|
||||
mapping = self.get_map()
|
||||
if mapping is None:
|
||||
raise RuntimeError("Selector is closed")
|
||||
try:
|
||||
return mapping[fileobj]
|
||||
except KeyError:
|
||||
raise KeyError("{0!r} is not registered".format(fileobj))
|
||||
|
||||
def get_map(self):
|
||||
""" Return a mapping of file objects to selector keys """
|
||||
return self._map
|
||||
|
||||
def _key_from_fd(self, fd):
|
||||
""" Return the key associated to a given file descriptor
|
||||
Return None if it is not found. """
|
||||
try:
|
||||
return self._fd_to_key[fd]
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.close()
|
||||
|
||||
|
||||
# Almost all platforms have select.select()
|
||||
if hasattr(select, "select"):
|
||||
class SelectSelector(BaseSelector):
|
||||
""" Select-based selector. """
|
||||
def __init__(self):
|
||||
super(SelectSelector, self).__init__()
|
||||
self._readers = set()
|
||||
self._writers = set()
|
||||
|
||||
def register(self, fileobj, events, data=None):
|
||||
key = super(SelectSelector, self).register(fileobj, events, data)
|
||||
if events & EVENT_READ:
|
||||
self._readers.add(key.fd)
|
||||
if events & EVENT_WRITE:
|
||||
self._writers.add(key.fd)
|
||||
return key
|
||||
|
||||
def unregister(self, fileobj):
|
||||
key = super(SelectSelector, self).unregister(fileobj)
|
||||
self._readers.discard(key.fd)
|
||||
self._writers.discard(key.fd)
|
||||
return key
|
||||
|
||||
def _select(self, r, w, timeout=None):
|
||||
""" Wrapper for select.select because timeout is a positional arg """
|
||||
return select.select(r, w, [], timeout)
|
||||
|
||||
def select(self, timeout=None):
|
||||
# Selecting on empty lists on Windows errors out.
|
||||
if not len(self._readers) and not len(self._writers):
|
||||
return []
|
||||
|
||||
timeout = None if timeout is None else max(timeout, 0.0)
|
||||
ready = []
|
||||
r, w, _ = _syscall_wrapper(self._select, True, self._readers,
|
||||
self._writers, timeout)
|
||||
r = set(r)
|
||||
w = set(w)
|
||||
for fd in r | w:
|
||||
events = 0
|
||||
if fd in r:
|
||||
events |= EVENT_READ
|
||||
if fd in w:
|
||||
events |= EVENT_WRITE
|
||||
|
||||
key = self._key_from_fd(fd)
|
||||
if key:
|
||||
ready.append((key, events & key.events))
|
||||
return ready
|
||||
|
||||
|
||||
if hasattr(select, "poll"):
|
||||
class PollSelector(BaseSelector):
|
||||
""" Poll-based selector """
|
||||
def __init__(self):
|
||||
super(PollSelector, self).__init__()
|
||||
self._poll = select.poll()
|
||||
|
||||
def register(self, fileobj, events, data=None):
|
||||
key = super(PollSelector, self).register(fileobj, events, data)
|
||||
event_mask = 0
|
||||
if events & EVENT_READ:
|
||||
event_mask |= select.POLLIN
|
||||
if events & EVENT_WRITE:
|
||||
event_mask |= select.POLLOUT
|
||||
self._poll.register(key.fd, event_mask)
|
||||
return key
|
||||
|
||||
def unregister(self, fileobj):
|
||||
key = super(PollSelector, self).unregister(fileobj)
|
||||
self._poll.unregister(key.fd)
|
||||
return key
|
||||
|
||||
def _wrap_poll(self, timeout=None):
|
||||
""" Wrapper function for select.poll.poll() so that
|
||||
_syscall_wrapper can work with only seconds. """
|
||||
if timeout is not None:
|
||||
if timeout <= 0:
|
||||
timeout = 0
|
||||
else:
|
||||
# select.poll.poll() has a resolution of 1 millisecond,
|
||||
# round away from zero to wait *at least* timeout seconds.
|
||||
timeout = math.ceil(timeout * 1e3)
|
||||
|
||||
result = self._poll.poll(timeout)
|
||||
return result
|
||||
|
||||
def select(self, timeout=None):
|
||||
ready = []
|
||||
fd_events = _syscall_wrapper(self._wrap_poll, True, timeout=timeout)
|
||||
for fd, event_mask in fd_events:
|
||||
events = 0
|
||||
if event_mask & ~select.POLLIN:
|
||||
events |= EVENT_WRITE
|
||||
if event_mask & ~select.POLLOUT:
|
||||
events |= EVENT_READ
|
||||
|
||||
key = self._key_from_fd(fd)
|
||||
if key:
|
||||
ready.append((key, events & key.events))
|
||||
|
||||
return ready
|
||||
|
||||
|
||||
if hasattr(select, "epoll"):
|
||||
class EpollSelector(BaseSelector):
|
||||
""" Epoll-based selector """
|
||||
def __init__(self):
|
||||
super(EpollSelector, self).__init__()
|
||||
self._epoll = select.epoll()
|
||||
|
||||
def fileno(self):
|
||||
return self._epoll.fileno()
|
||||
|
||||
def register(self, fileobj, events, data=None):
|
||||
key = super(EpollSelector, self).register(fileobj, events, data)
|
||||
events_mask = 0
|
||||
if events & EVENT_READ:
|
||||
events_mask |= select.EPOLLIN
|
||||
if events & EVENT_WRITE:
|
||||
events_mask |= select.EPOLLOUT
|
||||
_syscall_wrapper(self._epoll.register, False, key.fd, events_mask)
|
||||
return key
|
||||
|
||||
def unregister(self, fileobj):
|
||||
key = super(EpollSelector, self).unregister(fileobj)
|
||||
try:
|
||||
_syscall_wrapper(self._epoll.unregister, False, key.fd)
|
||||
except SelectorError:
|
||||
# This can occur when the fd was closed since registry.
|
||||
pass
|
||||
return key
|
||||
|
||||
def select(self, timeout=None):
|
||||
if timeout is not None:
|
||||
if timeout <= 0:
|
||||
timeout = 0.0
|
||||
else:
|
||||
# select.epoll.poll() has a resolution of 1 millisecond
|
||||
# but luckily takes seconds so we don't need a wrapper
|
||||
# like PollSelector. Just for better rounding.
|
||||
timeout = math.ceil(timeout * 1e3) * 1e-3
|
||||
timeout = float(timeout)
|
||||
else:
|
||||
timeout = -1.0 # epoll.poll() must have a float.
|
||||
|
||||
# We always want at least 1 to ensure that select can be called
|
||||
# with no file descriptors registered. Otherwise will fail.
|
||||
max_events = max(len(self._fd_to_key), 1)
|
||||
|
||||
ready = []
|
||||
fd_events = _syscall_wrapper(self._epoll.poll, True,
|
||||
timeout=timeout,
|
||||
maxevents=max_events)
|
||||
for fd, event_mask in fd_events:
|
||||
events = 0
|
||||
if event_mask & ~select.EPOLLIN:
|
||||
events |= EVENT_WRITE
|
||||
if event_mask & ~select.EPOLLOUT:
|
||||
events |= EVENT_READ
|
||||
|
||||
key = self._key_from_fd(fd)
|
||||
if key:
|
||||
ready.append((key, events & key.events))
|
||||
return ready
|
||||
|
||||
def close(self):
|
||||
self._epoll.close()
|
||||
super(EpollSelector, self).close()
|
||||
|
||||
|
||||
if hasattr(select, "kqueue"):
|
||||
class KqueueSelector(BaseSelector):
|
||||
""" Kqueue / Kevent-based selector """
|
||||
def __init__(self):
|
||||
super(KqueueSelector, self).__init__()
|
||||
self._kqueue = select.kqueue()
|
||||
|
||||
def fileno(self):
|
||||
return self._kqueue.fileno()
|
||||
|
||||
def register(self, fileobj, events, data=None):
|
||||
key = super(KqueueSelector, self).register(fileobj, events, data)
|
||||
if events & EVENT_READ:
|
||||
kevent = select.kevent(key.fd,
|
||||
select.KQ_FILTER_READ,
|
||||
select.KQ_EV_ADD)
|
||||
|
||||
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
|
||||
|
||||
if events & EVENT_WRITE:
|
||||
kevent = select.kevent(key.fd,
|
||||
select.KQ_FILTER_WRITE,
|
||||
select.KQ_EV_ADD)
|
||||
|
||||
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
|
||||
|
||||
return key
|
||||
|
||||
def unregister(self, fileobj):
|
||||
key = super(KqueueSelector, self).unregister(fileobj)
|
||||
if key.events & EVENT_READ:
|
||||
kevent = select.kevent(key.fd,
|
||||
select.KQ_FILTER_READ,
|
||||
select.KQ_EV_DELETE)
|
||||
try:
|
||||
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
|
||||
except SelectorError:
|
||||
pass
|
||||
if key.events & EVENT_WRITE:
|
||||
kevent = select.kevent(key.fd,
|
||||
select.KQ_FILTER_WRITE,
|
||||
select.KQ_EV_DELETE)
|
||||
try:
|
||||
_syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0)
|
||||
except SelectorError:
|
||||
pass
|
||||
|
||||
return key
|
||||
|
||||
def select(self, timeout=None):
|
||||
if timeout is not None:
|
||||
timeout = max(timeout, 0)
|
||||
|
||||
max_events = len(self._fd_to_key) * 2
|
||||
ready_fds = {}
|
||||
|
||||
kevent_list = _syscall_wrapper(self._kqueue.control, True,
|
||||
None, max_events, timeout)
|
||||
|
||||
for kevent in kevent_list:
|
||||
fd = kevent.ident
|
||||
event_mask = kevent.filter
|
||||
events = 0
|
||||
if event_mask == select.KQ_FILTER_READ:
|
||||
events |= EVENT_READ
|
||||
if event_mask == select.KQ_FILTER_WRITE:
|
||||
events |= EVENT_WRITE
|
||||
|
||||
key = self._key_from_fd(fd)
|
||||
if key:
|
||||
if key.fd not in ready_fds:
|
||||
ready_fds[key.fd] = (key, events & key.events)
|
||||
else:
|
||||
old_events = ready_fds[key.fd][1]
|
||||
ready_fds[key.fd] = (key, (events | old_events) & key.events)
|
||||
|
||||
return list(ready_fds.values())
|
||||
|
||||
def close(self):
|
||||
self._kqueue.close()
|
||||
super(KqueueSelector, self).close()
|
||||
|
||||
|
||||
if not hasattr(select, 'select'): # Platform-specific: AppEngine
|
||||
HAS_SELECT = False
|
||||
|
||||
|
||||
def _can_allocate(struct):
|
||||
""" Checks that select structs can be allocated by the underlying
|
||||
operating system, not just advertised by the select module. We don't
|
||||
check select() because we'll be hopeful that most platforms that
|
||||
don't have it available will not advertise it. (ie: GAE) """
|
||||
try:
|
||||
# select.poll() objects won't fail until used.
|
||||
if struct == 'poll':
|
||||
p = select.poll()
|
||||
p.poll(0)
|
||||
|
||||
# All others will fail on allocation.
|
||||
else:
|
||||
getattr(select, struct)().close()
|
||||
return True
|
||||
except (OSError, AttributeError) as e:
|
||||
return False
|
||||
|
||||
|
||||
# Choose the best implementation, roughly:
|
||||
# kqueue == epoll > poll > select. Devpoll not supported. (See above)
|
||||
# select() also can't accept a FD > FD_SETSIZE (usually around 1024)
|
||||
def DefaultSelector():
|
||||
""" This function serves as a first call for DefaultSelector to
|
||||
detect if the select module is being monkey-patched incorrectly
|
||||
by eventlet, greenlet, and preserve proper behavior. """
|
||||
global _DEFAULT_SELECTOR
|
||||
if _DEFAULT_SELECTOR is None:
|
||||
if _can_allocate('kqueue'):
|
||||
_DEFAULT_SELECTOR = KqueueSelector
|
||||
elif _can_allocate('epoll'):
|
||||
_DEFAULT_SELECTOR = EpollSelector
|
||||
elif _can_allocate('poll'):
|
||||
_DEFAULT_SELECTOR = PollSelector
|
||||
elif hasattr(select, 'select'):
|
||||
_DEFAULT_SELECTOR = SelectSelector
|
||||
else: # Platform-specific: AppEngine
|
||||
raise ValueError('Platform does not have a selector')
|
||||
return _DEFAULT_SELECTOR()
|
||||
@@ -1,337 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
import errno
|
||||
import warnings
|
||||
import hmac
|
||||
|
||||
from binascii import hexlify, unhexlify
|
||||
from hashlib import md5, sha1, sha256
|
||||
|
||||
from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning
|
||||
|
||||
|
||||
SSLContext = None
|
||||
HAS_SNI = False
|
||||
IS_PYOPENSSL = False
|
||||
IS_SECURETRANSPORT = False
|
||||
|
||||
# Maps the length of a digest to a possible hash function producing this digest
|
||||
HASHFUNC_MAP = {
|
||||
32: md5,
|
||||
40: sha1,
|
||||
64: sha256,
|
||||
}
|
||||
|
||||
|
||||
def _const_compare_digest_backport(a, b):
|
||||
"""
|
||||
Compare two digests of equal length in constant time.
|
||||
|
||||
The digests must be of type str/bytes.
|
||||
Returns True if the digests match, and False otherwise.
|
||||
"""
|
||||
result = abs(len(a) - len(b))
|
||||
for l, r in zip(bytearray(a), bytearray(b)):
|
||||
result |= l ^ r
|
||||
return result == 0
|
||||
|
||||
|
||||
_const_compare_digest = getattr(hmac, 'compare_digest',
|
||||
_const_compare_digest_backport)
|
||||
|
||||
|
||||
try: # Test for SSL features
|
||||
import ssl
|
||||
from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
|
||||
from ssl import HAS_SNI # Has SNI?
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
|
||||
except ImportError:
|
||||
OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
|
||||
OP_NO_COMPRESSION = 0x20000
|
||||
|
||||
# A secure default.
|
||||
# Sources for more information on TLS ciphers:
|
||||
#
|
||||
# - https://wiki.mozilla.org/Security/Server_Side_TLS
|
||||
# - https://www.ssllabs.com/projects/best-practices/index.html
|
||||
# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
|
||||
#
|
||||
# The general intent is:
|
||||
# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
|
||||
# - prefer ECDHE over DHE for better performance,
|
||||
# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and
|
||||
# security,
|
||||
# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common,
|
||||
# - disable NULL authentication, MD5 MACs and DSS for security reasons.
|
||||
DEFAULT_CIPHERS = ':'.join([
|
||||
'ECDH+AESGCM',
|
||||
'ECDH+CHACHA20',
|
||||
'DH+AESGCM',
|
||||
'DH+CHACHA20',
|
||||
'ECDH+AES256',
|
||||
'DH+AES256',
|
||||
'ECDH+AES128',
|
||||
'DH+AES',
|
||||
'RSA+AESGCM',
|
||||
'RSA+AES',
|
||||
'!aNULL',
|
||||
'!eNULL',
|
||||
'!MD5',
|
||||
])
|
||||
|
||||
try:
|
||||
from ssl import SSLContext # Modern SSL?
|
||||
except ImportError:
|
||||
import sys
|
||||
|
||||
class SSLContext(object): # Platform-specific: Python 2 & 3.1
|
||||
supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or
|
||||
(3, 2) <= sys.version_info)
|
||||
|
||||
def __init__(self, protocol_version):
|
||||
self.protocol = protocol_version
|
||||
# Use default values from a real SSLContext
|
||||
self.check_hostname = False
|
||||
self.verify_mode = ssl.CERT_NONE
|
||||
self.ca_certs = None
|
||||
self.options = 0
|
||||
self.certfile = None
|
||||
self.keyfile = None
|
||||
self.ciphers = None
|
||||
|
||||
def load_cert_chain(self, certfile, keyfile):
|
||||
self.certfile = certfile
|
||||
self.keyfile = keyfile
|
||||
|
||||
def load_verify_locations(self, cafile=None, capath=None):
|
||||
self.ca_certs = cafile
|
||||
|
||||
if capath is not None:
|
||||
raise SSLError("CA directories not supported in older Pythons")
|
||||
|
||||
def set_ciphers(self, cipher_suite):
|
||||
if not self.supports_set_ciphers:
|
||||
raise TypeError(
|
||||
'Your version of Python does not support setting '
|
||||
'a custom cipher suite. Please upgrade to Python '
|
||||
'2.7, 3.2, or later if you need this functionality.'
|
||||
)
|
||||
self.ciphers = cipher_suite
|
||||
|
||||
def wrap_socket(self, socket, server_hostname=None, server_side=False):
|
||||
warnings.warn(
|
||||
'A true SSLContext object is not available. This prevents '
|
||||
'urllib3 from configuring SSL appropriately and may cause '
|
||||
'certain SSL connections to fail. You can upgrade to a newer '
|
||||
'version of Python to solve this. For more information, see '
|
||||
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
|
||||
'#ssl-warnings',
|
||||
InsecurePlatformWarning
|
||||
)
|
||||
kwargs = {
|
||||
'keyfile': self.keyfile,
|
||||
'certfile': self.certfile,
|
||||
'ca_certs': self.ca_certs,
|
||||
'cert_reqs': self.verify_mode,
|
||||
'ssl_version': self.protocol,
|
||||
'server_side': server_side,
|
||||
}
|
||||
if self.supports_set_ciphers: # Platform-specific: Python 2.7+
|
||||
return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
|
||||
else: # Platform-specific: Python 2.6
|
||||
return wrap_socket(socket, **kwargs)
|
||||
|
||||
|
||||
def assert_fingerprint(cert, fingerprint):
|
||||
"""
|
||||
Checks if given fingerprint matches the supplied certificate.
|
||||
|
||||
:param cert:
|
||||
Certificate as bytes object.
|
||||
:param fingerprint:
|
||||
Fingerprint as string of hexdigits, can be interspersed by colons.
|
||||
"""
|
||||
|
||||
fingerprint = fingerprint.replace(':', '').lower()
|
||||
digest_length = len(fingerprint)
|
||||
hashfunc = HASHFUNC_MAP.get(digest_length)
|
||||
if not hashfunc:
|
||||
raise SSLError(
|
||||
'Fingerprint of invalid length: {0}'.format(fingerprint))
|
||||
|
||||
# We need encode() here for py32; works on py2 and p33.
|
||||
fingerprint_bytes = unhexlify(fingerprint.encode())
|
||||
|
||||
cert_digest = hashfunc(cert).digest()
|
||||
|
||||
if not _const_compare_digest(cert_digest, fingerprint_bytes):
|
||||
raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
|
||||
.format(fingerprint, hexlify(cert_digest)))
|
||||
|
||||
|
||||
def resolve_cert_reqs(candidate):
|
||||
"""
|
||||
Resolves the argument to a numeric constant, which can be passed to
|
||||
the wrap_socket function/method from the ssl module.
|
||||
Defaults to :data:`ssl.CERT_NONE`.
|
||||
If given a string it is assumed to be the name of the constant in the
|
||||
:mod:`ssl` module or its abbrevation.
|
||||
(So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
|
||||
If it's neither `None` nor a string we assume it is already the numeric
|
||||
constant which can directly be passed to wrap_socket.
|
||||
"""
|
||||
if candidate is None:
|
||||
return CERT_NONE
|
||||
|
||||
if isinstance(candidate, str):
|
||||
res = getattr(ssl, candidate, None)
|
||||
if res is None:
|
||||
res = getattr(ssl, 'CERT_' + candidate)
|
||||
return res
|
||||
|
||||
return candidate
|
||||
|
||||
|
||||
def resolve_ssl_version(candidate):
|
||||
"""
|
||||
like resolve_cert_reqs
|
||||
"""
|
||||
if candidate is None:
|
||||
return PROTOCOL_SSLv23
|
||||
|
||||
if isinstance(candidate, str):
|
||||
res = getattr(ssl, candidate, None)
|
||||
if res is None:
|
||||
res = getattr(ssl, 'PROTOCOL_' + candidate)
|
||||
return res
|
||||
|
||||
return candidate
|
||||
|
||||
|
||||
def create_urllib3_context(ssl_version=None, cert_reqs=None,
|
||||
options=None, ciphers=None):
|
||||
"""All arguments have the same meaning as ``ssl_wrap_socket``.
|
||||
|
||||
By default, this function does a lot of the same work that
|
||||
``ssl.create_default_context`` does on Python 3.4+. It:
|
||||
|
||||
- Disables SSLv2, SSLv3, and compression
|
||||
- Sets a restricted set of server ciphers
|
||||
|
||||
If you wish to enable SSLv3, you can do::
|
||||
|
||||
from urllib3.util import ssl_
|
||||
context = ssl_.create_urllib3_context()
|
||||
context.options &= ~ssl_.OP_NO_SSLv3
|
||||
|
||||
You can do the same to enable compression (substituting ``COMPRESSION``
|
||||
for ``SSLv3`` in the last line above).
|
||||
|
||||
:param ssl_version:
|
||||
The desired protocol version to use. This will default to
|
||||
PROTOCOL_SSLv23 which will negotiate the highest protocol that both
|
||||
the server and your installation of OpenSSL support.
|
||||
:param cert_reqs:
|
||||
Whether to require the certificate verification. This defaults to
|
||||
``ssl.CERT_REQUIRED``.
|
||||
:param options:
|
||||
Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
|
||||
``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
|
||||
:param ciphers:
|
||||
Which cipher suites to allow the server to select.
|
||||
:returns:
|
||||
Constructed SSLContext object with specified options
|
||||
:rtype: SSLContext
|
||||
"""
|
||||
context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
|
||||
|
||||
# Setting the default here, as we may have no ssl module on import
|
||||
cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
|
||||
|
||||
if options is None:
|
||||
options = 0
|
||||
# SSLv2 is easily broken and is considered harmful and dangerous
|
||||
options |= OP_NO_SSLv2
|
||||
# SSLv3 has several problems and is now dangerous
|
||||
options |= OP_NO_SSLv3
|
||||
# Disable compression to prevent CRIME attacks for OpenSSL 1.0+
|
||||
# (issue #309)
|
||||
options |= OP_NO_COMPRESSION
|
||||
|
||||
context.options |= options
|
||||
|
||||
if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6
|
||||
context.set_ciphers(ciphers or DEFAULT_CIPHERS)
|
||||
|
||||
context.verify_mode = cert_reqs
|
||||
if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2
|
||||
# We do our own verification, including fingerprints and alternative
|
||||
# hostnames. So disable it here
|
||||
context.check_hostname = False
|
||||
return context
|
||||
|
||||
|
||||
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
|
||||
ca_certs=None, server_hostname=None,
|
||||
ssl_version=None, ciphers=None, ssl_context=None,
|
||||
ca_cert_dir=None):
|
||||
"""
|
||||
All arguments except for server_hostname, ssl_context, and ca_cert_dir have
|
||||
the same meaning as they do when using :func:`ssl.wrap_socket`.
|
||||
|
||||
:param server_hostname:
|
||||
When SNI is supported, the expected hostname of the certificate
|
||||
:param ssl_context:
|
||||
A pre-made :class:`SSLContext` object. If none is provided, one will
|
||||
be created using :func:`create_urllib3_context`.
|
||||
:param ciphers:
|
||||
A string of ciphers we wish the client to support. This is not
|
||||
supported on Python 2.6 as the ssl module does not support it.
|
||||
:param ca_cert_dir:
|
||||
A directory containing CA certificates in multiple separate files, as
|
||||
supported by OpenSSL's -CApath flag or the capath argument to
|
||||
SSLContext.load_verify_locations().
|
||||
"""
|
||||
context = ssl_context
|
||||
if context is None:
|
||||
# Note: This branch of code and all the variables in it are no longer
|
||||
# used by urllib3 itself. We should consider deprecating and removing
|
||||
# this code.
|
||||
context = create_urllib3_context(ssl_version, cert_reqs,
|
||||
ciphers=ciphers)
|
||||
|
||||
if ca_certs or ca_cert_dir:
|
||||
try:
|
||||
context.load_verify_locations(ca_certs, ca_cert_dir)
|
||||
except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2
|
||||
raise SSLError(e)
|
||||
# Py33 raises FileNotFoundError which subclasses OSError
|
||||
# These are not equivalent unless we check the errno attribute
|
||||
except OSError as e: # Platform-specific: Python 3.3 and beyond
|
||||
if e.errno == errno.ENOENT:
|
||||
raise SSLError(e)
|
||||
raise
|
||||
elif getattr(context, 'load_default_certs', None) is not None:
|
||||
# try to load OS default certs; works well on Windows (require Python3.4+)
|
||||
context.load_default_certs()
|
||||
|
||||
if certfile:
|
||||
context.load_cert_chain(certfile, keyfile)
|
||||
if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
|
||||
return context.wrap_socket(sock, server_hostname=server_hostname)
|
||||
|
||||
warnings.warn(
|
||||
'An HTTPS request has been made, but the SNI (Subject Name '
|
||||
'Indication) extension to TLS is not available on this platform. '
|
||||
'This may cause the server to present an incorrect TLS '
|
||||
'certificate, which can cause validation failures. You can upgrade to '
|
||||
'a newer version of Python to solve this. For more information, see '
|
||||
'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
|
||||
'#ssl-warnings',
|
||||
SNIMissingWarning
|
||||
)
|
||||
return context.wrap_socket(sock)
|
||||
@@ -1,242 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
# The default socket timeout, used by httplib to indicate that no timeout was
|
||||
# specified by the user
|
||||
from socket import _GLOBAL_DEFAULT_TIMEOUT
|
||||
import time
|
||||
|
||||
from ..exceptions import TimeoutStateError
|
||||
|
||||
# A sentinel value to indicate that no timeout was specified by the user in
|
||||
# urllib3
|
||||
_Default = object()
|
||||
|
||||
|
||||
# Use time.monotonic if available.
|
||||
current_time = getattr(time, "monotonic", time.time)
|
||||
|
||||
|
||||
class Timeout(object):
|
||||
""" Timeout configuration.
|
||||
|
||||
Timeouts can be defined as a default for a pool::
|
||||
|
||||
timeout = Timeout(connect=2.0, read=7.0)
|
||||
http = PoolManager(timeout=timeout)
|
||||
response = http.request('GET', 'http://example.com/')
|
||||
|
||||
Or per-request (which overrides the default for the pool)::
|
||||
|
||||
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
|
||||
|
||||
Timeouts can be disabled by setting all the parameters to ``None``::
|
||||
|
||||
no_timeout = Timeout(connect=None, read=None)
|
||||
response = http.request('GET', 'http://example.com/, timeout=no_timeout)
|
||||
|
||||
|
||||
:param total:
|
||||
This combines the connect and read timeouts into one; the read timeout
|
||||
will be set to the time leftover from the connect attempt. In the
|
||||
event that both a connect timeout and a total are specified, or a read
|
||||
timeout and a total are specified, the shorter timeout will be applied.
|
||||
|
||||
Defaults to None.
|
||||
|
||||
:type total: integer, float, or None
|
||||
|
||||
:param connect:
|
||||
The maximum amount of time to wait for a connection attempt to a server
|
||||
to succeed. Omitting the parameter will default the connect timeout to
|
||||
the system default, probably `the global default timeout in socket.py
|
||||
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
|
||||
None will set an infinite timeout for connection attempts.
|
||||
|
||||
:type connect: integer, float, or None
|
||||
|
||||
:param read:
|
||||
The maximum amount of time to wait between consecutive
|
||||
read operations for a response from the server. Omitting
|
||||
the parameter will default the read timeout to the system
|
||||
default, probably `the global default timeout in socket.py
|
||||
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
|
||||
None will set an infinite timeout.
|
||||
|
||||
:type read: integer, float, or None
|
||||
|
||||
.. note::
|
||||
|
||||
Many factors can affect the total amount of time for urllib3 to return
|
||||
an HTTP response.
|
||||
|
||||
For example, Python's DNS resolver does not obey the timeout specified
|
||||
on the socket. Other factors that can affect total request time include
|
||||
high CPU load, high swap, the program running at a low priority level,
|
||||
or other behaviors.
|
||||
|
||||
In addition, the read and total timeouts only measure the time between
|
||||
read operations on the socket connecting the client and the server,
|
||||
not the total amount of time for the request to return a complete
|
||||
response. For most requests, the timeout is raised because the server
|
||||
has not sent the first byte in the specified time. This is not always
|
||||
the case; if a server streams one byte every fifteen seconds, a timeout
|
||||
of 20 seconds will not trigger, even though the request will take
|
||||
several minutes to complete.
|
||||
|
||||
If your goal is to cut off any request after a set amount of wall clock
|
||||
time, consider having a second "watcher" thread to cut off a slow
|
||||
request.
|
||||
"""
|
||||
|
||||
#: A sentinel object representing the default timeout value
|
||||
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
|
||||
|
||||
def __init__(self, total=None, connect=_Default, read=_Default):
|
||||
self._connect = self._validate_timeout(connect, 'connect')
|
||||
self._read = self._validate_timeout(read, 'read')
|
||||
self.total = self._validate_timeout(total, 'total')
|
||||
self._start_connect = None
|
||||
|
||||
def __str__(self):
|
||||
return '%s(connect=%r, read=%r, total=%r)' % (
|
||||
type(self).__name__, self._connect, self._read, self.total)
|
||||
|
||||
@classmethod
|
||||
def _validate_timeout(cls, value, name):
|
||||
""" Check that a timeout attribute is valid.
|
||||
|
||||
:param value: The timeout value to validate
|
||||
:param name: The name of the timeout attribute to validate. This is
|
||||
used to specify in error messages.
|
||||
:return: The validated and casted version of the given value.
|
||||
:raises ValueError: If it is a numeric value less than or equal to
|
||||
zero, or the type is not an integer, float, or None.
|
||||
"""
|
||||
if value is _Default:
|
||||
return cls.DEFAULT_TIMEOUT
|
||||
|
||||
if value is None or value is cls.DEFAULT_TIMEOUT:
|
||||
return value
|
||||
|
||||
if isinstance(value, bool):
|
||||
raise ValueError("Timeout cannot be a boolean value. It must "
|
||||
"be an int, float or None.")
|
||||
try:
|
||||
float(value)
|
||||
except (TypeError, ValueError):
|
||||
raise ValueError("Timeout value %s was %s, but it must be an "
|
||||
"int, float or None." % (name, value))
|
||||
|
||||
try:
|
||||
if value <= 0:
|
||||
raise ValueError("Attempted to set %s timeout to %s, but the "
|
||||
"timeout cannot be set to a value less "
|
||||
"than or equal to 0." % (name, value))
|
||||
except TypeError: # Python 3
|
||||
raise ValueError("Timeout value %s was %s, but it must be an "
|
||||
"int, float or None." % (name, value))
|
||||
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def from_float(cls, timeout):
|
||||
""" Create a new Timeout from a legacy timeout value.
|
||||
|
||||
The timeout value used by httplib.py sets the same timeout on the
|
||||
connect(), and recv() socket requests. This creates a :class:`Timeout`
|
||||
object that sets the individual timeouts to the ``timeout`` value
|
||||
passed to this function.
|
||||
|
||||
:param timeout: The legacy timeout value.
|
||||
:type timeout: integer, float, sentinel default object, or None
|
||||
:return: Timeout object
|
||||
:rtype: :class:`Timeout`
|
||||
"""
|
||||
return Timeout(read=timeout, connect=timeout)
|
||||
|
||||
def clone(self):
|
||||
""" Create a copy of the timeout object
|
||||
|
||||
Timeout properties are stored per-pool but each request needs a fresh
|
||||
Timeout object to ensure each one has its own start/stop configured.
|
||||
|
||||
:return: a copy of the timeout object
|
||||
:rtype: :class:`Timeout`
|
||||
"""
|
||||
# We can't use copy.deepcopy because that will also create a new object
|
||||
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
|
||||
# detect the user default.
|
||||
return Timeout(connect=self._connect, read=self._read,
|
||||
total=self.total)
|
||||
|
||||
def start_connect(self):
|
||||
""" Start the timeout clock, used during a connect() attempt
|
||||
|
||||
:raises urllib3.exceptions.TimeoutStateError: if you attempt
|
||||
to start a timer that has been started already.
|
||||
"""
|
||||
if self._start_connect is not None:
|
||||
raise TimeoutStateError("Timeout timer has already been started.")
|
||||
self._start_connect = current_time()
|
||||
return self._start_connect
|
||||
|
||||
def get_connect_duration(self):
|
||||
""" Gets the time elapsed since the call to :meth:`start_connect`.
|
||||
|
||||
:return: Elapsed time.
|
||||
:rtype: float
|
||||
:raises urllib3.exceptions.TimeoutStateError: if you attempt
|
||||
to get duration for a timer that hasn't been started.
|
||||
"""
|
||||
if self._start_connect is None:
|
||||
raise TimeoutStateError("Can't get connect duration for timer "
|
||||
"that has not started.")
|
||||
return current_time() - self._start_connect
|
||||
|
||||
@property
|
||||
def connect_timeout(self):
|
||||
""" Get the value to use when setting a connection timeout.
|
||||
|
||||
This will be a positive float or integer, the value None
|
||||
(never timeout), or the default system timeout.
|
||||
|
||||
:return: Connect timeout.
|
||||
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
|
||||
"""
|
||||
if self.total is None:
|
||||
return self._connect
|
||||
|
||||
if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
|
||||
return self.total
|
||||
|
||||
return min(self._connect, self.total)
|
||||
|
||||
@property
|
||||
def read_timeout(self):
|
||||
""" Get the value for the read timeout.
|
||||
|
||||
This assumes some time has elapsed in the connection timeout and
|
||||
computes the read timeout appropriately.
|
||||
|
||||
If self.total is set, the read timeout is dependent on the amount of
|
||||
time taken by the connect timeout. If the connection time has not been
|
||||
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
|
||||
raised.
|
||||
|
||||
:return: Value to use for the read timeout.
|
||||
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
|
||||
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
|
||||
has not yet been called on this object.
|
||||
"""
|
||||
if (self.total is not None and
|
||||
self.total is not self.DEFAULT_TIMEOUT and
|
||||
self._read is not None and
|
||||
self._read is not self.DEFAULT_TIMEOUT):
|
||||
# In case the connect timeout has not yet been established.
|
||||
if self._start_connect is None:
|
||||
return self._read
|
||||
return max(0, min(self.total - self.get_connect_duration(),
|
||||
self._read))
|
||||
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
|
||||
return max(0, self.total - self.get_connect_duration())
|
||||
else:
|
||||
return self._read
|
||||
@@ -1,230 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from collections import namedtuple
|
||||
|
||||
from ..exceptions import LocationParseError
|
||||
|
||||
|
||||
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
|
||||
|
||||
# We only want to normalize urls with an HTTP(S) scheme.
|
||||
# urllib3 infers URLs without a scheme (None) to be http.
|
||||
NORMALIZABLE_SCHEMES = ('http', 'https', None)
|
||||
|
||||
|
||||
class Url(namedtuple('Url', url_attrs)):
|
||||
"""
|
||||
Datastructure for representing an HTTP URL. Used as a return value for
|
||||
:func:`parse_url`. Both the scheme and host are normalized as they are
|
||||
both case-insensitive according to RFC 3986.
|
||||
"""
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
|
||||
query=None, fragment=None):
|
||||
if path and not path.startswith('/'):
|
||||
path = '/' + path
|
||||
if scheme:
|
||||
scheme = scheme.lower()
|
||||
if host and scheme in NORMALIZABLE_SCHEMES:
|
||||
host = host.lower()
|
||||
return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
|
||||
query, fragment)
|
||||
|
||||
@property
|
||||
def hostname(self):
|
||||
"""For backwards-compatibility with urlparse. We're nice like that."""
|
||||
return self.host
|
||||
|
||||
@property
|
||||
def request_uri(self):
|
||||
"""Absolute path including the query string."""
|
||||
uri = self.path or '/'
|
||||
|
||||
if self.query is not None:
|
||||
uri += '?' + self.query
|
||||
|
||||
return uri
|
||||
|
||||
@property
|
||||
def netloc(self):
|
||||
"""Network location including host and port"""
|
||||
if self.port:
|
||||
return '%s:%d' % (self.host, self.port)
|
||||
return self.host
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
"""
|
||||
Convert self into a url
|
||||
|
||||
This function should more or less round-trip with :func:`.parse_url`. The
|
||||
returned url may not be exactly the same as the url inputted to
|
||||
:func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
|
||||
with a blank port will have : removed).
|
||||
|
||||
Example: ::
|
||||
|
||||
>>> U = parse_url('http://google.com/mail/')
|
||||
>>> U.url
|
||||
'http://google.com/mail/'
|
||||
>>> Url('http', 'username:password', 'host.com', 80,
|
||||
... '/path', 'query', 'fragment').url
|
||||
'http://username:password@host.com:80/path?query#fragment'
|
||||
"""
|
||||
scheme, auth, host, port, path, query, fragment = self
|
||||
url = ''
|
||||
|
||||
# We use "is not None" we want things to happen with empty strings (or 0 port)
|
||||
if scheme is not None:
|
||||
url += scheme + '://'
|
||||
if auth is not None:
|
||||
url += auth + '@'
|
||||
if host is not None:
|
||||
url += host
|
||||
if port is not None:
|
||||
url += ':' + str(port)
|
||||
if path is not None:
|
||||
url += path
|
||||
if query is not None:
|
||||
url += '?' + query
|
||||
if fragment is not None:
|
||||
url += '#' + fragment
|
||||
|
||||
return url
|
||||
|
||||
def __str__(self):
|
||||
return self.url
|
||||
|
||||
|
||||
def split_first(s, delims):
|
||||
"""
|
||||
Given a string and an iterable of delimiters, split on the first found
|
||||
delimiter. Return two split parts and the matched delimiter.
|
||||
|
||||
If not found, then the first part is the full input string.
|
||||
|
||||
Example::
|
||||
|
||||
>>> split_first('foo/bar?baz', '?/=')
|
||||
('foo', 'bar?baz', '/')
|
||||
>>> split_first('foo/bar?baz', '123')
|
||||
('foo/bar?baz', '', None)
|
||||
|
||||
Scales linearly with number of delims. Not ideal for large number of delims.
|
||||
"""
|
||||
min_idx = None
|
||||
min_delim = None
|
||||
for d in delims:
|
||||
idx = s.find(d)
|
||||
if idx < 0:
|
||||
continue
|
||||
|
||||
if min_idx is None or idx < min_idx:
|
||||
min_idx = idx
|
||||
min_delim = d
|
||||
|
||||
if min_idx is None or min_idx < 0:
|
||||
return s, '', None
|
||||
|
||||
return s[:min_idx], s[min_idx + 1:], min_delim
|
||||
|
||||
|
||||
def parse_url(url):
|
||||
"""
|
||||
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
|
||||
performed to parse incomplete urls. Fields not provided will be None.
|
||||
|
||||
Partly backwards-compatible with :mod:`urlparse`.
|
||||
|
||||
Example::
|
||||
|
||||
>>> parse_url('http://google.com/mail/')
|
||||
Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
|
||||
>>> parse_url('google.com:80')
|
||||
Url(scheme=None, host='google.com', port=80, path=None, ...)
|
||||
>>> parse_url('/foo?bar')
|
||||
Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
|
||||
"""
|
||||
|
||||
# While this code has overlap with stdlib's urlparse, it is much
|
||||
# simplified for our needs and less annoying.
|
||||
# Additionally, this implementations does silly things to be optimal
|
||||
# on CPython.
|
||||
|
||||
if not url:
|
||||
# Empty
|
||||
return Url()
|
||||
|
||||
scheme = None
|
||||
auth = None
|
||||
host = None
|
||||
port = None
|
||||
path = None
|
||||
fragment = None
|
||||
query = None
|
||||
|
||||
# Scheme
|
||||
if '://' in url:
|
||||
scheme, url = url.split('://', 1)
|
||||
|
||||
# Find the earliest Authority Terminator
|
||||
# (http://tools.ietf.org/html/rfc3986#section-3.2)
|
||||
url, path_, delim = split_first(url, ['/', '?', '#'])
|
||||
|
||||
if delim:
|
||||
# Reassemble the path
|
||||
path = delim + path_
|
||||
|
||||
# Auth
|
||||
if '@' in url:
|
||||
# Last '@' denotes end of auth part
|
||||
auth, url = url.rsplit('@', 1)
|
||||
|
||||
# IPv6
|
||||
if url and url[0] == '[':
|
||||
host, url = url.split(']', 1)
|
||||
host += ']'
|
||||
|
||||
# Port
|
||||
if ':' in url:
|
||||
_host, port = url.split(':', 1)
|
||||
|
||||
if not host:
|
||||
host = _host
|
||||
|
||||
if port:
|
||||
# If given, ports must be integers. No whitespace, no plus or
|
||||
# minus prefixes, no non-integer digits such as ^2 (superscript).
|
||||
if not port.isdigit():
|
||||
raise LocationParseError(url)
|
||||
try:
|
||||
port = int(port)
|
||||
except ValueError:
|
||||
raise LocationParseError(url)
|
||||
else:
|
||||
# Blank ports are cool, too. (rfc3986#section-3.2.3)
|
||||
port = None
|
||||
|
||||
elif not host and url:
|
||||
host = url
|
||||
|
||||
if not path:
|
||||
return Url(scheme, auth, host, port, path, query, fragment)
|
||||
|
||||
# Fragment
|
||||
if '#' in path:
|
||||
path, fragment = path.split('#', 1)
|
||||
|
||||
# Query
|
||||
if '?' in path:
|
||||
path, query = path.split('?', 1)
|
||||
|
||||
return Url(scheme, auth, host, port, path, query, fragment)
|
||||
|
||||
|
||||
def get_host(url):
|
||||
"""
|
||||
Deprecated. Use :func:`parse_url` instead.
|
||||
"""
|
||||
p = parse_url(url)
|
||||
return p.scheme or 'http', p.hostname, p.port
|
||||
@@ -1,40 +0,0 @@
|
||||
from .selectors import (
|
||||
HAS_SELECT,
|
||||
DefaultSelector,
|
||||
EVENT_READ,
|
||||
EVENT_WRITE
|
||||
)
|
||||
|
||||
|
||||
def _wait_for_io_events(socks, events, timeout=None):
|
||||
""" Waits for IO events to be available from a list of sockets
|
||||
or optionally a single socket if passed in. Returns a list of
|
||||
sockets that can be interacted with immediately. """
|
||||
if not HAS_SELECT:
|
||||
raise ValueError('Platform does not have a selector')
|
||||
if not isinstance(socks, list):
|
||||
# Probably just a single socket.
|
||||
if hasattr(socks, "fileno"):
|
||||
socks = [socks]
|
||||
# Otherwise it might be a non-list iterable.
|
||||
else:
|
||||
socks = list(socks)
|
||||
with DefaultSelector() as selector:
|
||||
for sock in socks:
|
||||
selector.register(sock, events)
|
||||
return [key[0].fileobj for key in
|
||||
selector.select(timeout) if key[1] & events]
|
||||
|
||||
|
||||
def wait_for_read(socks, timeout=None):
|
||||
""" Waits for reading to be available from a list of sockets
|
||||
or optionally a single socket if passed in. Returns a list of
|
||||
sockets that can be read from immediately. """
|
||||
return _wait_for_io_events(socks, EVENT_READ, timeout)
|
||||
|
||||
|
||||
def wait_for_write(socks, timeout=None):
|
||||
""" Waits for writing to be available from a list of sockets
|
||||
or optionally a single socket if passed in. Returns a list of
|
||||
sockets that can be written to immediately. """
|
||||
return _wait_for_io_events(socks, EVENT_WRITE, timeout)
|
||||
Reference in New Issue
Block a user