<!--
mirror of
https://github.com/kennethreitz/dive-into-python3.git
synced 2026-06-05 23:10:17 +00:00
53 lines
3.2 KiB
HTML
-->
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="en">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <title>Universal Encoding Detector: character encoding auto-detection in Python</title>
  <link rel="stylesheet" href="/css/chardet.css" type="text/css">
</head>
<body id="charset-feedparser-org">
  <!-- The next line looks like a server-side include directive (presumably
       Apache-style SSI; confirm against the server config). Keep its text
       unchanged so the shared header is still spliced in when served. -->
  <!--#include virtual="/inc/header.html" -->
  <div id="main">
    <div id="mainInner">

      <h2 class="skip">Getting started</h2>
      <div class="example">
        <!-- Interactive session transcript. Whitespace inside the pre element
             is rendered literally, so its lines are intentionally left
             unindented and the blank lines between calls are significant. -->
<pre class="screen"><samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput"><span class="pykeyword">import</span> urllib</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">urlread = lambda url: urllib.urlopen(url).read()</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput"><span class="pykeyword">import</span> chardet</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://google.cn/">http://google.cn/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'GB2312', 'confidence': 0.99}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://yahoo.co.jp/">http://yahoo.co.jp/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'EUC-JP', 'confidence': 0.99}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://amazon.co.jp/">http://amazon.co.jp/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'SHIFT_JIS', 'confidence': 1}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://pravda.ru/">http://pravda.ru/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'windows-1251', 'confidence': 0.9355}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://auction.co.kr/">http://auction.co.kr/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'EUC-KR', 'confidence': 0.99}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://haaretz.co.il/">http://haaretz.co.il/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'windows-1255', 'confidence': 0.99}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://www.nectec.or.th/tindex.html">http://www.nectec.or.th/tindex.html</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'TIS-620', 'confidence': 0.7675}</span>

<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://feedparser.org/docs/">http://feedparser.org/docs/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'utf-8', 'confidence': 0.99}</span>
</pre>
      </div> <!-- example -->

    </div> <!-- mainInner -->
  </div> <!-- main -->

  <hr>

  <div id="footer"><p class="copyright">Copyright © 2006-2008 Mark Pilgrim · <a href="mailto:mark@diveintomark.org">mark@diveintomark.org</a> · <a href="/docs/license.html">Terms of use</a></p></div>

</body>
</html>