Files
dive-into-python3/chardet/index.html
T
Mark Pilgrim 831681489e initial import
2009-01-24 16:05:55 -05:00

53 lines
3.2 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Universal Encoding Detector: character encoding auto-detection in Python</title>
<link rel="stylesheet" href="/css/chardet.css" type="text/css">
</head>
<body id="charset-feedparser-org">
<!-- The line below is a server-side include directive, not an ordinary comment. Keep its syntax exactly as written; presumably the web server expands it into the shared site header (confirm SSI is enabled where this page is served). -->
<!--#include virtual="/inc/header.html" -->
<div id="main">
<div id="mainInner">
<h2 class="skip">Getting started</h2>
<!-- Sample interactive Python session: defines a small urlread helper, then shows the result of chardet.detect() for pages in several different encodings. Whitespace inside the pre element is significant, so do not re-indent it. -->
<!-- NOTE(review): urllib.urlopen is the Python 2 spelling; under Python 3 this would be urllib.request.urlopen. Confirm whether this sample should be ported before changing the displayed text. -->
<div class="example">
<pre class="screen"><samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput"><span class="pykeyword">import</span> urllib</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">urlread = lambda url: urllib.urlopen(url).read()</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput"><span class="pykeyword">import</span> chardet</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://google.cn/">http://google.cn/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'GB2312', 'confidence': 0.99}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://yahoo.co.jp/">http://yahoo.co.jp/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'EUC-JP', 'confidence': 0.99}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://amazon.co.jp/">http://amazon.co.jp/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'SHIFT_JIS', 'confidence': 1}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://pravda.ru/">http://pravda.ru/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'windows-1251', 'confidence': 0.9355}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://auction.co.kr/">http://auction.co.kr/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'EUC-KR', 'confidence': 0.99}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://haaretz.co.il/">http://haaretz.co.il/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'windows-1255', 'confidence': 0.99}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://www.nectec.or.th/tindex.html">http://www.nectec.or.th/tindex.html</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'TIS-620', 'confidence': 0.7675}</span>
<samp class="prompt">&gt;&gt;&gt; </samp><span class="userinput">chardet.detect(urlread(<span class="pystring">"<a href="http://feedparser.org/docs/">http://feedparser.org/docs/</a>"</span>))</span>
<span class="computeroutput">{'encoding': 'utf-8', 'confidence': 0.99}</span>
</pre>
</div> <!-- example -->
</div> <!-- mainInner -->
</div> <!-- main -->
<hr>
<div id="footer"><p class="copyright">Copyright &copy; 2006-2008 Mark Pilgrim &middot; <a href="mailto:mark@diveintomark.org">mark@diveintomark.org</a> &middot; <a href="/docs/license.html">Terms of use</a></p></div>
</body>
</html>