mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 15:00:19 +00:00
Compare commits
186 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1cc051f3e8 | |||
| 3da155ce0d | |||
| 9a34cf0980 | |||
| 434f66b4eb | |||
| d056916c53 | |||
| cf5239f097 | |||
| 49d8cb816f | |||
| fbd277ff2e | |||
| 6f4572fa56 | |||
| 453fc8614c | |||
| 01cf58e431 | |||
| f6cd89c76c | |||
| 1e0f30e8a6 | |||
| 569d35bfca | |||
| d40cdfbcd0 | |||
| 86bbaf9bea | |||
| 0ed01d85b9 | |||
| fc4cc7fa14 | |||
| 1146ec2341 | |||
| 1a7d597745 | |||
| 56b627a561 | |||
| 98e182bed2 | |||
| c8a5563309 | |||
| c225a64d68 | |||
| d611d5a14f | |||
| 45121ddd65 | |||
| c74357cb20 | |||
| 939b0af551 | |||
| 9c2018653f | |||
| 7f0748aac9 | |||
| 41a5c67159 | |||
| 3efefcc8da | |||
| d19de6025b | |||
| 65ba937c0d | |||
| 79a2bb888f | |||
| 25eacaf6f0 | |||
| c2a9af7fb3 | |||
| 3b06f3760d | |||
| e7ee3195a7 | |||
| 5bd2e3df52 | |||
| 837b3f83e6 | |||
| ff8f23edd5 | |||
| 5ffcfd56f2 | |||
| 955c24c974 | |||
| 192a5efabb | |||
| 9e45b95d12 | |||
| d8f0a018ae | |||
| 7545f3726e | |||
| 85e2bd73fc | |||
| 37033903c5 | |||
| 02c38c2520 | |||
| 26748deb9f | |||
| 63f6cea132 | |||
| 1b035f9774 | |||
| 2c14486c33 | |||
| 8bc69c9d85 | |||
| d36a2cbd42 | |||
| 1ab0eb3fae | |||
| cd71e1a5b1 | |||
| 47f79a7ca1 | |||
| 9f38efe413 | |||
| 5d98239a7e | |||
| a3f0d02633 | |||
| b29007a0df | |||
| e75c3c1a66 | |||
| 47cebbc328 | |||
| e4c39524f7 | |||
| c88c794314 | |||
| 752443f077 | |||
| 7c0507bcce | |||
| 652ac85549 | |||
| 05ea3c35fc | |||
| d5fada7e1d | |||
| 511c58d4e1 | |||
| c469360a0e | |||
| 97b4401b18 | |||
| 40e0f41b4c | |||
| 39435727ba | |||
| eda9d5af03 | |||
| 15435047c6 | |||
| a3781e3c89 | |||
| 6a825a8a39 | |||
| 6a449d497a | |||
| d807c60346 | |||
| 71603662b1 | |||
| 21c11b9911 | |||
| e8c923d712 | |||
| bc581c08df | |||
| 4f9c9d09ec | |||
| 63e8a7172d | |||
| 45e0af9f0e | |||
| fa6f5b3af3 | |||
| 0528e0a500 | |||
| 8e83734985 | |||
| 783eccc67d | |||
| 7236415f42 | |||
| c0a3c3ea1e | |||
| 14bd964fb1 | |||
| 6bfc6634ba | |||
| 54affad292 | |||
| 7c963a0f4d | |||
| 02f27f15c5 | |||
| 9c65515e7a | |||
| c87a954a9e | |||
| 42e40ed0ab | |||
| 23ab6c4724 | |||
| 32a09ccd6a | |||
| 81a7f79b3d | |||
| 05c9b33003 | |||
| ec7273d02d | |||
| 19ee1997b5 | |||
| f01d65c2e9 | |||
| 9778a96351 | |||
| 906138b138 | |||
| 43c68b396f | |||
| d611233c80 | |||
| 3d02b866ce | |||
| 887ee2fbac | |||
| bfd211854a | |||
| bc75911500 | |||
| a2b4e4c6ba | |||
| fde6f11763 | |||
| 33a83316df | |||
| f6d7888d9e | |||
| c19e2f2c5b | |||
| eaa2b9b8ea | |||
| 2f8083bda6 | |||
| 2c5a9af76e | |||
| e74a8f41cc | |||
| cd5aa4fc06 | |||
| 1d460bac40 | |||
| 4a3fde37a3 | |||
| 62ad123ad8 | |||
| fefc7b4d1f | |||
| 6313437a27 | |||
| 23a5bb1443 | |||
| 864f29cc4b | |||
| c136b794a7 | |||
| d254c2d2b0 | |||
| 9b235150cf | |||
| 9f3e6eeaa1 | |||
| 51728f954f | |||
| 2949b7c656 | |||
| 07d243bbc9 | |||
| bf3484e606 | |||
| 9b2ab6fae9 | |||
| 7a3d55daab | |||
| eec0595c5c | |||
| 0c7c248b96 | |||
| 0d14f7f2b9 | |||
| d5f713024d | |||
| 415bc819e7 | |||
| 974258094e | |||
| ab16f69be6 | |||
| 28d9af852a | |||
| 39c6ea6503 | |||
| 39b66ad8e9 | |||
| 004b3da680 | |||
| d4923533eb | |||
| 29e0b76910 | |||
| 4f54de2630 | |||
| 1f0d68ee79 | |||
| cae8fa1276 | |||
| 4c0a20a7b9 | |||
| 6c1fa87138 | |||
| 0e30255836 | |||
| 1156d5a220 | |||
| 83b71967b9 | |||
| 4dab48cd76 | |||
| 5324526329 | |||
| 1dfcd42233 | |||
| f162b19bd6 | |||
| 707164e459 | |||
| 42f0a285c3 | |||
| d111cc7cc7 | |||
| 25fe211a22 | |||
| 4b675494c4 | |||
| a196b9a5dd | |||
| 5ba56c2bb3 | |||
| 36fbdda492 | |||
| 273d2729ee | |||
| 3036bc9e52 | |||
| b9c74eacc8 | |||
| 805ccfae34 | |||
| fddc018394 | |||
| 2477100062 |
+4
-1
@@ -22,4 +22,7 @@ coverage.xml
|
||||
nosetests.xml
|
||||
junit-py25.xml
|
||||
junit-py26.xml
|
||||
junit-py27.xml
|
||||
junit-py27.xml
|
||||
|
||||
# tox noise
|
||||
.tox
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
language: python
|
||||
python:
|
||||
- 2.6
|
||||
- 2.7
|
||||
- 3.2
|
||||
- 3.3
|
||||
install:
|
||||
- python setup.py install
|
||||
script: python test_tablib.py
|
||||
@@ -4,9 +4,14 @@ various contributors:
|
||||
Development Lead
|
||||
````````````````
|
||||
|
||||
- Kenneth Reitz <_@kennethreitz.com>
|
||||
- Kenneth Reitz <me@kennethreitz.org>
|
||||
|
||||
|
||||
Core Contributors
|
||||
`````````````````
|
||||
|
||||
- Iuri de Silvio <iurisilvio@gmail.com>
|
||||
|
||||
Patches and Suggestions
|
||||
```````````````````````
|
||||
|
||||
@@ -15,4 +20,10 @@ Patches and Suggestions
|
||||
- Luca Beltrame
|
||||
- Benjamin Wohlwend
|
||||
- Erik Youngren
|
||||
- Mark Rogers
|
||||
- Mark Rogers
|
||||
- Mark Walling
|
||||
- Mike Waldner
|
||||
- Joel Friedly
|
||||
- Jakub Janoszek
|
||||
- Marc Abramowitz
|
||||
- Alex Gaynor
|
||||
|
||||
+25
-2
@@ -1,8 +1,31 @@
|
||||
History
|
||||
-------
|
||||
|
||||
0.9.8
|
||||
+++++
|
||||
++++
|
||||
|
||||
* Unicode Column Headers
|
||||
|
||||
0.9.11 (2011-06-30)
|
||||
+++++++++++++++++++
|
||||
|
||||
* Bugfixes
|
||||
|
||||
0.9.10 (2011-06-22)
|
||||
+++++++++++++++++++
|
||||
|
||||
* Bugfixes
|
||||
|
||||
0.9.9 (2011-06-21)
|
||||
++++++++++++++++++
|
||||
|
||||
* Dataset API Changes
|
||||
* ``stack_rows`` => ``stack``, ``stack_columns`` => ``stack_cols``
|
||||
* column operations have their own methods now (``append_col``, ``insert_col``)
|
||||
* List-style ``pop()``
|
||||
* Redis-style ``rpush``, ``lpush``, ``rpop``, ``lpop``, ``rpush_col``, and ``lpush_col``
|
||||
|
||||
0.9.8 (2011-05-22)
|
||||
++++++++++++++++++
|
||||
|
||||
* OpenDocument Spreadsheet support (.ods)
|
||||
* Full Unicode TSV support
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011 Kenneth Reitz.
|
||||
Copyright 2011 Kenneth Reitz
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
include HISTORY.rst README.rst LICENSE AUTHORS
|
||||
include HISTORY.rst README.rst LICENSE AUTHORS NOTICE test_tablib.py
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Tablib includes some vendorized python libraries: ordereddict, pyyaml,
|
||||
simplejson, unicodecsv, and xlwt.
|
||||
Tablib includes some vendorized python libraries: ordereddict, odfpy, pyyaml,
|
||||
simplejson, unicodecsv, xlrd, xlrd3, xlwt, and xlwt3.
|
||||
|
||||
Markup License
|
||||
==============
|
||||
@@ -7,6 +7,213 @@ Markup License
|
||||
Markup is in the public domain.
|
||||
|
||||
|
||||
Odfpy License
|
||||
=============
|
||||
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
|
||||
OrderedDict License
|
||||
===================
|
||||
@@ -60,40 +267,6 @@ SOFTWARE.
|
||||
|
||||
|
||||
|
||||
AnyJSON License
|
||||
==================
|
||||
|
||||
This software is licensed under the ``New BSD License``:
|
||||
|
||||
Copyright (c) 2009, by the authors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of the authors nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without specific
|
||||
prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
UnicodeCSV License
|
||||
==================
|
||||
|
||||
@@ -126,8 +299,8 @@ or implied, of Jeremy Dunck.
|
||||
|
||||
|
||||
|
||||
XLWT License
|
||||
============
|
||||
XLWT (and XLWT3) License
|
||||
========================
|
||||
|
||||
Portions copyright © 2007, Stephen John Machin, Lingfo Pty Ltd
|
||||
All rights reserved.
|
||||
@@ -206,4 +379,81 @@ Libknecht St., 4
|
||||
+7(0712)56-09-83
|
||||
|
||||
<roman@kiseliov.ru>
|
||||
Subject: pyExcelerator
|
||||
Subject: pyExcelerator
|
||||
|
||||
|
||||
|
||||
|
||||
XLRD (and XLRD3) License
|
||||
========================
|
||||
|
||||
Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Copyright (c) 2001 David Giffin.
|
||||
All rights reserved.
|
||||
|
||||
Based on the the Java version: Andrew Khan Copyright (c) 2000.
|
||||
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
3. All advertising materials mentioning features or use of this
|
||||
software must display the following acknowledgment:
|
||||
"This product includes software developed by
|
||||
David Giffin <david@giffin.org>."
|
||||
|
||||
4. Redistributions of any form whatsoever must retain the following
|
||||
acknowledgment:
|
||||
"This product includes software developed by
|
||||
David Giffin <david@giffin.org>."
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY
|
||||
EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR
|
||||
ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
+8
-11
@@ -1,6 +1,9 @@
|
||||
Tablib: format-agnostic tabular dataset library
|
||||
===============================================
|
||||
|
||||
.. image:: https://travis-ci.org/kennethreitz/tablib.svg?branch=develop
|
||||
:target: https://travis-ci.org/kennethreitz/tablib
|
||||
|
||||
::
|
||||
|
||||
_____ ______ ___________ ______
|
||||
@@ -28,10 +31,10 @@ Overview
|
||||
--------
|
||||
|
||||
`tablib.Dataset()`
|
||||
A Dataset is a table of tabular data. It may or may not have a header row. They can be build and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to Excel (XLS), JSON, YAML, and CSV.
|
||||
A Dataset is a table of tabular data. It may or may not have a header row. It can be built and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to XLSX, XLS, ODS, JSON, YAML, CSV, TSV, and HTML.
|
||||
|
||||
`tablib.Databook()`
|
||||
A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to Excel (XLS), JSON, and YAML.
|
||||
A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to XLSX, XLS, ODS, JSON, and YAML.
|
||||
|
||||
Usage
|
||||
-----
|
||||
@@ -55,7 +58,7 @@ Intelligently add new rows: ::
|
||||
|
||||
Intelligently add new columns: ::
|
||||
|
||||
>>> data.append(col=(90, 67, 83), header='age')
|
||||
>>> data.append_col((90, 67, 83), header='age')
|
||||
|
||||
Slice rows: ::
|
||||
|
||||
@@ -117,7 +120,8 @@ EXCEL!
|
||||
++++++
|
||||
::
|
||||
|
||||
>>> open('people.xls', 'wb').write(data.xls)
|
||||
>>> with open('people.xls', 'wb') as f:
|
||||
... f.write(data.xls)
|
||||
|
||||
It's that easy.
|
||||
|
||||
@@ -141,13 +145,6 @@ changes to the **develop** branch (or branch off of it), and send a pull
|
||||
request. Make sure you add yourself to AUTHORS_.
|
||||
|
||||
|
||||
Roadmap
|
||||
-------
|
||||
|
||||
v1.0.0:
|
||||
- Hooks system
|
||||
- Tablib.ext namespace
|
||||
- Width detection on XLS out
|
||||
|
||||
|
||||
.. _`the repository`: http://github.com/kennethreitz/tablib
|
||||
|
||||
@@ -3,7 +3,5 @@
|
||||
- pre/post-import
|
||||
- pre/post-export
|
||||
* Add Tablib.ext namespace
|
||||
* Fix 2.x/3.x handling (currently internal codebase fork)
|
||||
* Make CSV write more customizable.
|
||||
* Width detection for XLS output
|
||||
* Documentation Improvements
|
||||
Vendored
+2
-2
@@ -1,4 +1,4 @@
|
||||
<h3><a href="http://tablib.org">About Tablib</a></h3>
|
||||
<h3><a href="http://docs.python-tablib.org">About Tablib</a></h3>
|
||||
<p>
|
||||
Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.
|
||||
</p>
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
<h3>Useful Links</h3>
|
||||
<ul>
|
||||
<li><a href="http://tablib.org/">The Tablib Website</a></li>
|
||||
<li><a href="http://docs.python-tablib.org/">The Tablib Website</a></li>
|
||||
<li><a href="http://pypi.python.org/pypi/tablib">Tablib @ PyPI</a></li>
|
||||
<li><a href="http://github.com/kennethreitz/tablib">Tablib @ GitHub</a></li>
|
||||
<li><a href="http://github.com/kennethreitz/tablib/issues">Issue Tracker</a></li>
|
||||
|
||||
Vendored
+1
-1
@@ -1,4 +1,4 @@
|
||||
<h3><a href="http://tablib.org">About Tablib</a></h3>
|
||||
<h3><a href="http://docs.python-tablib.org/">About Tablib</a></h3>
|
||||
<p>
|
||||
Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.
|
||||
</p>
|
||||
Vendored
+21
@@ -15,6 +15,12 @@
|
||||
<a href="https://github.com/kennethreitz/tablib">
|
||||
<img style="position: absolute; top: 0; right: 0; border: 0;" src="http://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub" />
|
||||
</a>
|
||||
|
||||
<script type="text/javascript" src="//www.hellobar.com/hellobar.js"></script>
|
||||
<script type="text/javascript">
|
||||
new HelloBar(36402,48802);
|
||||
</script>
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
var _gaq = _gaq || [];
|
||||
@@ -30,4 +36,19 @@
|
||||
})();
|
||||
|
||||
</script>
|
||||
|
||||
<script type="text/javascript">
|
||||
(function() {
|
||||
var t = document.createElement('script');
|
||||
t.type = 'text/javascript';
|
||||
t.async = true;
|
||||
t.id = 'gauges-tracker';
|
||||
t.setAttribute('data-site-id',
|
||||
'4ddc284f613f5d2f1a000001');
|
||||
t.src = '//secure.gaug.es/track.js';
|
||||
var s = document.getElementsByTagName('script')[0];
|
||||
s.parentNode.insertBefore(t, s);
|
||||
})();
|
||||
</script>
|
||||
|
||||
{%- endblock %}
|
||||
|
||||
Vendored
+6
@@ -462,3 +462,9 @@ a:hover tt {
|
||||
background-color: #ccc;
|
||||
-webkit-border-radius: 3px;
|
||||
}
|
||||
|
||||
/* misc. */
|
||||
|
||||
.revsys-inline {
|
||||
display: none!important;
|
||||
}
|
||||
+1
-1
@@ -48,7 +48,7 @@ copyright = u'2011. A <a href="http://kennethreitz.com/pages/open-projects.html"
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.9.8'
|
||||
version = tablib.__version__
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = version
|
||||
|
||||
|
||||
+37
-42
@@ -5,7 +5,8 @@ Development
|
||||
|
||||
Tablib is under active development, and contributors are welcome.
|
||||
|
||||
If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_.
|
||||
If you have a feature request, suggestion, or bug report, please open a new
|
||||
issue on GitHub_. To submit patches, please send a pull request on GitHub_.
|
||||
|
||||
If you'd like to contribute, there's plenty to do. Here's a short todo list.
|
||||
|
||||
@@ -42,19 +43,18 @@ Source Control
|
||||
--------------
|
||||
|
||||
|
||||
Tablib source is controlled with Git_, the lean, mean, distributed source control machine.
|
||||
Tablib source is controlled with Git_, the lean, mean, distributed source
|
||||
control machine.
|
||||
|
||||
The repository is publicly accessable.
|
||||
The repository is publicly accessible.
|
||||
|
||||
``git clone git://github.com/kennethreitz/tablib.git``
|
||||
|
||||
The project is hosted both on **GitHub** and **git.kennethreitz.com**.
|
||||
|
||||
|
||||
GitHub:
|
||||
|
||||
The project is hosted on **GitHub**.
|
||||
|
||||
|
||||
GitHub:
|
||||
http://github.com/kennethreitz/tablib
|
||||
"Mirror":
|
||||
http://git.kennethreitz.com/projects/tablib
|
||||
|
||||
|
||||
Git Branch Structure
|
||||
@@ -67,11 +67,11 @@ Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ .
|
||||
``master``
|
||||
Current production release (|version|) on PyPI.
|
||||
``gh-pages``
|
||||
Current release of http://tablib.org.
|
||||
Current release of http://docs.python-tablib.org.
|
||||
|
||||
Each release is tagged.
|
||||
|
||||
When submitting patches, please place your feature/change in its own branch prior to opening a pull reqeust on GitHub_.
|
||||
When submitting patches, please place your feature/change in its own branch prior to opening a pull request on GitHub_.
|
||||
|
||||
|
||||
.. _Git: http://git-scm.org
|
||||
@@ -100,27 +100,27 @@ Tablib features a micro-framework for adding format support. The easiest way to
|
||||
1. Write a new format interface.
|
||||
|
||||
:class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib. Function names are crucial.
|
||||
|
||||
|
||||
Example **tablib/formats/_xxx.py**: ::
|
||||
|
||||
title = 'xxx'
|
||||
|
||||
|
||||
def export_set(dset):
|
||||
....
|
||||
# returns string representation of given dataset
|
||||
|
||||
|
||||
def export_book(dbook):
|
||||
....
|
||||
# returns string representation of given databook
|
||||
|
||||
|
||||
def import_set(dset, in_stream):
|
||||
...
|
||||
# populates given Dataset with given datastream
|
||||
|
||||
|
||||
def import_book(dbook, in_stream):
|
||||
...
|
||||
# returns Databook instance
|
||||
|
||||
|
||||
def detect(stream):
|
||||
...
|
||||
# returns True if given stream is parsable as xxx
|
||||
@@ -130,9 +130,9 @@ Tablib features a micro-framework for adding format support. The easiest way to
|
||||
|
||||
If the format excludes support for an import/export mechanism (*eg.* :class:`csv <tablib.Dataset.csv>` excludes :class:`Databook <tablib.Databook>` support), simply don't define the respective functions. Appropriate errors will be raised.
|
||||
|
||||
2.
|
||||
2.
|
||||
|
||||
Add your new format module to the :class:`tablib.formats.avalable` tuple.
|
||||
Add your new format module to the :class:`tablib.formats.available` tuple.
|
||||
|
||||
3.
|
||||
Add a mock property to the :class:`Dataset <tablib.Dataset>` class with verbose `reStructured Text`_ docstring. This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation.
|
||||
@@ -152,7 +152,7 @@ When developing a feature for Tablib, the easiest way to test your changes for p
|
||||
$ ./test_tablib.py
|
||||
|
||||
|
||||
`Hudson CI`_, amongst other tools, supports Java's xUnit testing report format. Nose_ allows us to generate our own xUnit reports.
|
||||
`Jenkins CI`_, amongst other tools, supports Java's xUnit testing report format. Nose_ allows us to generate our own xUnit reports.
|
||||
|
||||
Installing nose is simple. ::
|
||||
|
||||
@@ -168,26 +168,21 @@ This will generate a **nosetests.xml** file, which can then be analyzed.
|
||||
|
||||
|
||||
|
||||
.. _hudson:
|
||||
.. _jenkins:
|
||||
|
||||
----------------------
|
||||
Continuous Integration
|
||||
----------------------
|
||||
|
||||
Every commit made to the **develop** branch is automatically tested and inspected upon receipt with `Hudson CI`_. If you have access to the main repository and broke the build, you will receive an email accordingly.
|
||||
Every commit made to the **develop** branch is automatically tested and inspected upon receipt with `Travis CI`_. If you have access to the main repository and broke the build, you will receive an email accordingly.
|
||||
|
||||
Anyone may view the build status and history at any time.
|
||||
|
||||
http://ci.kennethreitz.com/
|
||||
|
||||
|
||||
If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server.
|
||||
|
||||
https://travis-ci.org/kennethreitz/tablib
|
||||
|
||||
Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets.
|
||||
|
||||
.. _`Hudson CI`: http://hudson.dev.java.net
|
||||
.. _`Kenneth Reitz`: http://kennethreitz.com/contact-me/
|
||||
.. _`Jenkins CI`: https://jenkins.io/
.. _`Travis CI`: https://travis-ci.org/
|
||||
|
||||
|
||||
.. _docs:
|
||||
@@ -196,28 +191,28 @@ Additional reports will also be included here in the future, including :pep:`8`
|
||||
Building the Docs
|
||||
-----------------
|
||||
|
||||
Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_.
|
||||
Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_.
|
||||
Documentation builds are powered by the powerful Pocoo project, Sphinx_. The :ref:`API Documentation <api>` is mostly documented inline throughout the module.
|
||||
|
||||
The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. ::
|
||||
|
||||
$ pip install sphinx
|
||||
|
||||
|
||||
|
||||
Then, to build an HTML version of the docs, simply run the following from the **docs** directory: ::
|
||||
|
||||
$ make html
|
||||
$ make html
|
||||
|
||||
Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, ready for publication on most web servers.
|
||||
|
||||
You can also generate the documentation in **ebpub**, **latex**, **json**, *&c* similarly.
|
||||
You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly.
|
||||
|
||||
.. admonition:: GitHub Pages
|
||||
|
||||
To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory.
|
||||
|
||||
GitHub Pages are powered by an HTML generation system called Jeckyl_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**).
|
||||
|
||||
To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory.
|
||||
|
||||
GitHub Pages are powered by an HTML generation system called Jekyll_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**).
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -232,15 +227,15 @@ You can also generate the documentation in **ebpub**, **latex**, **json**, *&c*
|
||||
Running it against the docs is even simpler. ::
|
||||
|
||||
$ sphinx-to-github _build/html
|
||||
|
||||
Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub.
|
||||
|
||||
Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub.
|
||||
|
||||
.. _`reStructured Text`: http://docutils.sourceforge.net/rst.html
|
||||
.. _Sphinx: http://sphinx.pocoo.org
|
||||
.. _`GitHub Pages`: http://pages.github.com
|
||||
.. _Jeckyl: http://github.com/mojombo/jekyll
|
||||
.. _Jekyll: http://github.com/mojombo/jekyll
|
||||
.. _`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github
|
||||
|
||||
----------
|
||||
|
||||
Make sure to check out the :ref:`API Documentation <api>`.
|
||||
Make sure to check out the :ref:`API Documentation <api>`.
|
||||
|
||||
+5
-7
@@ -26,8 +26,8 @@ Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library,
|
||||
|
||||
::
|
||||
|
||||
>>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age'])
|
||||
>>> map(data.append, [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)])
|
||||
>>> data = tablib.Dataset(*[('Kenneth', 'Reitz', 23), ('Bessie', 'Monke', 22)],
|
||||
headers=['First Name', 'Last Name', 'Age'])
|
||||
|
||||
>>> data.json
|
||||
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 23}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 22}]
|
||||
@@ -43,13 +43,11 @@ Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library,
|
||||
Testimonials
|
||||
------------
|
||||
|
||||
`The Library of Congress <http://www.loc.gov/>`_,
|
||||
`National Geographic <http://www.nationalgeographic.com/>`_,
|
||||
`Digg, Inc <http://digg.com/>`_,
|
||||
`Northrop Grumman <http://www.northropgrumman.com/>`_,
|
||||
`Discovery Channel <http://dsc.discovery.com/>`_,
|
||||
`The Sunlight Foundation <http://sunlightfoundation.com/>`_, and
|
||||
`NetApp, Inc <http://netapp.com>`_ use Tablib internally.
|
||||
and `The Sunlight Foundation <http://sunlightfoundation.com/>`_ use Tablib internally.
|
||||
|
||||
|
||||
|
||||
@@ -57,7 +55,7 @@ Testimonials
|
||||
Tablib by @kennethreitz saved my life. I had to consolidate like 5 huge poorly maintained lists of domains and data. It was a breeze!
|
||||
|
||||
**Dave Coutts**
|
||||
It's turning into one of my most used modules of 2010. You really hit a sweat spot for managing tabular data with a minimal amount of code and effort.
|
||||
It's turning into one of my most used modules of 2010. You really hit a sweet spot for managing tabular data with a minimal amount of code and effort.
|
||||
|
||||
**Joshua Ourisman**
|
||||
Tablib has made it so much easier to deal with the inevitable 'I want an Excel file!' requests from clients...
|
||||
@@ -101,4 +99,4 @@ method, this part of the documentation is for you.
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api
|
||||
api
|
||||
|
||||
+30
-16
@@ -2,7 +2,7 @@
|
||||
Installation
|
||||
============
|
||||
|
||||
This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancements <peed-extentions>`.
|
||||
This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancements <speed-extensions>`.
|
||||
|
||||
|
||||
.. _installing:
|
||||
@@ -11,15 +11,29 @@ This part of the documentation covers the installation of Tablib. The first step
|
||||
Installing Tablib
|
||||
-----------------
|
||||
|
||||
To install Tablib, it only takes one simple command. ::
|
||||
Distribute & Pip
|
||||
----------------
|
||||
|
||||
$ pip install tablib
|
||||
Installing Tablib is simple with `pip <http://www.pip-installer.org/>`_::
|
||||
|
||||
$ pip install tablib
|
||||
|
||||
or, with `easy_install <http://pypi.python.org/pypi/setuptools>`_::
|
||||
|
||||
$ easy_install tablib
|
||||
|
||||
But, you really `shouldn't do that <http://www.pip-installer.org/en/latest/other-tools.html#pip-compared-to-easy-install>`_.
|
||||
|
||||
|
||||
|
||||
Cheeseshop Mirror
|
||||
-----------------
|
||||
|
||||
If the Cheeseshop is down, you can also install Tablib from Kenneth Reitz's personal `Cheeseshop mirror <http://pip.kreitz.co/>`_::
|
||||
|
||||
$ pip install -i http://pip.kreitz.co/simple tablib
|
||||
|
||||
Or, if you must: ::
|
||||
|
||||
$ easy_install tablib
|
||||
|
||||
But, you really shouldn't do that.
|
||||
|
||||
|
||||
-------------------
|
||||
@@ -43,21 +57,21 @@ To download the full source history from Git, see :ref:`Source Control <scm>`.
|
||||
.. _zipball: http://github.com/kennethreitz/tablib/zipball/master
|
||||
|
||||
|
||||
.. _speed-extentions:
|
||||
Speed Extentions
|
||||
.. _speed-extensions:
|
||||
Speed Extensions
|
||||
----------------
|
||||
|
||||
.. versionadded:: 0.8.5
|
||||
|
||||
Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib.
|
||||
Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib.
|
||||
|
||||
However, if performance is important to you (and it should be), you can install **pyyaml** with C extentions from PyPi. ::
|
||||
However, if performance is important to you (and it should be), you can install **pyyaml** with C extensions from PyPi. ::
|
||||
|
||||
$ pip install PyYAML
|
||||
$ pip install PyYAML
|
||||
|
||||
If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
|
||||
|
||||
$ pip install simplejson
|
||||
$ pip install simplejson
|
||||
|
||||
|
||||
|
||||
@@ -65,14 +79,14 @@ If you're using Python 2.5, you should also install the **simplejson** module (p
|
||||
Staying Updated
|
||||
---------------
|
||||
|
||||
The latest version of Tablib will always be available here:
|
||||
The latest version of Tablib will always be available here:
|
||||
|
||||
* PyPi: http://pypi.python.org/pypi/tablib/
|
||||
* GitHub: http://github.com/kennethreitz/tablib/
|
||||
|
||||
When a new version is available, upgrading is simple. ::
|
||||
When a new version is available, upgrading is simple::
|
||||
|
||||
$ pip install tablib --upgrade
|
||||
$ pip install tablib --upgrade
|
||||
|
||||
|
||||
Now, go get a :ref:`Quick Start <quickstart>`.
|
||||
+13
-7
@@ -4,7 +4,10 @@ Introduction
|
||||
============
|
||||
|
||||
This part of the documentation covers all the interfaces of Tablib.
|
||||
Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. Advanced features include, segregation, dynamic columns, tags / filtering, and seamless format import/export.
|
||||
Tablib is a format-agnostic tabular dataset library, written in Python.
|
||||
It allows you to Pythonically import, export, and manipulate tabular data sets.
|
||||
Advanced features include segregation, dynamic columns, tags / filtering, and
|
||||
seamless format import/export.
|
||||
|
||||
|
||||
Philosophy
|
||||
@@ -21,29 +24,32 @@ Tablib was developed with a few :pep:`20` idioms in mind.
|
||||
|
||||
All contributions to Tablib should keep these important rules in mind.
|
||||
|
||||
.. _mit:
|
||||
.. _mit:
|
||||
|
||||
MIT License
|
||||
-----------
|
||||
|
||||
A large number of open source projects you find today are `GPL Licensed`_. While the GPL has its time and place, it should most certainly not be your go-to license for your next open source project.
|
||||
A large number of open source projects you find today are `GPL Licensed`_.
|
||||
While the GPL has its time and place, it should most certainly not be your
|
||||
go-to license for your next open source project.
|
||||
|
||||
A project that is released as GPL cannot be used in any commercial product without the product itself also being offered as open source. The MIT, BSD, and ISC licenses are great alternatives to the GPL that allow your open-source software to be used in proprietary, closed-source software.
|
||||
A project that is released as GPL cannot be used in any commercial product
|
||||
without the product itself also being offered as open source. The MIT, BSD, and
|
||||
ISC licenses are great alternatives to the GPL that allow your open-source
|
||||
software to be used in proprietary, closed-source software.
|
||||
|
||||
Tablib is released under terms of `The MIT License`_.
|
||||
|
||||
.. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php
|
||||
.. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
.. note::
|
||||
Tablib will be moved to the `Apache 2 License <http://www.apache.org/licenses/LICENSE-2.0>`_ upon the release of v1.0.0.
|
||||
|
||||
.. _license:
|
||||
|
||||
Tablib License
|
||||
--------------
|
||||
|
||||
Copyright (c) 2011 Kenneth Reitz.
|
||||
Copyright 2011 Kenneth Reitz
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+16
-9
@@ -68,7 +68,7 @@ Adding Headers
|
||||
--------------
|
||||
|
||||
|
||||
It's time enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. ::
|
||||
It's time to enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. ::
|
||||
|
||||
data.headers = ['First Name', 'Last Name']
|
||||
|
||||
@@ -87,7 +87,7 @@ Adding Columns
|
||||
|
||||
Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. ::
|
||||
|
||||
data.append(col=[22, 20], header='Age')
|
||||
data.append_col([22, 20], header='Age')
|
||||
|
||||
Let's view the data now. ::
|
||||
|
||||
@@ -146,6 +146,13 @@ To do so, we access the :class:`Dataset` as if it were a standard Python diction
|
||||
>>> data['First Name']
|
||||
['Kenneth', 'Bessie']
|
||||
|
||||
You can also access the column using its index. ::
|
||||
|
||||
>>> d.headers
|
||||
['Last Name', 'First Name', 'Age']
|
||||
>>> d.get_col(1)
|
||||
['Kenneth', 'Bessie']
|
||||
|
||||
Let's find the average age. ::
|
||||
|
||||
>>> ages = data['Age']
|
||||
@@ -158,7 +165,7 @@ Let's find the average age. ::
|
||||
Removing Rows & Columns
|
||||
-----------------------
|
||||
|
||||
It's easier than you could imagine. ::
|
||||
It's easier than you could imagine::
|
||||
|
||||
>>> del data['Col Name']
|
||||
|
||||
@@ -195,7 +202,7 @@ Let's add a dynamic column to our :class:`Dataset` object. In this example, we h
|
||||
"""Returns a random integer for entry."""
|
||||
return (random.randint(60,100)/100.0)
|
||||
|
||||
data.append(col=[random_grade], header='Grade')
|
||||
data.append_col(random_grade, header='Grade')
|
||||
|
||||
Let's have a look at our data. ::
|
||||
|
||||
@@ -244,7 +251,7 @@ Filtering Datasets with Tags
|
||||
|
||||
|
||||
When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter.
|
||||
This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on
|
||||
This allows you to filter your :class:`Dataset` later. This can be useful to separate rows of data based on
|
||||
arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`.
|
||||
|
||||
Let's tag some students. ::
|
||||
@@ -253,10 +260,10 @@ Let's tag some students. ::
|
||||
|
||||
students.headers = ['first', 'last']
|
||||
|
||||
students.append(['Kenneth', 'Reitz'], tags=['male', 'technical'])
|
||||
students.append(['Bessie', 'Monke'], tags=['female', 'creative'])
|
||||
students.rpush(['Kenneth', 'Reitz'], tags=['male', 'technical'])
|
||||
students.rpush(['Bessie', 'Monke'], tags=['female', 'creative'])
|
||||
|
||||
Now that we have extra meta-data on our rows, we can use easily filter our :class:`Dataset`. Let's just see Male students. ::
|
||||
Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's just see Male students. ::
|
||||
|
||||
|
||||
>>> students.filter(['male']).yaml
|
||||
@@ -273,7 +280,7 @@ When dealing with a large number of :class:`Datasets <Dataset>` in spreadsheet f
|
||||
|
||||
Let's say we have 3 different :class:`Datasets <Dataset>`. All we have to do is add them to a :class:`Databook` object... ::
|
||||
|
||||
book = tablib.Databook([data1, data2, data3])
|
||||
book = tablib.Databook((data1, data2, data3))
|
||||
|
||||
... and export to Excel just like :class:`Datasets <Dataset>`. ::
|
||||
|
||||
|
||||
@@ -4,43 +4,69 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from distutils.core import setup
|
||||
import tablib
|
||||
|
||||
try:
|
||||
from setuptools import setup
|
||||
except ImportError:
|
||||
from distutils.core import setup
|
||||
|
||||
|
||||
def publish():
|
||||
"""Publish to PyPi"""
|
||||
if sys.argv[-1] == 'publish':
|
||||
os.system("python setup.py sdist upload")
|
||||
|
||||
if sys.argv[-1] == "publish":
|
||||
publish()
|
||||
sys.exit()
|
||||
|
||||
required = []
|
||||
if sys.argv[-1] == 'speedups':
|
||||
try:
|
||||
__import__('pip')
|
||||
except ImportError:
|
||||
print('Pip required.')
|
||||
sys.exit(1)
|
||||
|
||||
if sys.version_info[:2] < (2,6):
|
||||
required.append('simplejson')
|
||||
os.system('pip install ujson pyyaml')
|
||||
sys.exit()
|
||||
|
||||
if sys.argv[-1] == 'test':
|
||||
try:
|
||||
__import__('py')
|
||||
except ImportError:
|
||||
print('py.test required.')
|
||||
sys.exit(1)
|
||||
|
||||
errors = os.system('py.test test_tablib.py')
|
||||
sys.exit(bool(errors))
|
||||
|
||||
setup(
|
||||
name='tablib',
|
||||
version='0.9.8',
|
||||
version=tablib.__version__,
|
||||
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
|
||||
long_description=open('README.rst').read() + '\n\n' +
|
||||
open('HISTORY.rst').read(),
|
||||
long_description=(open('README.rst').read() + '\n\n' +
|
||||
open('HISTORY.rst').read()),
|
||||
author='Kenneth Reitz',
|
||||
author_email='me@kennethreitz.com',
|
||||
url='http://tablib.org',
|
||||
packages= [
|
||||
author_email='me@kennethreitz.org',
|
||||
url='http://python-tablib.org',
|
||||
packages=[
|
||||
'tablib', 'tablib.formats',
|
||||
'tablib.packages',
|
||||
'tablib.packages.xlwt',
|
||||
'tablib.packages.xlwt3',
|
||||
'tablib.packages.xlrd',
|
||||
'tablib.packages.xlrd3',
|
||||
'tablib.packages.omnijson',
|
||||
'tablib.packages.odf',
|
||||
'tablib.packages.odf3',
|
||||
'tablib.packages.openpyxl',
|
||||
'tablib.packages.openpyxl.shared',
|
||||
'tablib.packages.openpyxl.reader',
|
||||
'tablib.packages.openpyxl.writer',
|
||||
'tablib.packages.openpyxl3',
|
||||
'tablib.packages.openpyxl3.shared',
|
||||
'tablib.packages.openpyxl3.reader',
|
||||
'tablib.packages.openpyxl3.writer',
|
||||
'tablib.packages.yaml',
|
||||
'tablib.packages.yaml3',
|
||||
'tablib.packages.unicodecsv'
|
||||
],
|
||||
install_requires=required,
|
||||
license='MIT',
|
||||
classifiers=(
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
@@ -54,5 +80,8 @@ setup(
|
||||
'Programming Language :: Python :: 3.0',
|
||||
'Programming Language :: Python :: 3.1',
|
||||
'Programming Language :: Python :: 3.2',
|
||||
'Programming Language :: Python :: 3.3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
),
|
||||
tests_require=['pytest'],
|
||||
)
|
||||
|
||||
+4
-4
@@ -1,8 +1,8 @@
|
||||
""" Tablib.
|
||||
"""
|
||||
""" Tablib. """
|
||||
|
||||
from tablib.core import (
|
||||
Databook, Dataset, detect, import_set,
|
||||
InvalidDatasetType, InvalidDimensions, UnsupportedFormat
|
||||
Databook, Dataset, detect, import_set, import_book,
|
||||
InvalidDatasetType, InvalidDimensions, UnsupportedFormat,
|
||||
__version__
|
||||
)
|
||||
|
||||
|
||||
+5
-1
@@ -23,6 +23,8 @@ except ImportError:
|
||||
if is_py3:
|
||||
from io import BytesIO
|
||||
import tablib.packages.xlwt3 as xlwt
|
||||
import tablib.packages.xlrd3 as xlrd
|
||||
from tablib.packages.xlrd3.biffh import XLRDError
|
||||
from tablib.packages import markup3 as markup
|
||||
from tablib.packages import openpyxl3 as openpyxl
|
||||
from tablib.packages.odf3 import opendocument, style, text, table
|
||||
@@ -39,6 +41,8 @@ else:
|
||||
from cStringIO import StringIO as BytesIO
|
||||
from cStringIO import StringIO
|
||||
import tablib.packages.xlwt as xlwt
|
||||
import tablib.packages.xlrd as xlrd
|
||||
from tablib.packages.xlrd.biffh import XLRDError
|
||||
from tablib.packages import markup
|
||||
from itertools import ifilter
|
||||
from tablib.packages import openpyxl
|
||||
@@ -46,4 +50,4 @@ else:
|
||||
|
||||
from tablib.packages import unicodecsv as csv
|
||||
|
||||
unicode = unicode
|
||||
unicode = unicode
|
||||
|
||||
+341
-142
@@ -5,7 +5,7 @@
|
||||
|
||||
This module implements the central Tablib objects.
|
||||
|
||||
:copyright: (c) 2011 by Kenneth Reitz.
|
||||
:copyright: (c) 2014 by Kenneth Reitz.
|
||||
:license: MIT, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
@@ -14,23 +14,22 @@ from operator import itemgetter
|
||||
|
||||
from tablib import formats
|
||||
|
||||
|
||||
from tablib.compat import OrderedDict
|
||||
from tablib.compat import OrderedDict, unicode
|
||||
|
||||
|
||||
__title__ = 'tablib'
|
||||
__version__ = '0.9.8'
|
||||
__build__ = 0x000908
|
||||
__version__ = '0.10.0'
|
||||
__build__ = 0x001000
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__license__ = 'MIT'
|
||||
__copyright__ = 'Copyright 2011 Kenneth Reitz'
|
||||
__copyright__ = 'Copyright 2014 Kenneth Reitz'
|
||||
__docformat__ = 'restructuredtext'
|
||||
|
||||
|
||||
class Row(object):
|
||||
"""Internal Row object. Mainly used for filtering."""
|
||||
|
||||
__slots__ = ['tuple', '_row', 'tags']
|
||||
__slots__ = ['_row', 'tags']
|
||||
|
||||
def __init__(self, row=list(), tags=list()):
|
||||
self._row = list(row)
|
||||
@@ -58,13 +57,26 @@ class Row(object):
|
||||
del self._row[i]
|
||||
|
||||
def __getstate__(self):
|
||||
return {slot: [getattr(self, slot) for slot in self.__slots__]}
|
||||
|
||||
slots = dict()
|
||||
|
||||
for slot in self.__slots__:
|
||||
attribute = getattr(self, slot)
|
||||
slots[slot] = attribute
|
||||
|
||||
return slots
|
||||
|
||||
def __setstate__(self, state):
|
||||
for (k, v) in list(state.items()): setattr(self, k, v)
|
||||
|
||||
def rpush(self, value):
|
||||
self.insert(0, value)
|
||||
|
||||
def lpush(self, value):
|
||||
self.insert(len(value), value)
|
||||
|
||||
def append(self, value):
|
||||
self._row.append(value)
|
||||
self.rpush(value)
|
||||
|
||||
def insert(self, index, value):
|
||||
self._row.insert(index, value)
|
||||
@@ -74,12 +86,12 @@ class Row(object):
|
||||
|
||||
@property
|
||||
def tuple(self):
|
||||
'''Tuple representation of :class:`Row`.'''
|
||||
"""Tuple representation of :class:`Row`."""
|
||||
return tuple(self._row)
|
||||
|
||||
@property
|
||||
def list(self):
|
||||
'''List representation of :class:`Row`.'''
|
||||
"""List representation of :class:`Row`."""
|
||||
return list(self._row)
|
||||
|
||||
def has_tag(self, tag):
|
||||
@@ -100,7 +112,7 @@ class Dataset(object):
|
||||
functionality.
|
||||
|
||||
Usually you create a :class:`Dataset` instance in your main module, and append
|
||||
rows and columns as you collect data. ::
|
||||
rows as you collect data. ::
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.headers = ('name', 'age')
|
||||
@@ -108,15 +120,27 @@ class Dataset(object):
|
||||
for (name, age) in some_collector():
|
||||
data.append((name, age))
|
||||
|
||||
You can also set rows and headers upon instantiation. This is useful if dealing
|
||||
with dozens or hundres of :class:`Dataset` objects. ::
|
||||
|
||||
Setting columns is similar. The column data length must equal the
|
||||
current height of the data and headers must be set ::
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.headers = ('first_name', 'last_name')
|
||||
|
||||
data.append(('John', 'Adams'))
|
||||
data.append(('George', 'Washington'))
|
||||
|
||||
data.append_col((90, 67), header='age')
|
||||
|
||||
|
||||
You can also set rows and headers upon instantiation. This is useful if
|
||||
dealing with dozens or hundreds of :class:`Dataset` objects. ::
|
||||
|
||||
headers = ('first_name', 'last_name')
|
||||
data = [('John', 'Adams'), ('George', 'Washington')]
|
||||
|
||||
data = tablib.Dataset(*data, headers=headers)
|
||||
|
||||
|
||||
:param \*args: (optional) list of rows to populate Dataset
|
||||
:param headers: (optional) list of strings for Dataset header row
|
||||
|
||||
@@ -157,7 +181,7 @@ class Dataset(object):
|
||||
|
||||
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, str):
|
||||
if isinstance(key, str) or isinstance(key, unicode):
|
||||
if key in self.headers:
|
||||
pos = self.headers.index(key) # get 'key' index from each data
|
||||
return [row[pos] for row in self._data]
|
||||
@@ -170,14 +194,13 @@ class Dataset(object):
|
||||
else:
|
||||
return [result.tuple for result in _results]
|
||||
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._validate(value)
|
||||
self._data[key] = Row(value)
|
||||
|
||||
|
||||
def __delitem__(self, key):
|
||||
if isinstance(key, str):
|
||||
if isinstance(key, str) or isinstance(key, unicode):
|
||||
|
||||
if key in self.headers:
|
||||
|
||||
@@ -200,6 +223,29 @@ class Dataset(object):
|
||||
except AttributeError:
|
||||
return '<dataset object>'
|
||||
|
||||
def __unicode__(self):
|
||||
result = [self.__headers]
|
||||
|
||||
result.extend(list(map(unicode, row)) for row in self._data)
|
||||
|
||||
# here, we calculate max width for each column
|
||||
lens = (list(map(len, row)) for row in result)
|
||||
field_lens = list(map(max, zip(*lens)))
|
||||
|
||||
# delimiter between header and data
|
||||
result.insert(1, ['-' * length for length in field_lens])
|
||||
|
||||
format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))
|
||||
|
||||
return '\n'.join(format_string.format(*row) for row in result)
|
||||
|
||||
def __str__(self):
|
||||
return self.__unicode__()
|
||||
|
||||
|
||||
# ---------
|
||||
# Internals
|
||||
# ---------
|
||||
|
||||
@classmethod
|
||||
def _register_formats(cls):
|
||||
@@ -235,11 +281,17 @@ class Dataset(object):
|
||||
return False
|
||||
|
||||
|
||||
def _package(self, dicts=True):
|
||||
def _package(self, dicts=True, ordered=True):
|
||||
"""Packages Dataset into lists of dictionaries for transmission."""
|
||||
# TODO: Dicts default to false?
|
||||
|
||||
_data = list(self._data)
|
||||
|
||||
if ordered:
|
||||
dict_pack = OrderedDict
|
||||
else:
|
||||
dict_pack = dict
|
||||
|
||||
# Execute formatters
|
||||
if self._formatters:
|
||||
for row_i, row in enumerate(_data):
|
||||
@@ -256,7 +308,7 @@ class Dataset(object):
|
||||
|
||||
if self.headers:
|
||||
if dicts:
|
||||
data = [OrderedDict(list(zip(self.headers, data_row))) for data_row in _data]
|
||||
data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
|
||||
else:
|
||||
data = [list(self.headers)] + list(_data)
|
||||
else:
|
||||
@@ -265,46 +317,6 @@ class Dataset(object):
|
||||
return data
|
||||
|
||||
|
||||
def _clean_col(self, col):
|
||||
"""Prepares the given column for insert/append."""
|
||||
|
||||
col = list(col)
|
||||
|
||||
if self.headers:
|
||||
header = [col.pop(0)]
|
||||
else:
|
||||
header = []
|
||||
|
||||
if len(col) == 1 and hasattr(col[0], '__call__'):
|
||||
|
||||
col = list(map(col[0], self._data))
|
||||
col = tuple(header + col)
|
||||
|
||||
return col
|
||||
|
||||
|
||||
@property
|
||||
def height(self):
|
||||
"""The number of rows currently in the :class:`Dataset`.
|
||||
Cannot be directly modified.
|
||||
"""
|
||||
return len(self._data)
|
||||
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
"""The number of columns currently in the :class:`Dataset`.
|
||||
Cannot be directly modified.
|
||||
"""
|
||||
|
||||
try:
|
||||
return len(self._data[0])
|
||||
except IndexError:
|
||||
try:
|
||||
return len(self.headers)
|
||||
except TypeError:
|
||||
return 0
|
||||
|
||||
|
||||
def _get_headers(self):
|
||||
"""An *optional* list of strings to be used for header rows and attribute names.
|
||||
@@ -328,6 +340,7 @@ class Dataset(object):
|
||||
|
||||
headers = property(_get_headers, _set_headers)
|
||||
|
||||
|
||||
def _get_dict(self):
|
||||
"""A native Python representation of the :class:`Dataset` object. If headers have
|
||||
been set, a list of Python dictionaries will be returned. If no headers have been set,
|
||||
@@ -375,6 +388,52 @@ class Dataset(object):
|
||||
dict = property(_get_dict, _set_dict)
|
||||
|
||||
|
||||
def _clean_col(self, col):
|
||||
"""Prepares the given column for insert/append."""
|
||||
|
||||
col = list(col)
|
||||
|
||||
if self.headers:
|
||||
header = [col.pop(0)]
|
||||
else:
|
||||
header = []
|
||||
|
||||
if len(col) == 1 and hasattr(col[0], '__call__'):
|
||||
|
||||
col = list(map(col[0], self._data))
|
||||
col = tuple(header + col)
|
||||
|
||||
return col
|
||||
|
||||
|
||||
@property
|
||||
def height(self):
|
||||
"""The number of rows currently in the :class:`Dataset`.
|
||||
Cannot be directly modified.
|
||||
"""
|
||||
return len(self._data)
|
||||
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
"""The number of columns currently in the :class:`Dataset`.
|
||||
Cannot be directly modified.
|
||||
"""
|
||||
|
||||
try:
|
||||
return len(self._data[0])
|
||||
except IndexError:
|
||||
try:
|
||||
return len(self.headers)
|
||||
except TypeError:
|
||||
return 0
|
||||
|
||||
|
||||
# -------
|
||||
# Formats
|
||||
# -------
|
||||
|
||||
|
||||
@property
|
||||
def xls():
|
||||
"""A Legacy Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
|
||||
@@ -388,7 +447,7 @@ class Dataset(object):
|
||||
:class:`Dataset.xls` contains binary data, so make sure to write in binary mode::
|
||||
|
||||
with open('output.xls', 'wb') as f:
|
||||
f.write(data.xls)'
|
||||
f.write(data.xls)
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -401,7 +460,7 @@ class Dataset(object):
|
||||
:class:`Dataset.xlsx` contains binary data, so make sure to write in binary mode::
|
||||
|
||||
with open('output.xlsx', 'wb') as f:
|
||||
f.write(data.xlsx)'
|
||||
f.write(data.xlsx)
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -411,10 +470,10 @@ class Dataset(object):
|
||||
|
||||
.. admonition:: Binary Warning
|
||||
|
||||
:class:`Dataset.xlsx` contains binary data, so make sure to write in binary mode::
|
||||
:class:`Dataset.ods` contains binary data, so make sure to write in binary mode::
|
||||
|
||||
with open('output.ods', 'wb') as f:
|
||||
f.write(data.ods)'
|
||||
f.write(data.ods)
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -430,6 +489,17 @@ class Dataset(object):
|
||||
data.csv = 'age, first_name, last_name\\n90, John, Adams'
|
||||
|
||||
Import assumes (for now) that headers exist.
|
||||
|
||||
.. admonition:: Binary Warning
|
||||
|
||||
:class:`Dataset.csv` uses \\r\\n line endings by default, so make
|
||||
sure to write in binary mode::
|
||||
|
||||
with open('output.csv', 'wb') as f:
|
||||
f.write(data.csv)
|
||||
|
||||
If you do not do this, and you export the file on Windows, your
|
||||
CSV file will open in Excel with a blank line between each row.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -455,7 +525,7 @@ class Dataset(object):
|
||||
set, a YAML list of objects will be returned. If no headers have
|
||||
been set, a YAML list of lists (rows) will be returned instead.
|
||||
|
||||
A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::
|
||||
A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: ::
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
|
||||
@@ -478,6 +548,7 @@ class Dataset(object):
|
||||
|
||||
Import assumes (for now) that headers exist.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def html():
|
||||
@@ -489,15 +560,159 @@ class Dataset(object):
|
||||
pass
|
||||
|
||||
|
||||
def append(self, row=None, col=None, header=None, tags=list()):
|
||||
"""Adds a row or column to the :class:`Dataset`.
|
||||
Usage is :class:`Dataset.insert` for documentation.
|
||||
# ----
|
||||
# Rows
|
||||
# ----
|
||||
|
||||
def insert(self, index, row, tags=list()):
|
||||
"""Inserts a row to the :class:`Dataset` at the given index.
|
||||
|
||||
Rows inserted must be the correct size (height or width).
|
||||
|
||||
The default behaviour is to insert the given row to the :class:`Dataset`
|
||||
object at the given index.
|
||||
"""
|
||||
|
||||
self._validate(row)
|
||||
self._data.insert(index, Row(row, tags=tags))
|
||||
|
||||
|
||||
def rpush(self, row, tags=list()):
|
||||
"""Adds a row to the end of the :class:`Dataset`.
|
||||
See :class:`Dataset.insert` for additional documentation.
|
||||
"""
|
||||
|
||||
if row is not None:
|
||||
self.insert(self.height, row=row, tags=tags)
|
||||
elif col is not None:
|
||||
self.insert(self.width, col=col, header=header)
|
||||
self.insert(self.height, row=row, tags=tags)
|
||||
|
||||
|
||||
def lpush(self, row, tags=list()):
|
||||
"""Adds a row to the top of the :class:`Dataset`.
|
||||
See :class:`Dataset.insert` for additional documentation.
|
||||
"""
|
||||
|
||||
self.insert(0, row=row, tags=tags)
|
||||
|
||||
|
||||
def append(self, row, tags=list()):
|
||||
"""Adds a row to the :class:`Dataset`.
|
||||
See :class:`Dataset.insert` for additional documentation.
|
||||
"""
|
||||
|
||||
self.rpush(row, tags)
|
||||
|
||||
def extend(self, rows, tags=list()):
|
||||
"""Adds a list of rows to the :class:`Dataset` using
|
||||
:class:`Dataset.append`
|
||||
"""
|
||||
|
||||
for row in rows:
|
||||
self.append(row, tags)
|
||||
|
||||
|
||||
def lpop(self):
|
||||
"""Removes and returns the first row of the :class:`Dataset`."""
|
||||
|
||||
cache = self[0]
|
||||
del self[0]
|
||||
|
||||
return cache
|
||||
|
||||
|
||||
def rpop(self):
|
||||
"""Removes and returns the last row of the :class:`Dataset`."""
|
||||
|
||||
cache = self[-1]
|
||||
del self[-1]
|
||||
|
||||
return cache
|
||||
|
||||
|
||||
def pop(self):
|
||||
"""Removes and returns the last row of the :class:`Dataset`."""
|
||||
|
||||
return self.rpop()
|
||||
|
||||
|
||||
# -------
|
||||
# Columns
|
||||
# -------
|
||||
|
||||
def insert_col(self, index, col=None, header=None):
|
||||
"""Inserts a column to the :class:`Dataset` at the given index.
|
||||
|
||||
Columns inserted must be the correct height.
|
||||
|
||||
You can also insert a column of a single callable object, which will
|
||||
add a new column with the return values of the callable each as an
|
||||
item in the column. ::
|
||||
|
||||
data.append_col(col=random.randint)
|
||||
|
||||
If inserting a column, and :class:`Dataset.headers` is set, the
|
||||
header attribute must be set, and will be considered the header for
|
||||
that row.
|
||||
|
||||
See :ref:`dyncols` for an in-depth example.
|
||||
|
||||
.. versionchanged:: 0.9.0
|
||||
If inserting a column, and :class:`Dataset.headers` is set, the
|
||||
header attribute must be set, and will be considered the header for
|
||||
that row.
|
||||
|
||||
.. versionadded:: 0.9.0
|
||||
If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
|
||||
This gives you the ability to :class:`filter <Dataset.filter>` your
|
||||
:class:`Dataset` later.
|
||||
|
||||
"""
|
||||
|
||||
if col is None:
|
||||
col = []
|
||||
|
||||
# Callable Columns...
|
||||
if hasattr(col, '__call__'):
|
||||
col = list(map(col, self._data))
|
||||
|
||||
col = self._clean_col(col)
|
||||
self._validate(col=col)
|
||||
|
||||
if self.headers:
|
||||
# pop the first item off, add to headers
|
||||
if not header:
|
||||
raise HeadersNeeded()
|
||||
|
||||
# corner case - if header is set without data
|
||||
elif header and self.height == 0 and len(col):
|
||||
raise InvalidDimensions
|
||||
|
||||
self.headers.insert(index, header)
|
||||
|
||||
|
||||
if self.height and self.width:
|
||||
|
||||
for i, row in enumerate(self._data):
|
||||
|
||||
row.insert(index, col[i])
|
||||
self._data[i] = row
|
||||
else:
|
||||
self._data = [Row([row]) for row in col]
|
||||
|
||||
|
||||
|
||||
def rpush_col(self, col, header=None):
|
||||
"""Adds a column to the end of the :class:`Dataset`.
|
||||
See :class:`Dataset.insert` for additional documentation.
|
||||
"""
|
||||
|
||||
self.insert_col(self.width, col, header=header)
|
||||
|
||||
|
||||
def lpush_col(self, col, header=None):
|
||||
"""Adds a column to the top of the :class:`Dataset`.
|
||||
See :class:`Dataset.insert` for additional documentation.
|
||||
"""
|
||||
|
||||
self.insert_col(0, col, header=header)
|
||||
|
||||
|
||||
def insert_separator(self, index, text='-'):
|
||||
@@ -519,6 +734,24 @@ class Dataset(object):
|
||||
self.insert_separator(index, text)
|
||||
|
||||
|
||||
def append_col(self, col, header=None):
|
||||
"""Adds a column to the :class:`Dataset`.
|
||||
See :class:`Dataset.insert_col` for additional documentation.
|
||||
"""
|
||||
|
||||
self.rpush_col(col, header)
|
||||
|
||||
|
||||
def get_col(self, index):
|
||||
"""Returns the column from the :class:`Dataset` at the given index."""
|
||||
|
||||
return [row[index] for row in self._data]
|
||||
|
||||
|
||||
# ----
|
||||
# Misc
|
||||
# ----
|
||||
|
||||
def add_formatter(self, col, handler):
|
||||
"""Adds a :ref:`formatter` to the :class:`Dataset`.
|
||||
|
||||
@@ -542,63 +775,6 @@ class Dataset(object):
|
||||
return True
|
||||
|
||||
|
||||
def insert(self, index, row=None, col=None, header=None, tags=list()):
|
||||
"""Inserts a row or column to the :class:`Dataset` at the given index.
|
||||
|
||||
Rows and columns inserted must be the correct size (height or width).
|
||||
|
||||
The default behaviour is to insert the given row to the :class:`Dataset`
|
||||
object at the given index. If the ``col`` parameter is given, however,
|
||||
a new column will be insert to the :class:`Dataset` object instead.
|
||||
|
||||
You can also insert a column of a single callable object, which will
|
||||
add a new column with the return values of the callable each as an
|
||||
item in the column. ::
|
||||
|
||||
data.append(col=random.randint)
|
||||
|
||||
See :ref:`dyncols` for an in-depth example.
|
||||
|
||||
.. versionchanged:: 0.9.0
|
||||
If inserting a column, and :class:`Dataset.headers` is set, the
|
||||
header attribute must be set, and will be considered the header for
|
||||
that row.
|
||||
|
||||
.. versionadded:: 0.9.0
|
||||
If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
|
||||
This gives you the ability to :class:`filter <Dataset.filter>` your
|
||||
:class:`Dataset` later.
|
||||
|
||||
"""
|
||||
if row:
|
||||
self._validate(row)
|
||||
self._data.insert(index, Row(row, tags=tags))
|
||||
elif col:
|
||||
col = list(col)
|
||||
|
||||
# Callable Columns...
|
||||
if len(col) == 1 and hasattr(col[0], '__call__'):
|
||||
col = list(map(col[0], self._data))
|
||||
|
||||
col = self._clean_col(col)
|
||||
self._validate(col=col)
|
||||
|
||||
if self.headers:
|
||||
# pop the first item off, add to headers
|
||||
if not header:
|
||||
raise HeadersNeeded()
|
||||
self.headers.insert(index, header)
|
||||
|
||||
if self.height and self.width:
|
||||
|
||||
for i, row in enumerate(self._data):
|
||||
|
||||
row.insert(index, col[i])
|
||||
self._data[i] = row
|
||||
else:
|
||||
self._data = [Row([row]) for row in col]
|
||||
|
||||
|
||||
def filter(self, tag):
|
||||
"""Returns a new instance of the :class:`Dataset`, excluding any rows
|
||||
that do not contain the given :ref:`tags <tags>`.
|
||||
@@ -613,16 +789,18 @@ class Dataset(object):
|
||||
"""Sort a :class:`Dataset` by a specific column, given string (for
|
||||
header) or integer (for column index). The order can be reversed by
|
||||
setting ``reverse`` to ``True``.
|
||||
Returns a new :class:`Dataset` instance where columns have been
|
||||
sorted."""
|
||||
|
||||
if isinstance(col, str):
|
||||
Returns a new :class:`Dataset` instance where columns have been
|
||||
sorted.
|
||||
"""
|
||||
|
||||
if isinstance(col, str) or isinstance(col, unicode):
|
||||
|
||||
if not self.headers:
|
||||
raise HeadersNeeded
|
||||
|
||||
_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
|
||||
_dset = Dataset(headers=self.headers)
|
||||
_dset = Dataset(headers=self.headers, title=self.title)
|
||||
|
||||
for item in _sorted:
|
||||
row = [item[key] for key in self.headers]
|
||||
@@ -633,7 +811,7 @@ class Dataset(object):
|
||||
col = self.headers[col]
|
||||
|
||||
_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
|
||||
_dset = Dataset(headers=self.headers)
|
||||
_dset = Dataset(headers=self.headers, title=self.title)
|
||||
|
||||
for item in _sorted:
|
||||
if self.headers:
|
||||
@@ -675,7 +853,7 @@ class Dataset(object):
|
||||
return _dset
|
||||
|
||||
|
||||
def stack_rows(self, other):
|
||||
def stack(self, other):
|
||||
"""Stack two :class:`Dataset` instances together by
|
||||
joining at the row level, and return new combined
|
||||
``Dataset`` instance."""
|
||||
@@ -698,7 +876,7 @@ class Dataset(object):
|
||||
return _dset
|
||||
|
||||
|
||||
def stack_columns(self, other):
|
||||
def stack_cols(self, other):
|
||||
"""Stack two :class:`Dataset` instances together by
|
||||
joining at the column level, and return a new
|
||||
combined ``Dataset`` instance. If either ``Dataset``
|
||||
@@ -722,10 +900,10 @@ class Dataset(object):
|
||||
_dset = Dataset()
|
||||
|
||||
for column in self.headers:
|
||||
_dset.append(col=self[column])
|
||||
_dset.append_col(col=self[column])
|
||||
|
||||
for column in other.headers:
|
||||
_dset.append(col=other[column])
|
||||
_dset.append_col(col=other[column])
|
||||
|
||||
_dset.headers = new_headers
|
||||
|
||||
@@ -777,22 +955,30 @@ class Databook(object):
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
def sheets(self):
|
||||
return self._datasets
|
||||
|
||||
def add_sheet(self, dataset):
|
||||
"""Adds given :class:`Dataset` to the :class:`Databook`."""
|
||||
if type(dataset) is Dataset:
|
||||
if isinstance(dataset, Dataset):
|
||||
self._datasets.append(dataset)
|
||||
else:
|
||||
raise InvalidDatasetType
|
||||
|
||||
|
||||
def _package(self):
|
||||
def _package(self, ordered=True):
|
||||
"""Packages :class:`Databook` for delivery."""
|
||||
collector = []
|
||||
|
||||
if ordered:
|
||||
dict_pack = OrderedDict
|
||||
else:
|
||||
dict_pack = dict
|
||||
|
||||
for dset in self._datasets:
|
||||
collector.append(OrderedDict(
|
||||
collector.append(dict_pack(
|
||||
title = dset.title,
|
||||
data = dset.dict
|
||||
data = dset._package(ordered=ordered)
|
||||
))
|
||||
return collector
|
||||
|
||||
@@ -827,6 +1013,19 @@ def import_set(stream):
|
||||
return None
|
||||
|
||||
|
||||
def import_book(stream):
|
||||
"""Return dataset of given stream."""
|
||||
(format, stream) = detect(stream)
|
||||
|
||||
try:
|
||||
databook = Databook()
|
||||
format.import_book(databook, stream)
|
||||
return databook
|
||||
|
||||
except AttributeError:
|
||||
return None
|
||||
|
||||
|
||||
class InvalidDatasetType(Exception):
|
||||
"Only Datasets can be added to a DataBook"
|
||||
|
||||
|
||||
+6
-10
@@ -3,15 +3,11 @@
|
||||
""" Tablib - CSV Support.
|
||||
"""
|
||||
|
||||
|
||||
import os
|
||||
|
||||
import tablib
|
||||
from tablib.compat import is_py3, csv, StringIO
|
||||
|
||||
|
||||
title = 'csv'
|
||||
extentions = ('csv',)
|
||||
extensions = ('csv',)
|
||||
|
||||
|
||||
DEFAULT_ENCODING = 'utf-8'
|
||||
@@ -39,9 +35,9 @@ def import_set(dset, in_stream, headers=True):
|
||||
dset.wipe()
|
||||
|
||||
if is_py3:
|
||||
rows = csv.reader(in_stream.splitlines())
|
||||
rows = csv.reader(StringIO(in_stream))
|
||||
else:
|
||||
rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING)
|
||||
rows = csv.reader(StringIO(in_stream), encoding=DEFAULT_ENCODING)
|
||||
for i, row in enumerate(rows):
|
||||
|
||||
if (i == 0) and (headers):
|
||||
@@ -53,7 +49,7 @@ def import_set(dset, in_stream, headers=True):
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is valid CSV."""
|
||||
try:
|
||||
rows = dialect = csv.Sniffer().sniff(stream)
|
||||
csv.Sniffer().sniff(stream, delimiters=',')
|
||||
return True
|
||||
except csv.Error:
|
||||
return False
|
||||
except (csv.Error, TypeError):
|
||||
return False
|
||||
|
||||
+14
-7
@@ -5,20 +5,21 @@
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info[0] > 2:
|
||||
from io import StringIO
|
||||
from io import BytesIO as StringIO
|
||||
from tablib.packages import markup3 as markup
|
||||
else:
|
||||
from cStringIO import StringIO
|
||||
from tablib.packages import markup
|
||||
|
||||
import tablib
|
||||
from tablib.compat import unicode
|
||||
import codecs
|
||||
|
||||
BOOK_ENDINGS = 'h3'
|
||||
|
||||
title = 'html'
|
||||
extentions = ('html', )
|
||||
extensions = ('html', )
|
||||
|
||||
|
||||
def export_set(dataset):
|
||||
@@ -30,20 +31,26 @@ def export_set(dataset):
|
||||
page.table.open()
|
||||
|
||||
if dataset.headers is not None:
|
||||
new_header = [item if item is not None else '' for item in dataset.headers]
|
||||
|
||||
page.thead.open()
|
||||
headers = markup.oneliner.th(dataset.headers)
|
||||
headers = markup.oneliner.th(new_header)
|
||||
page.tr(headers)
|
||||
page.thead.close()
|
||||
|
||||
for row in dataset:
|
||||
html_row = markup.oneliner.td(row)
|
||||
new_row = [item if item is not None else '' for item in row]
|
||||
|
||||
html_row = markup.oneliner.td(new_row)
|
||||
page.tr(html_row)
|
||||
|
||||
page.table.close()
|
||||
|
||||
stream.writelines(str(page))
|
||||
# Allow unicode characters in output
|
||||
wrapper = codecs.getwriter("utf8")(stream)
|
||||
wrapper.writelines(unicode(page))
|
||||
|
||||
return stream.getvalue()
|
||||
return stream.getvalue().decode('utf-8')
|
||||
|
||||
|
||||
def export_book(databook):
|
||||
|
||||
+7
-11
@@ -6,39 +6,35 @@
|
||||
import tablib
|
||||
|
||||
import sys
|
||||
if sys.version_info[:2] > (2, 5):
|
||||
from tablib.packages import anyjson
|
||||
else:
|
||||
from tablib.packages import anyjson25 as anyjson
|
||||
|
||||
from tablib.packages import omnijson as json
|
||||
|
||||
|
||||
title = 'json'
|
||||
extentions = ('json', 'jsn')
|
||||
extensions = ('json', 'jsn')
|
||||
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns JSON representation of Dataset."""
|
||||
return anyjson.serialize(dataset.dict)
|
||||
return json.dumps(dataset.dict)
|
||||
|
||||
|
||||
def export_book(databook):
|
||||
"""Returns JSON representation of Databook."""
|
||||
return anyjson.serialize(databook._package())
|
||||
return json.dumps(databook._package())
|
||||
|
||||
|
||||
def import_set(dset, in_stream):
|
||||
"""Returns dataset from JSON stream."""
|
||||
|
||||
dset.wipe()
|
||||
dset.dict = anyjson.deserialize(in_stream)
|
||||
dset.dict = json.loads(in_stream)
|
||||
|
||||
|
||||
def import_book(dbook, in_stream):
|
||||
"""Returns databook from JSON stream."""
|
||||
|
||||
dbook.wipe()
|
||||
for sheet in anyjson.deserialize(in_stream):
|
||||
for sheet in json.loads(in_stream):
|
||||
data = tablib.Dataset()
|
||||
data.title = sheet['title']
|
||||
data.dict = sheet['data']
|
||||
@@ -48,7 +44,7 @@ def import_book(dbook, in_stream):
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is valid JSON."""
|
||||
try:
|
||||
anyjson.deserialize(stream)
|
||||
json.loads(stream)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
@@ -14,7 +14,7 @@ else:
|
||||
from tablib.compat import opendocument, style, table, text, unicode
|
||||
|
||||
title = 'ods'
|
||||
extentions = ('ods',)
|
||||
extensions = ('ods',)
|
||||
|
||||
bold = style.Style(name="bold", family="paragraph")
|
||||
bold.addElement(style.TextProperties(fontweight="bold", fontweightasian="bold", fontweightcomplex="bold"))
|
||||
|
||||
+9
-10
@@ -3,26 +3,24 @@
|
||||
""" Tablib - TSV (Tab Separated Values) Support.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import tablib
|
||||
from tablib.compat import is_py3, csv, StringIO
|
||||
|
||||
|
||||
|
||||
title = 'tsv'
|
||||
extentions = ('tsv',)
|
||||
extensions = ('tsv',)
|
||||
|
||||
DEFAULT_ENCODING = 'utf-8'
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns a TSV representation of Dataset."""
|
||||
|
||||
stream = StringIO()
|
||||
|
||||
if is_py3:
|
||||
_tsv = csv.writer(stream, delimiter="\t")
|
||||
_tsv = csv.writer(stream, delimiter='\t')
|
||||
else:
|
||||
_tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter="\t")
|
||||
_tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter='\t')
|
||||
|
||||
for row in dataset._package(dicts=False):
|
||||
_tsv.writerow(row)
|
||||
@@ -32,12 +30,13 @@ def export_set(dataset):
|
||||
|
||||
def import_set(dset, in_stream, headers=True):
|
||||
"""Returns dataset from TSV stream."""
|
||||
|
||||
dset.wipe()
|
||||
|
||||
if is_py3:
|
||||
rows = csv.reader(in_stream.split('\r\n'), delimiter='\t')
|
||||
rows = csv.reader(in_stream.splitlines(), delimiter='\t')
|
||||
else:
|
||||
rows = csv.reader(in_stream.split('\r\n'), delimiter='\t',
|
||||
rows = csv.reader(in_stream.splitlines(), delimiter='\t',
|
||||
encoding=DEFAULT_ENCODING)
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
@@ -54,7 +53,7 @@ def import_set(dset, in_stream, headers=True):
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is valid TSV."""
|
||||
try:
|
||||
rows = dialect = csv.Sniffer().sniff(stream, delimiters='\t')
|
||||
csv.Sniffer().sniff(stream, delimiters='\t')
|
||||
return True
|
||||
except csv.Error:
|
||||
except (csv.Error, TypeError):
|
||||
return False
|
||||
|
||||
+58
-5
@@ -5,17 +5,36 @@
|
||||
|
||||
import sys
|
||||
|
||||
from tablib.compat import BytesIO, xlwt
|
||||
|
||||
from tablib.compat import BytesIO, xlwt, xlrd, XLRDError
|
||||
import tablib
|
||||
|
||||
title = 'xls'
|
||||
extentions = ('xls',)
|
||||
extensions = ('xls',)
|
||||
|
||||
# special styles
|
||||
wrap = xlwt.easyxf("alignment: wrap on")
|
||||
bold = xlwt.easyxf("font: bold on")
|
||||
|
||||
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is a readable excel file."""
|
||||
try:
|
||||
xlrd.open_workbook(file_contents=stream)
|
||||
return True
|
||||
except (TypeError, XLRDError):
|
||||
pass
|
||||
try:
|
||||
xlrd.open_workbook(file_contents=stream.read())
|
||||
return True
|
||||
except (AttributeError, XLRDError):
|
||||
pass
|
||||
try:
|
||||
xlrd.open_workbook(filename=stream)
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns XLS representation of Dataset."""
|
||||
|
||||
@@ -45,6 +64,42 @@ def export_book(databook):
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
def import_set(dset, in_stream, headers=True):
|
||||
"""Returns databook from XLS stream."""
|
||||
|
||||
dset.wipe()
|
||||
|
||||
xls_book = xlrd.open_workbook(file_contents=in_stream)
|
||||
sheet = xls_book.sheet_by_index(0)
|
||||
|
||||
dset.title = sheet.name
|
||||
|
||||
for i in xrange(sheet.nrows):
|
||||
if (i == 0) and (headers):
|
||||
dset.headers = sheet.row_values(0)
|
||||
else:
|
||||
dset.append(sheet.row_values(i))
|
||||
|
||||
def import_book(dbook, in_stream, headers=True):
|
||||
"""Returns databook from XLS stream."""
|
||||
|
||||
dbook.wipe()
|
||||
|
||||
xls_book = xlrd.open_workbook(file_contents=in_stream)
|
||||
|
||||
for sheet in xls_book.sheets():
|
||||
data = tablib.Dataset()
|
||||
data.title = sheet.name
|
||||
|
||||
for i in xrange(sheet.nrows):
|
||||
if (i == 0) and (headers):
|
||||
data.headers = sheet.row_values(0)
|
||||
else:
|
||||
data.append(sheet.row_values(i))
|
||||
|
||||
dbook.add_sheet(data)
|
||||
|
||||
|
||||
def dset_sheet(dataset, ws):
|
||||
"""Completes given worksheet from given Dataset."""
|
||||
_package = dataset._package(dicts=False)
|
||||
@@ -78,5 +133,3 @@ def dset_sheet(dataset, ws):
|
||||
ws.write(i, j, col)
|
||||
except TypeError:
|
||||
ws.write(i, j, col)
|
||||
|
||||
|
||||
|
||||
+55
-4
@@ -12,6 +12,7 @@ else:
|
||||
from cStringIO import StringIO as BytesIO
|
||||
|
||||
from tablib.compat import openpyxl
|
||||
import tablib
|
||||
|
||||
Workbook = openpyxl.workbook.Workbook
|
||||
ExcelWriter = openpyxl.writer.excel.ExcelWriter
|
||||
@@ -21,7 +22,16 @@ from tablib.compat import unicode
|
||||
|
||||
|
||||
title = 'xlsx'
|
||||
extentions = ('xlsx',)
|
||||
extensions = ('xlsx',)
|
||||
|
||||
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is a readable excel file."""
|
||||
try:
|
||||
openpyxl.reader.excel.load_workbook(stream)
|
||||
return True
|
||||
except openpyxl.shared.exc.InvalidFileException:
|
||||
pass
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns XLSX representation of Dataset."""
|
||||
@@ -41,7 +51,7 @@ def export_book(databook):
|
||||
"""Returns XLSX representation of DataBook."""
|
||||
|
||||
wb = Workbook()
|
||||
ew = ExcelWriter(workbook = wb)
|
||||
wb.worksheets = []
|
||||
for i, dset in enumerate(databook._datasets):
|
||||
ws = wb.create_sheet()
|
||||
ws.title = dset.title if dset.title else 'Sheet%s' % (i)
|
||||
@@ -50,10 +60,49 @@ def export_book(databook):
|
||||
|
||||
|
||||
stream = BytesIO()
|
||||
ew.save(stream)
|
||||
wb.save(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
def import_set(dset, in_stream, headers=True):
|
||||
"""Returns databook from XLS stream."""
|
||||
|
||||
dset.wipe()
|
||||
|
||||
xls_book = openpyxl.reader.excel.load_workbook(in_stream)
|
||||
sheet = xls_book.get_active_sheet()
|
||||
|
||||
dset.title = sheet.title
|
||||
|
||||
for i, row in enumerate(sheet.rows):
|
||||
row_vals = [c.value for c in row]
|
||||
if (i == 0) and (headers):
|
||||
dset.headers = row_vals
|
||||
else:
|
||||
dset.append(row_vals)
|
||||
|
||||
|
||||
def import_book(dbook, in_stream, headers=True):
|
||||
"""Returns databook from XLS stream."""
|
||||
|
||||
dbook.wipe()
|
||||
|
||||
xls_book = openpyxl.reader.excel.load_workbook(in_stream)
|
||||
|
||||
for sheet in xls_book.worksheets:
|
||||
data = tablib.Dataset()
|
||||
data.title = sheet.title
|
||||
|
||||
for i, row in enumerate(sheet.rows):
|
||||
row_vals = [c.value for c in row]
|
||||
if (i == 0) and (headers):
|
||||
data.headers = row_vals
|
||||
else:
|
||||
data.append(row_vals)
|
||||
|
||||
dbook.add_sheet(data)
|
||||
|
||||
|
||||
def dset_sheet(dataset, ws):
|
||||
"""Completes given worksheet from given Dataset."""
|
||||
_package = dataset._package(dicts=False)
|
||||
@@ -66,6 +115,8 @@ def dset_sheet(dataset, ws):
|
||||
row_number = i + 1
|
||||
for j, col in enumerate(row):
|
||||
col_idx = get_column_letter(j + 1)
|
||||
# We want to freeze the column after the last column
|
||||
frzn_col_idx = get_column_letter(j + 2)
|
||||
|
||||
# bold headers
|
||||
if (row_number == 1) and dataset.headers:
|
||||
@@ -74,7 +125,7 @@ def dset_sheet(dataset, ws):
|
||||
ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col)
|
||||
style = ws.get_style('%s%s' % (col_idx, row_number))
|
||||
style.font.bold = True
|
||||
ws.freeze_panes = '%s%s' % (col_idx, row_number)
|
||||
ws.freeze_panes = '%s%s' % (frzn_col_idx, row_number)
|
||||
|
||||
|
||||
# bold separators
|
||||
|
||||
+11
-9
@@ -12,32 +12,33 @@ except ImportError:
|
||||
import tablib.packages.yaml3 as yaml
|
||||
else:
|
||||
import tablib.packages.yaml as yaml
|
||||
|
||||
|
||||
|
||||
import tablib
|
||||
|
||||
|
||||
|
||||
title = 'yaml'
|
||||
extentions = ('yaml', 'yml')
|
||||
extensions = ('yaml', 'yml')
|
||||
|
||||
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns YAML representation of Dataset."""
|
||||
return yaml.dump(dataset.dict)
|
||||
|
||||
return yaml.safe_dump(dataset._package(ordered=False))
|
||||
|
||||
|
||||
def export_book(databook):
|
||||
"""Returns YAML representation of Databook."""
|
||||
return yaml.dump(databook._package())
|
||||
return yaml.safe_dump(databook._package(ordered=False))
|
||||
|
||||
|
||||
def import_set(dset, in_stream):
|
||||
"""Returns dataset from YAML stream."""
|
||||
|
||||
dset.wipe()
|
||||
dset.dict = yaml.load(in_stream)
|
||||
dset.dict = yaml.safe_load(in_stream)
|
||||
|
||||
|
||||
def import_book(dbook, in_stream):
|
||||
@@ -50,14 +51,15 @@ def import_book(dbook, in_stream):
|
||||
data.title = sheet['title']
|
||||
data.dict = sheet['data']
|
||||
dbook.add_sheet(data)
|
||||
|
||||
|
||||
def detect(stream):
|
||||
"""Returns True if given stream is valid YAML."""
|
||||
try:
|
||||
_yaml = yaml.load(stream)
|
||||
_yaml = yaml.safe_load(stream)
|
||||
if isinstance(_yaml, (list, tuple, dict)):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except yaml.parser.ParserError:
|
||||
return False
|
||||
except (yaml.parser.ParserError, yaml.reader.ReaderError,
|
||||
yaml.scanner.ScannerError):
|
||||
return False
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
"""
|
||||
Wraps the best available JSON implementation available in a common interface
|
||||
"""
|
||||
|
||||
__version__ = "0.2.0"
|
||||
__author__ = "Rune Halvorsen <runefh@gmail.com>"
|
||||
__homepage__ = "http://bitbucket.org/runeh/anyjson/"
|
||||
__docformat__ = "restructuredtext"
|
||||
|
||||
"""
|
||||
|
||||
.. function:: serialize(obj)
|
||||
|
||||
Serialize the object to JSON.
|
||||
|
||||
.. function:: deserialize(str)
|
||||
|
||||
Deserialize JSON-encoded object to a Python object.
|
||||
|
||||
.. function:: force_implementation(name)
|
||||
|
||||
Load a specific json module. This is useful for testing and not much else
|
||||
|
||||
.. attribute:: implementation
|
||||
|
||||
The json implementation object. This is probably not useful to you,
|
||||
except to get the name of the implementation in use. The name is
|
||||
available through `implementation.name`.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
implementation = None
|
||||
|
||||
"""
|
||||
.. data:: _modules
|
||||
|
||||
List of known json modules, and the names of their serialize/unserialize
|
||||
methods, as well as the exception they throw. Exception can be either
|
||||
an exception class or a string.
|
||||
"""
|
||||
_modules = [("cjson", "encode", "EncodeError", "decode", "DecodeError"),
|
||||
("jsonlib2", "write", "WriteError", "read", "ReadError"),
|
||||
("jsonlib", "write", "WriteError", "read", "ReadError"),
|
||||
("simplejson", "dumps", TypeError, "loads", ValueError),
|
||||
("json", "dumps", TypeError, "loads", ValueError),
|
||||
("django.utils.simplejson", "dumps", TypeError, "loads",
|
||||
ValueError)]
|
||||
_fields = ("modname", "encoder", "encerror", "decoder", "decerror")
|
||||
|
||||
|
||||
class _JsonImplementation(object):
|
||||
"""Incapsulates a JSON implementation"""
|
||||
|
||||
def __init__(self, modspec):
|
||||
modinfo = dict(list(zip(_fields, modspec)))
|
||||
|
||||
# No try block. We want importerror to end up at caller
|
||||
module = self._attempt_load(modinfo["modname"])
|
||||
|
||||
self.implementation = modinfo["modname"]
|
||||
self._encode = getattr(module, modinfo["encoder"])
|
||||
self._decode = getattr(module, modinfo["decoder"])
|
||||
self._encode_error = modinfo["encerror"]
|
||||
self._decode_error = modinfo["decerror"]
|
||||
|
||||
if isinstance(modinfo["encerror"], str):
|
||||
self._encode_error = getattr(module, modinfo["encerror"])
|
||||
if isinstance(modinfo["decerror"], str):
|
||||
self._decode_error = getattr(module, modinfo["decerror"])
|
||||
|
||||
self.name = modinfo["modname"]
|
||||
|
||||
def _attempt_load(self, modname):
|
||||
"""Attempt to load module name modname, returning it on success,
|
||||
throwing ImportError if module couldn't be imported"""
|
||||
__import__(modname)
|
||||
return sys.modules[modname]
|
||||
|
||||
def serialize(self, data):
|
||||
"""Serialize the datastructure to json. Returns a string. Raises
|
||||
TypeError if the object could not be serialized."""
|
||||
try:
|
||||
return self._encode(data)
|
||||
except self._encode_error as exc:
|
||||
raise TypeError(*exc.args)
|
||||
|
||||
def deserialize(self, s):
|
||||
"""deserialize the string to python data types. Raises
|
||||
ValueError if the string vould not be parsed."""
|
||||
try:
|
||||
return self._decode(s)
|
||||
except self._decode_error as exc:
|
||||
raise ValueError(*exc.args)
|
||||
|
||||
|
||||
def force_implementation(modname):
|
||||
"""Forces anyjson to use a specific json module if it's available"""
|
||||
global implementation
|
||||
for name, spec in [(e[0], e) for e in _modules]:
|
||||
if name == modname:
|
||||
implementation = _JsonImplementation(spec)
|
||||
return
|
||||
raise ImportError("No module named: %s" % modname)
|
||||
|
||||
|
||||
for modspec in _modules:
|
||||
try:
|
||||
implementation = _JsonImplementation(modspec)
|
||||
break
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
raise ImportError("No supported JSON module found")
|
||||
|
||||
serialize = lambda value: implementation.serialize(value)
|
||||
deserialize = lambda value: implementation.deserialize(value)
|
||||
@@ -1,118 +0,0 @@
|
||||
u"""
|
||||
Wraps the best available JSON implementation available in a common interface
|
||||
"""
|
||||
|
||||
__version__ = u"0.2.0"
|
||||
__author__ = u"Rune Halvorsen <runefh@gmail.com>"
|
||||
__homepage__ = u"http://bitbucket.org/runeh/anyjson/"
|
||||
__docformat__ = u"restructuredtext"
|
||||
|
||||
u"""
|
||||
|
||||
.. function:: serialize(obj)
|
||||
|
||||
Serialize the object to JSON.
|
||||
|
||||
.. function:: deserialize(str)
|
||||
|
||||
Deserialize JSON-encoded object to a Python object.
|
||||
|
||||
.. function:: force_implementation(name)
|
||||
|
||||
Load a specific json module. This is useful for testing and not much else
|
||||
|
||||
.. attribute:: implementation
|
||||
|
||||
The json implementation object. This is probably not useful to you,
|
||||
except to get the name of the implementation in use. The name is
|
||||
available through `implementation.name`.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from itertools import izip
|
||||
|
||||
implementation = None
|
||||
|
||||
u"""
|
||||
.. data:: _modules
|
||||
|
||||
List of known json modules, and the names of their serialize/unserialize
|
||||
methods, as well as the exception they throw. Exception can be either
|
||||
an exception class or a string.
|
||||
"""
|
||||
_modules = [(u"cjson", u"encode", u"EncodeError", u"decode", u"DecodeError"),
|
||||
(u"jsonlib2", u"write", u"WriteError", u"read", u"ReadError"),
|
||||
(u"jsonlib", u"write", u"WriteError", u"read", u"ReadError"),
|
||||
(u"simplejson", u"dumps", TypeError, u"loads", ValueError),
|
||||
(u"json", u"dumps", TypeError, u"loads", ValueError),
|
||||
(u"django.utils.simplejson", u"dumps", TypeError, u"loads",
|
||||
ValueError)]
|
||||
_fields = (u"modname", u"encoder", u"encerror", u"decoder", u"decerror")
|
||||
|
||||
|
||||
class _JsonImplementation(object):
|
||||
u"""Incapsulates a JSON implementation"""
|
||||
|
||||
def __init__(self, modspec):
|
||||
modinfo = dict(list(izip(_fields, modspec)))
|
||||
|
||||
# No try block. We want importerror to end up at caller
|
||||
module = self._attempt_load(modinfo[u"modname"])
|
||||
|
||||
self.implementation = modinfo[u"modname"]
|
||||
self._encode = getattr(module, modinfo[u"encoder"])
|
||||
self._decode = getattr(module, modinfo[u"decoder"])
|
||||
self._encode_error = modinfo[u"encerror"]
|
||||
self._decode_error = modinfo[u"decerror"]
|
||||
|
||||
if isinstance(modinfo[u"encerror"], unicode):
|
||||
self._encode_error = getattr(module, modinfo[u"encerror"])
|
||||
if isinstance(modinfo[u"decerror"], unicode):
|
||||
self._decode_error = getattr(module, modinfo[u"decerror"])
|
||||
|
||||
self.name = modinfo[u"modname"]
|
||||
|
||||
def _attempt_load(self, modname):
|
||||
u"""Attempt to load module name modname, returning it on success,
|
||||
throwing ImportError if module couldn't be imported"""
|
||||
__import__(modname)
|
||||
return sys.modules[modname]
|
||||
|
||||
def serialize(self, data):
|
||||
u"""Serialize the datastructure to json. Returns a string. Raises
|
||||
TypeError if the object could not be serialized."""
|
||||
try:
|
||||
return self._encode(data)
|
||||
except self._encode_error, exc:
|
||||
raise TypeError(*exc.args)
|
||||
|
||||
def deserialize(self, s):
|
||||
u"""deserialize the string to python data types. Raises
|
||||
ValueError if the string vould not be parsed."""
|
||||
try:
|
||||
return self._decode(s)
|
||||
except self._decode_error, exc:
|
||||
raise ValueError(*exc.args)
|
||||
|
||||
|
||||
def force_implementation(modname):
    u"""Make anyjson use the JSON module named ``modname``.

    ``_modules`` is searched for a spec whose first field equals
    ``modname``.  On a match the global ``implementation`` is replaced by a
    ``_JsonImplementation`` built from that spec.  ImportError is raised
    when no known spec carries that name.
    """
    global implementation
    for spec in _modules:
        if spec[0] != modname:
            continue
        implementation = _JsonImplementation(spec)
        return
    raise ImportError(u"No module named: %s" % modname)
|
||||
|
||||
|
||||
# Bind ``implementation`` to the first entry of ``_modules`` that can be
# imported; when none can, importing this module fails with ImportError.
for modspec in _modules:
    try:
        implementation = _JsonImplementation(modspec)
    except ImportError:
        # This candidate is not available; move on to the next one.
        continue
    break
else:
    raise ImportError(u"No supported JSON module found")


# Module-level shortcuts.  The global ``implementation`` is looked up on
# every call rather than captured once.
serialize = lambda value: implementation.serialize(value)
deserialize = lambda value: implementation.deserialize(value)
|
||||
@@ -67,7 +67,7 @@ class element:
|
||||
def render( self, tag, single, between, kwargs ):
|
||||
"""Append the actual tags to content."""
|
||||
|
||||
out = "<%s" % tag
|
||||
out = u"<%s" % tag
|
||||
for key, value in kwargs.iteritems( ):
|
||||
if value is not None: # when value is None that means stuff like <... checked>
|
||||
key = key.strip('_') # strip this so class_ will mean class, etc.
|
||||
@@ -75,16 +75,16 @@ class element:
|
||||
key = 'http-equiv'
|
||||
elif key == 'accept_charset':
|
||||
key = 'accept-charset'
|
||||
out = "%s %s=\"%s\"" % ( out, key, escape( value ) )
|
||||
out = u"%s %s=\"%s\"" % ( out, key, escape( value ) )
|
||||
else:
|
||||
out = "%s %s" % ( out, key )
|
||||
out = u"%s %s" % ( out, key )
|
||||
if between is not None:
|
||||
out = "%s>%s</%s>" % ( out, between, tag )
|
||||
out = u"%s>%s</%s>" % ( out, between, tag )
|
||||
else:
|
||||
if single:
|
||||
out = "%s />" % out
|
||||
out = u"%s />" % out
|
||||
else:
|
||||
out = "%s>" % out
|
||||
out = u"%s>" % out
|
||||
if self.parent is not None:
|
||||
self.parent.content.append( out )
|
||||
else:
|
||||
|
||||
@@ -39,7 +39,7 @@ UNIXPERMS = 0o100644 << 16 # -rw-r--r--
|
||||
IS_FILENAME = 0
|
||||
IS_IMAGE = 1
|
||||
# We need at least Python 2.2
|
||||
assert sys.version_info[0]>=2 and sys.version_info[1] >= 2
|
||||
assert sys.version_info[0]>=2
|
||||
|
||||
#sys.setrecursionlimit(100)
|
||||
#The recursion limit is set conservative so mistakes like
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .core import loads, dumps, JSONError
|
||||
|
||||
|
||||
__all__ = ('loads', 'dumps', 'JSONError')
|
||||
|
||||
|
||||
__version__ = '0.1.2'
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__license__ = 'MIT'
|
||||
@@ -0,0 +1,93 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
omnijson.core
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
This module provides the core omnijson functionality.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
engine = None
|
||||
_engine = None
|
||||
|
||||
|
||||
options = [
|
||||
['ujson', 'loads', 'dumps', (ValueError,)],
|
||||
['yajl', 'loads', 'dumps', (TypeError, ValueError)],
|
||||
['jsonlib2', 'read', 'write', (ValueError,)],
|
||||
['jsonlib', 'read', 'write', (ValueError,)],
|
||||
['simplejson', 'loads', 'dumps', (TypeError, ValueError)],
|
||||
['json', 'loads', 'dumps', (TypeError, ValueError)],
|
||||
['simplejson_from_packages', 'loads', 'dumps', (ValueError,)],
|
||||
]
|
||||
|
||||
|
||||
def _import(engine):
|
||||
try:
|
||||
if '_from_' in engine:
|
||||
engine, package = engine.split('_from_')
|
||||
m = __import__(package, globals(), locals(), [engine], -1)
|
||||
return getattr(m, engine)
|
||||
|
||||
return __import__(engine)
|
||||
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
def loads(s, **kwargs):
    """Decode the JSON document ``s`` with the selected engine.

    Extra keyword arguments are accepted but are not passed on to the
    engine.

    Raises JSONError when the engine fails with one of the exception types
    registered for it; any other exception propagates unchanged.
    """

    try:
        return _engine[0](s)

    except:
        # crazy 2/3 exception hack
        # http://www.voidspace.org.uk/python/weblog/arch_d7_2010_03_20.shtml

        ExceptionClass, why = sys.exc_info()[:2]

        if issubclass(ExceptionClass, tuple(_engine[2])):
            raise JSONError(why)

        # A bare ``raise`` re-raises the active exception with its original
        # traceback; ``raise why`` would restart the traceback at this line
        # on Python 2.
        raise
|
||||
|
||||
|
||||
def dumps(o, **kwargs):
    """Encode the object ``o`` as JSON with the selected engine.

    Extra keyword arguments are accepted but are not passed on to the
    engine.

    Raises JSONError when the engine fails with one of the exception types
    registered for it; any other exception propagates unchanged.
    """

    try:
        return _engine[1](o)

    except:
        # Fetch the active exception through sys.exc_info() so the same
        # source works with both Python 2 and Python 3 ``except`` syntax.
        ExceptionClass, why = sys.exc_info()[:2]

        if issubclass(ExceptionClass, tuple(_engine[2])):
            raise JSONError(why)

        # A bare ``raise`` re-raises the active exception with its original
        # traceback; ``raise why`` would restart the traceback at this line
        # on Python 2.
        raise
|
||||
|
||||
|
||||
class JSONError(ValueError):
    """Signals that encoding or decoding JSON failed."""
|
||||
|
||||
|
||||
# ------
|
||||
# Magic!
|
||||
# ------
|
||||
|
||||
|
||||
# Walk ``options`` in order and adopt the first engine that can be imported.
# When none can, ``engine`` and ``_engine`` keep their initial value of None.
for e in options:

    __engine = _import(e[0])

    if not __engine:
        continue

    # _engine starts as [loader name, dumper name, exception classes]; the
    # two names are resolved to the engine module's attributes below.
    engine, _engine = e[0], e[1:4]

    for i in (0, 1):
        _engine[i] = getattr(__engine, _engine[i])

    break
|
||||
@@ -0,0 +1,438 @@
|
||||
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
|
||||
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
|
||||
interchange format.
|
||||
|
||||
:mod:`simplejson` exposes an API familiar to users of the standard library
|
||||
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
|
||||
version of the :mod:`json` library contained in Python 2.6, but maintains
|
||||
compatibility with Python 2.4 and Python 2.5 and (currently) has
|
||||
significant performance advantages, even without using the optional C
|
||||
extension for speedups.
|
||||
|
||||
Encoding basic Python object hierarchies::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
|
||||
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
|
||||
>>> print json.dumps("\"foo\bar")
|
||||
"\"foo\bar"
|
||||
>>> print json.dumps(u'\u1234')
|
||||
"\u1234"
|
||||
>>> print json.dumps('\\')
|
||||
"\\"
|
||||
>>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
|
||||
{"a": 0, "b": 0, "c": 0}
|
||||
>>> from StringIO import StringIO
|
||||
>>> io = StringIO()
|
||||
>>> json.dump(['streaming API'], io)
|
||||
>>> io.getvalue()
|
||||
'["streaming API"]'
|
||||
|
||||
Compact encoding::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
|
||||
'[1,2,3,{"4":5,"6":7}]'
|
||||
|
||||
Pretty printing::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')
|
||||
>>> print '\n'.join([l.rstrip() for l in s.splitlines()])
|
||||
{
|
||||
"4": 5,
|
||||
"6": 7
|
||||
}
|
||||
|
||||
Decoding JSON::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
|
||||
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
|
||||
True
|
||||
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
|
||||
True
|
||||
>>> from StringIO import StringIO
|
||||
>>> io = StringIO('["streaming API"]')
|
||||
>>> json.load(io)[0] == 'streaming API'
|
||||
True
|
||||
|
||||
Specializing JSON object decoding::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> def as_complex(dct):
|
||||
... if '__complex__' in dct:
|
||||
... return complex(dct['real'], dct['imag'])
|
||||
... return dct
|
||||
...
|
||||
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
|
||||
... object_hook=as_complex)
|
||||
(1+2j)
|
||||
>>> from decimal import Decimal
|
||||
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
|
||||
True
|
||||
|
||||
Specializing JSON object encoding::
|
||||
|
||||
>>> import simplejson as json
|
||||
>>> def encode_complex(obj):
|
||||
... if isinstance(obj, complex):
|
||||
... return [obj.real, obj.imag]
|
||||
... raise TypeError(repr(obj) + " is not JSON serializable")
|
||||
...
|
||||
>>> json.dumps(2 + 1j, default=encode_complex)
|
||||
'[2.0, 1.0]'
|
||||
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
|
||||
'[2.0, 1.0]'
|
||||
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
|
||||
'[2.0, 1.0]'
|
||||
|
||||
|
||||
Using simplejson.tool from the shell to validate and pretty-print::
|
||||
|
||||
$ echo '{"json":"obj"}' | python -m simplejson.tool
|
||||
{
|
||||
"json": "obj"
|
||||
}
|
||||
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
|
||||
Expecting property name: line 1 column 2 (char 2)
|
||||
"""
|
||||
__version__ = '2.1.6'
|
||||
__all__ = [
|
||||
'dump', 'dumps', 'load', 'loads',
|
||||
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
|
||||
'OrderedDict',
|
||||
]
|
||||
|
||||
__author__ = 'Bob Ippolito <bob@redivi.com>'
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from decoder import JSONDecoder, JSONDecodeError
|
||||
from encoder import JSONEncoder
|
||||
def _import_OrderedDict():
|
||||
import collections
|
||||
try:
|
||||
return collections.OrderedDict
|
||||
except AttributeError:
|
||||
import ordered_dict
|
||||
return ordered_dict.OrderedDict
|
||||
OrderedDict = _import_OrderedDict()
|
||||
|
||||
def _import_c_make_encoder():
|
||||
try:
|
||||
from simplejson._speedups import make_encoder
|
||||
return make_encoder
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
_default_encoder = JSONEncoder(
|
||||
skipkeys=False,
|
||||
ensure_ascii=True,
|
||||
check_circular=True,
|
||||
allow_nan=True,
|
||||
indent=None,
|
||||
separators=None,
|
||||
encoding='utf-8',
|
||||
default=None,
|
||||
use_decimal=False,
|
||||
)
|
||||
|
||||
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        encoding='utf-8', default=None, use_decimal=False, **kw):
    """Write ``obj`` to ``fp`` as a JSON formatted stream.

    ``fp`` is a file-like object supporting ``.write()``; the encoded
    document is written to it one chunk at a time.

    ``skipkeys``: when true, ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``,
    ``None``) are skipped instead of raising a ``TypeError``.

    ``ensure_ascii``: when false, some chunks written to ``fp`` may be
    ``unicode`` instances, subject to the normal Python ``str`` to
    ``unicode`` coercion rules.  Unless ``fp.write()`` explicitly
    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
    to cause an error.

    ``check_circular``: when false, the circular reference check for
    container types is skipped and a circular reference results in an
    ``OverflowError`` (or worse).

    ``allow_nan``: when false, serializing out of range ``float`` values
    (``nan``, ``inf``, ``-inf``) is a ``ValueError``, in strict compliance
    with the JSON specification, instead of producing the JavaScript
    equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    ``indent``: when a string, array elements and object members are
    pretty-printed with a newline followed by that string repeated for
    each nesting level.  ``None`` (the default) gives the most compact
    representation without newlines.  For backwards compatibility with
    simplejson versions before 2.1.0 an integer is also accepted and is
    converted to that many spaces.

    ``separators``: an ``(item_separator, dict_separator)`` tuple used in
    place of the default ``(', ', ': ')``; ``(',', ':')`` is the most
    compact form.

    ``encoding``: the character encoding of ``str`` instances (UTF-8 by
    default).

    ``default``: a function ``default(obj)`` returning a serializable
    version of ``obj`` or raising ``TypeError``.  The default simply
    raises ``TypeError``.

    ``use_decimal``: when true (default ``False``), ``decimal.Decimal`` is
    serialized natively to JSON with full precision.

    ``cls``: a custom ``JSONEncoder`` subclass (for example one overriding
    ``.default()`` to serialize additional types).

    """
    # Any non-default option means the shared, pre-built encoder cannot be
    # used and a dedicated one has to be constructed.
    customised = (
        skipkeys or not ensure_ascii or not check_circular or not allow_nan
        or cls is not None or indent is not None or separators is not None
        or encoding != 'utf-8' or default is not None or use_decimal or kw)
    if customised:
        if cls is None:
            cls = JSONEncoder
        encoder = cls(
            skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan,
            indent=indent, separators=separators, encoding=encoding,
            default=default, use_decimal=use_decimal, **kw)
        iterable = encoder.iterencode(obj)
    else:
        # cached encoder
        iterable = _default_encoder.iterencode(obj)
    # could accelerate with writelines in some versions of Python, at
    # a debuggability cost
    for chunk in iterable:
        fp.write(chunk)
|
||||
|
||||
|
||||
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        encoding='utf-8', default=None, use_decimal=False, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    will be skipped instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the return value will be a
    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
    coercion rules instead of being escaped to an ASCII ``str``.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a string, then JSON array elements and object members
    will be pretty-printed with a newline followed by that string repeated
    for each level of nesting. ``None`` (the default) selects the most compact
    representation without any newlines. For backwards compatibility with
    versions of simplejson earlier than 2.1.0, an integer is also accepted
    and is converted to a string with that many spaces.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    then it will be used instead of the default ``(', ', ': ')`` separators.
    ``(',', ':')`` is the most compact JSON representation.

    ``encoding`` is the character encoding for str instances, default is UTF-8.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *use_decimal* is true (default: ``False``) then decimal.Decimal
    will be natively serialized to JSON with full precision.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg.

    """
    # cached encoder: usable only when every option has its default value
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        encoding == 'utf-8' and default is None and not use_decimal
        and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, encoding=encoding, default=default,
        use_decimal=use_decimal, **kw).encode(obj)
|
||||
|
||||
|
||||
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
|
||||
object_pairs_hook=None)
|
||||
|
||||
|
||||
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None,
        use_decimal=False, **kw):
    """Read a JSON document from ``fp`` and return it as a Python object.

    ``fp`` is a file-like object supporting ``.read()``.  Its whole content
    is read and handed to ``loads`` together with every option below.

    *encoding* determines the encoding used to interpret any :class:`str`
    objects decoded by this instance (``'utf-8'`` by default).  It has no
    effect when decoding :class:`unicode` objects.  Currently only
    encodings that are a superset of ASCII work; strings in other
    encodings should be passed in as :class:`unicode`.

    *object_hook*, if given, is called with the result of every JSON
    object decoded and its return value is used in place of the
    :class:`dict`.  This allows custom deserializations (e.g. JSON-RPC
    class hinting).

    *object_pairs_hook*, if given, is called with the result of any object
    literal decoded as an ordered list of pairs, and its return value is
    used instead of the :class:`dict`.  It suits decoders that depend on
    the order of the key and value pairs (for example
    :func:`collections.OrderedDict` remembers insertion order).  It takes
    priority over *object_hook* when both are given.

    *parse_float*, if given, is called with the string of every JSON float
    to be decoded; the default is equivalent to ``float(num_str)``.  It
    allows another datatype or parser for JSON floats (e.g.
    :class:`decimal.Decimal`).

    *parse_int*, if given, is called with the string of every JSON int to
    be decoded; the default is equivalent to ``int(num_str)``.  It allows
    another datatype or parser for JSON integers (e.g. :class:`float`).

    *parse_constant*, if given, is called with one of the strings
    ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  It can be used to raise an
    exception when invalid JSON numbers are encountered.

    *use_decimal*, when true (default: ``False``), implies
    ``parse_float=decimal.Decimal`` for parity with ``dump``.

    *cls* selects a custom ``JSONDecoder`` subclass.

    """
    document = fp.read()
    return loads(
        document,
        encoding=encoding, cls=cls, object_hook=object_hook,
        parse_float=parse_float, parse_int=parse_int,
        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
        use_decimal=use_decimal, **kw)
|
||||
|
||||
|
||||
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None,
        use_decimal=False, **kw):
    """Decode ``s``, a ``str`` or ``unicode`` JSON document, to a Python
    object.

    *encoding* determines the encoding used to interpret any :class:`str`
    objects decoded by this instance (``'utf-8'`` by default).  It has no
    effect when decoding :class:`unicode` objects.  Currently only
    encodings that are a superset of ASCII work; strings in other
    encodings should be passed in as :class:`unicode`.

    *object_hook*, if given, is called with the result of every JSON
    object decoded and its return value is used in place of the
    :class:`dict`.  This allows custom deserializations (e.g. JSON-RPC
    class hinting).

    *object_pairs_hook*, if given, is called with the result of any object
    literal decoded as an ordered list of pairs, and its return value is
    used instead of the :class:`dict`.  It suits decoders that depend on
    the order of the key and value pairs (for example
    :func:`collections.OrderedDict` remembers insertion order).  It takes
    priority over *object_hook* when both are given.

    *parse_float*, if given, is called with the string of every JSON float
    to be decoded; the default is equivalent to ``float(num_str)``.  It
    allows another datatype or parser for JSON floats (e.g.
    :class:`decimal.Decimal`).

    *parse_int*, if given, is called with the string of every JSON int to
    be decoded; the default is equivalent to ``int(num_str)``.  It allows
    another datatype or parser for JSON integers (e.g. :class:`float`).

    *parse_constant*, if given, is called with one of the strings
    ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  It can be used to raise an
    exception when invalid JSON numbers are encountered.

    *use_decimal*, when true (default: ``False``), implies
    ``parse_float=decimal.Decimal`` for parity with ``dump``.  Combining it
    with an explicit *parse_float* raises ``TypeError``.

    *cls* selects a custom ``JSONDecoder`` subclass.

    """
    # With every option at its default the shared decoder can be reused.
    if (cls is None and encoding is None and object_hook is None and
            parse_int is None and parse_float is None and
            parse_constant is None and object_pairs_hook is None
            and not use_decimal and not kw):
        return _default_decoder.decode(s)

    decoder_cls = cls
    if decoder_cls is None:
        decoder_cls = JSONDecoder

    # Forward only the hooks that were actually supplied, so the decoder
    # class keeps its own defaults for the rest.
    supplied = (
        ('object_hook', object_hook),
        ('object_pairs_hook', object_pairs_hook),
        ('parse_float', parse_float),
        ('parse_int', parse_int),
        ('parse_constant', parse_constant),
    )
    for option, value in supplied:
        if value is not None:
            kw[option] = value

    if use_decimal:
        if parse_float is not None:
            raise TypeError("use_decimal=True implies parse_float=Decimal")
        kw['parse_float'] = Decimal

    return decoder_cls(encoding=encoding, **kw).decode(s)
|
||||
|
||||
|
||||
def _toggle_speedups(enabled):
    """Switch the optional C speedups on or off.

    When ``enabled`` is true the C implementations are installed in the
    decoder, encoder and scanner modules wherever they are available, with
    the pure-Python ones as fallback; otherwise the pure-Python
    implementations are installed unconditionally.  The module-level
    default decoder and encoder are then rebuilt.
    """
    global _default_decoder, _default_encoder
    # NOTE(review): these are absolute ``simplejson.*`` imports; if this
    # package is vendored under another name they may resolve to a
    # separately installed simplejson -- confirm.
    import simplejson.decoder as dec
    import simplejson.encoder as enc
    import simplejson.scanner as scan
    c_make_encoder = _import_c_make_encoder()
    if enabled:
        dec.scanstring = dec.c_scanstring or dec.py_scanstring
        enc.c_make_encoder = c_make_encoder
        enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
            enc.py_encode_basestring_ascii)
        scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
    else:
        dec.scanstring = dec.py_scanstring
        enc.c_make_encoder = None
        enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
        scan.make_scanner = scan.py_make_scanner
    dec.make_scanner = scan.make_scanner
    _default_decoder = JSONDecoder(
        encoding=None,
        object_hook=None,
        object_pairs_hook=None,
    )
    # Same arguments as the module-level default encoder, including the
    # explicit use_decimal=False.
    _default_encoder = JSONEncoder(
        skipkeys=False,
        ensure_ascii=True,
        check_circular=True,
        allow_nan=True,
        indent=None,
        separators=None,
        encoding='utf-8',
        default=None,
        use_decimal=False,
    )
|
||||
@@ -0,0 +1,421 @@
|
||||
"""Implementation of JSONDecoder
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
import struct
|
||||
|
||||
from .scanner import make_scanner
|
||||
def _import_c_scanstring():
|
||||
try:
|
||||
from simplejson._speedups import scanstring
|
||||
return scanstring
|
||||
except ImportError:
|
||||
return None
|
||||
c_scanstring = _import_c_scanstring()
|
||||
|
||||
__all__ = ['JSONDecoder']
|
||||
|
||||
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
|
||||
|
||||
def _floatconstants():
|
||||
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
|
||||
# The struct module in Python 2.4 would get frexp() out of range here
|
||||
# when an endian is specified in the format string. Fixed in Python 2.5+
|
||||
if sys.byteorder != 'big':
|
||||
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
|
||||
nan, inf = struct.unpack('dd', _BYTES)
|
||||
return nan, inf, -inf
|
||||
|
||||
NaN, PosInf, NegInf = _floatconstants()
|
||||
|
||||
|
||||
class JSONDecodeError(ValueError):
    """ValueError subclass describing where decoding a document failed.

    Additional attributes:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The exception text is the fully formatted message; the raw parts
        # stay available as attributes.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg, self.doc = msg, doc
        self.pos, self.end = pos, end
        self.lineno, self.colno = linecol(doc, pos)
        if end is None:
            self.endlineno, self.endcolno = None, None
        else:
            self.endlineno, self.endcolno = linecol(doc, end)
|
||||
|
||||
|
||||
def linecol(doc, pos):
    """Return ``(lineno, colno)`` for index ``pos`` of ``doc``.

    ``lineno`` is one more than the number of newlines before ``pos``.  On
    the first line ``colno`` is ``pos`` itself; on later lines it is the
    distance from the preceding newline.
    """
    newlines = doc.count('\n', 0, pos)
    if newlines:
        column = pos - doc.rindex('\n', 0, pos)
    else:
        column = pos
    return newlines + 1, column
|
||||
|
||||
|
||||
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` with the position of ``pos`` (and ``end``) in ``doc``.

    Without ``end`` the result names one line/column/char position; with
    ``end`` it names the span from ``pos`` to ``end``.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is not None:
        endlineno, endcolno = linecol(doc, end)
        span = (msg, lineno, colno, endlineno, endcolno, pos, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % span
    # %-formatting is used here; the str.format variant was left disabled
    # in the original code.
    point = (msg, lineno, colno, pos)
    return '%s: line %d column %d (char %d)' % point
|
||||
|
||||
|
||||
_CONSTANTS = {
|
||||
'-Infinity': NegInf,
|
||||
'Infinity': PosInf,
|
||||
'NaN': NaN,
|
||||
}
|
||||
|
||||
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
|
||||
BACKSLASH = {
|
||||
'"': u'"', '\\': u'\\', '/': u'/',
|
||||
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
|
||||
}
|
||||
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan ``s`` for a JSON string and return ``(decoded, next_index)``.

    ``end`` is the index of the character in ``s`` after the quote that
    opened the JSON string.  All valid JSON escape sequences are unescaped;
    an invalid string raises JSONDecodeError (a ValueError).  When
    ``strict`` is false, literal control characters are allowed inside the
    string.

    ``next_index`` is the index of the character in ``s`` after the closing
    quote.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    pieces = []
    _append = pieces.append
    # Index of the opening quote, used in "unterminated" error reports.
    begin = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = match.end()
        content, terminator = match.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        if terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            _append(terminator)
            continue
        # A backslash was seen: look at the character that follows it.
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        if esc == 'u':
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            if len(esc) != 4:
                msg = "Invalid \\uXXXX escape"
                raise JSONDecodeError(msg, s, end)
            uni = int(esc, 16)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                uni2 = int(esc2, 16)
                # Combine the two halves into a single code point.
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        else:
            # Not a unicode escape sequence, so it must be in the lookup
            # table
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        # Append the unescaped character
        _append(char)
    return u''.join(pieces), end
|
||||
|
||||
|
||||
# Use speedup if available
|
||||
scanstring = c_scanstring or py_scanstring
|
||||
|
||||
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
|
||||
WHITESPACE_STR = ' \t\n\r'
|
||||
|
||||
def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening ``{`` has already been consumed.

    *state* is the 2-tuple ``(s, end)``: the document being decoded and the
    index of the first character after the ``{``.

    *encoding* and *strict* are forwarded unchanged to ``scanstring`` when a
    property name is read.  *scan_once* is called as ``scan_once(s, end)``
    for every member value; it must return ``(value, end)`` and signal
    "nothing decodable here" by raising ``StopIteration``.

    *memo*, when given, is a dict used to share key objects: every key goes
    through ``memo.setdefault(key, key)``.  A fresh dict is used when it is
    omitted.

    Returns ``(obj, end)`` where *end* is the index just past the closing
    ``}``.  *obj* is ``object_pairs_hook(pairs)`` when that hook is set
    (``pairs`` being the ordered list of ``(key, value)`` tuples); otherwise
    it is a ``dict``, passed through *object_hook* when that hook is set.

    Raises ``JSONDecodeError`` for a missing property name, ``:`` delimiter,
    member value or ``,`` delimiter.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language (PEP 3113).  Callers pass the same
    # positional ``(s, end)`` tuple as before.
    s, end = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    # Step over the opening quote of the first property name.
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        # Skip whitespace after the colon; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        # ``end`` now points one past the character held in ``nextchar``,
        # which is why the error below reports ``end - 1``.
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        # After a comma the next non-whitespace character must open the
        # next property name.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
|
||||
|
||||
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening ``[`` has already been consumed.

    *state* is the 2-tuple ``(s, end)``: the document being decoded and the
    index of the first character after the ``[``.  *scan_once* is called as
    ``scan_once(s, end)`` for every element; it must return
    ``(value, end)`` and raise ``StopIteration`` when no value starts there.

    Returns ``(values, end)`` where *values* is a list and *end* is the
    index just past the closing ``]``.

    Raises ``JSONDecodeError`` when an element or a ``,`` delimiter is
    missing.  The reported position is that of the offending character.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language (PEP 3113).  Callers pass the same
    # positional ``(s, end)`` tuple as before.
    s, end = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        # ``end`` now points one past the character held in ``nextchar``.
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # Report the offending character itself (``end - 1``), matching
            # the position JSONObject reports for the same error.
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        # Skip whitespace after the comma; running off the end of the
        # document is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
|
||||
|
||||
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder.

    Default translation from JSON to Python when decoding:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well and map to
    the corresponding ``float`` values, although they are not part of the
    JSON specification.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure a decoder.

        *encoding* is the encoding used to interpret :class:`str` input
        (documented upstream as ``'utf-8'`` when left unset).  It does not
        affect :class:`unicode` input.  Only ASCII-superset encodings are
        supported; pass other data in as :class:`unicode`.

        *object_hook*, when given, receives every decoded JSON object as a
        :class:`dict`, and its return value replaces that dict (useful for
        custom deserialization such as JSON-RPC class hinting).

        *object_pairs_hook*, when given, receives every decoded JSON object
        as an ordered list of ``(key, value)`` pairs, and its return value
        is used instead of a :class:`dict`.  This allows order-preserving
        results (e.g. with :func:`collections.OrderedDict`).  It takes
        priority over *object_hook* when both are set.

        *parse_float*, when given, is called with the text of every JSON
        float; the default behaves like ``float(num_str)``.
        :class:`decimal.Decimal` is a typical alternative.

        *parse_int*, when given, is called with the text of every JSON
        integer; the default behaves like ``int(num_str)``.

        *parse_constant*, when given, is called with one of
        ``'-Infinity'``, ``'Infinity'`` or ``'NaN'``, and may raise to
        reject such non-standard numbers.

        *strict* decides whether unescaped control characters inside
        strings are parse errors (``True``, the default) or are allowed
        (``False``).

        """
        # Plain settings first.
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Number/constant converters fall back to the built-in behaviour.
        self.parse_int = parse_int or int
        self.parse_float = parse_float or float
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Parsers for the composite value kinds.
        self.parse_string = scanstring
        self.parse_array = JSONArray
        self.parse_object = JSONObject

        self.memo = {}
        # Built last: the scanner factory is handed this fully configured
        # instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` holding one JSON document)
        and return the resulting Python object.

        Whitespace around the document is ignored; anything else after the
        document raises ``JSONDecodeError("Extra data", ...)``.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at ``s[idx]``.

        Returns a 2-tuple of the Python object and the index in ``s`` where
        the document ended, so callers can handle input that carries
        extraneous data after the document.

        Raises ``JSONDecodeError`` when nothing can be decoded at ``idx``.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        else:
            return obj, end
|
||||
@@ -0,0 +1,503 @@
|
||||
"""Implementation of JSONEncoder
|
||||
"""
|
||||
import re
|
||||
from decimal import Decimal
|
||||
|
||||
def _import_speedups():
|
||||
try:
|
||||
from simplejson import _speedups
|
||||
return _speedups.encode_basestring_ascii, _speedups.make_encoder
|
||||
except ImportError:
|
||||
return None, None
|
||||
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
|
||||
|
||||
from .decoder import PosInf
|
||||
|
||||
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
|
||||
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
|
||||
HAS_UTF8 = re.compile(r'[\x80-\xff]')
|
||||
ESCAPE_DCT = {
|
||||
'\\': '\\\\',
|
||||
'"': '\\"',
|
||||
'\b': '\\b',
|
||||
'\f': '\\f',
|
||||
'\n': '\\n',
|
||||
'\r': '\\r',
|
||||
'\t': '\\t',
|
||||
}
|
||||
for i in range(0x20):
|
||||
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
|
||||
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
|
||||
|
||||
FLOAT_REPR = repr
|
||||
|
||||
def encode_basestring(s):
    """Return the JSON string literal (as unicode) for a Python string.

    A ``str`` containing bytes in the ``\\x80-\\xff`` range is decoded as
    UTF-8 first.  Every character matched by ``ESCAPE`` is then replaced
    by its entry in ``ESCAPE_DCT`` and the result is wrapped in double
    quotes.

    """
    has_high_bytes = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if has_high_bytes:
        s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return u'"' + escaped + u'"'
|
||||
|
||||
|
||||
def py_encode_basestring_ascii(s):
    """Return the ASCII-only JSON string literal for a Python string.

    A ``str`` containing bytes in the ``\\x80-\\xff`` range is decoded as
    UTF-8 first.  Characters matched by ``ESCAPE_ASCII`` are replaced by
    their ``ESCAPE_DCT`` entry when one exists, and by ``\\uXXXX`` escapes
    otherwise (two escapes, forming a surrogate pair, for code points of
    0x10000 and above).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        code = ord(char)
        if code < 0x10000:
            return '\\u%04x' % (code,)
        # surrogate pair
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    ascii_body = str(ESCAPE_ASCII.sub(escape_char, s))
    return '"' + ascii_body + '"'
|
||||
|
||||
|
||||
encode_basestring_ascii = (
|
||||
c_encode_basestring_ascii or py_encode_basestring_ascii)
|
||||
|
||||
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # *separators* or *indent* is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=False):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (not the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        # Integer indents are the pre-2.1.0 spelling: convert to spaces.
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # With an indent every item starts on its own line, so the
            # item separator carries no trailing space.
            self.item_separator = ','
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only pre-decode ``str`` input when a real, non-UTF-8 encoding is
        # configured.  ``None`` means "no pre-decoding", exactly as in the
        # fast path of ``encode``; without this guard the wrapper would be
        # installed for ``None`` and call ``o.decode(None)``.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        key_memo = {}
        # The C encoder is only used for one-shot, non-indented output and
        # only when the speedups module was importable.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
|
||||
|
||||
|
||||
class JSONEncoderForHTML(JSONEncoder):
    """An encoder whose output is safe to embed in HTML.

    When JSON is placed inside, for example, a ``<script>`` tag, the
    characters ``&``, ``<`` and ``>`` should be escaped.  The usual
    entities (e.g. ``&amp;``) cannot be used because they are not expanded
    within ``<script>`` tags, so JSON ``\\uXXXX`` escapes are emitted
    instead.
    """

    def encode(self, o):
        # JSONEncoder.encode is overridden because its performance
        # shortcuts would make things more complicated here; everything is
        # routed through iterencode instead.
        pieces = self.iterencode(o, True)
        if self.ensure_ascii:
            joiner = ''
        else:
            joiner = u''
        return joiner.join(pieces)

    def iterencode(self, o, _one_shot=False):
        # Apply the three replacements, in this order, to every chunk the
        # base encoder produces.
        html_escapes = (
            ('&', '\\u0026'),
            ('<', '\\u003c'),
            ('>', '\\u003e'),
        )
        for piece in super(JSONEncoderForHTML, self).iterencode(o, _one_shot):
            for raw, escaped in html_escapes:
                piece = piece.replace(raw, escaped)
            yield piece
|
||||
|
||||
|
||||
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function for one encoder configuration.

    *markers* is a dict used for circular-reference detection (keyed by
    ``id()`` of the containers currently being encoded) or ``None`` to
    disable the check.  *_default* converts otherwise unsupported objects,
    *_encoder* encodes strings and *_floatstr* encodes floats.  *_indent*
    is the per-level indent string or ``None`` for compact output.
    *_one_shot* is accepted but not referenced in this body.

    The trailing keyword parameters only rebind builtins as locals and are
    not meant to be passed by callers.
    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the chunks for a list/tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next item: the
        # opening bracket for the first item, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # Scalars are emitted together with the pending prefix as a
            # single chunk.
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Containers and unknown objects: emit the prefix, then
                # delegate to the matching sub-generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        # The container is fully encoded; it may legitimately appear again.
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the chunks for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Normalise the key to a string; string keys pass through.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: dispatch on the type of ``o``.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        elif _use_decimal and isinstance(o, Decimal):
            yield str(o)
        else:
            # Unsupported type: convert it with ``_default`` and encode the
            # replacement.  The marker is recorded for the original object
            # (its id is taken before ``o`` is rebound).
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
|
||||
@@ -0,0 +1,119 @@
|
||||
"""Drop-in replacement for collections.OrderedDict by Raymond Hettinger
|
||||
|
||||
http://code.activestate.com/recipes/576693/
|
||||
|
||||
"""
|
||||
from UserDict import DictMixin
|
||||
|
||||
# Modified from original to support Python 2.4, see
|
||||
# http://code.google.com/p/simplejson/issues/detail?id=53
|
||||
try:
    all
except NameError:
    # No builtin ``all`` available: provide an equivalent.
    def all(seq):
        """Return True when every element of *seq* is truthy (True for an
        empty *seq*), stopping at the first falsy element."""
        for item in seq:
            if item:
                continue
            return False
        return True
|
||||
|
||||
class OrderedDict(dict, DictMixin):
    """Dictionary that iterates over its keys in insertion order.

    Values live in the underlying ``dict``.  Key order is kept in a
    circular doubly linked list of ``[key, prev, next]`` nodes anchored at
    a sentinel node, with a side map from each key to its node.
    """

    def __init__(self, *args, **kwds):
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        # Only build the linked list when it does not exist yet, so calling
        # __init__ again on a live instance keeps its contents.
        try:
            self.__end
        except AttributeError:
            self.clear()
        self.update(*args, **kwds)

    def clear(self):
        sentinel = []
        self.__end = sentinel
        # sentinel node for doubly linked list: [None, prev, next], both
        # links pointing back at the sentinel itself
        sentinel += [None, sentinel, sentinel]
        # key --> [key, prev, next]
        self.__map = {}
        dict.clear(self)

    def __setitem__(self, key, value):
        if key not in self:
            # New key: append a node just before the sentinel.
            sentinel = self.__end
            last = sentinel[1]
            node = [key, last, sentinel]
            last[2] = node
            sentinel[1] = node
            self.__map[key] = node
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        # Unlink the node from its neighbours.
        _, before, after = self.__map.pop(key)
        before[2] = after
        after[1] = before

    def __iter__(self):
        sentinel = self.__end
        node = sentinel[2]
        while node is not sentinel:
            yield node[0]
            node = node[2]

    def __reversed__(self):
        sentinel = self.__end
        node = sentinel[1]
        while node is not sentinel:
            yield node[0]
            node = node[1]

    def popitem(self, last=True):
        if not self:
            raise KeyError('dictionary is empty')
        # Modified from original to support Python 2.4, see
        # http://code.google.com/p/simplejson/issues/detail?id=53
        if last:
            walker = reversed(self)
        else:
            walker = iter(self)
        key = walker.next()
        return key, self.pop(key)

    def __reduce__(self):
        items = [[k, self[k]] for k in self]
        # Temporarily remove the linked-list bookkeeping so that it is not
        # part of the copied instance dict.
        saved = self.__map, self.__end
        del self.__map, self.__end
        inst_dict = vars(self).copy()
        self.__map, self.__end = saved
        if not inst_dict:
            return self.__class__, (items,)
        return (self.__class__, (items,), inst_dict)

    def keys(self):
        return list(self)

    # The remaining mapping API is taken from DictMixin.
    setdefault = DictMixin.setdefault
    update = DictMixin.update
    pop = DictMixin.pop
    values = DictMixin.values
    items = DictMixin.items
    iterkeys = DictMixin.iterkeys
    itervalues = DictMixin.itervalues
    iteritems = DictMixin.iteritems

    def __repr__(self):
        name = self.__class__.__name__
        if self:
            return '%s(%r)' % (name, self.items())
        return '%s()' % (name,)

    def copy(self):
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        fresh = cls()
        for key in iterable:
            fresh[key] = value
        return fresh

    def __eq__(self, other):
        if not isinstance(other, OrderedDict):
            return dict.__eq__(self, other)
        # Two ordered dicts are equal only if their items match pairwise,
        # i.e. order matters.
        if len(self) != len(other):
            return False
        return all(p==q for p, q in zip(self.items(), other.items()))

    def __ne__(self, other):
        return not self == other
|
||||
@@ -0,0 +1,70 @@
|
||||
"""JSON token scanner
|
||||
"""
|
||||
import re
|
||||
|
||||
__all__ = ['make_scanner']
|
||||
|
||||
NUMBER_RE = re.compile(
|
||||
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
|
||||
(re.VERBOSE | re.MULTILINE | re.DOTALL))
|
||||
|
||||
def py_make_scanner(context):
    """Return a ``scan_once(string, idx)`` function bound to *context*.

    *context* supplies the parsers (``parse_object``, ``parse_array``,
    ``parse_string``, ``parse_float``, ``parse_int``, ``parse_constant``),
    the ``encoding`` and ``strict`` settings, the two object hooks and the
    ``memo`` dict.  The returned function decodes the JSON value starting
    at ``string[idx]`` and returns ``(value, end_index)``.  It raises
    ``StopIteration`` when no value starts there, and clears ``memo`` on
    the way out of every call.
    """
    # Copy everything into locals once so the closures avoid repeated
    # attribute lookups.
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    match_number = NUMBER_RE.match
    encoding = context.encoding
    strict = context.strict
    parse_float = context.parse_float
    parse_int = context.parse_int
    parse_constant = context.parse_constant
    object_hook = context.object_hook
    object_pairs_hook = context.object_pairs_hook
    memo = context.memo

    def _scan_once(string, idx):
        try:
            lead = string[idx]
        except IndexError:
            raise StopIteration

        # Strings, containers and the three literals are recognised by
        # their first character.
        if lead == '"':
            return parse_string(string, idx + 1, encoding, strict)
        if lead == '{':
            return parse_object((string, idx + 1), encoding, strict,
                _scan_once, object_hook, object_pairs_hook, memo)
        if lead == '[':
            return parse_array((string, idx + 1), _scan_once)
        if lead == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        if lead == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        if lead == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        number = match_number(string, idx)
        if number is not None:
            integer, frac, exp = number.groups()
            if frac or exp:
                parsed = parse_float(integer + (frac or '') + (exp or ''))
            else:
                parsed = parse_int(integer)
            return parsed, number.end()

        # Non-standard constants are tried only after the number pattern
        # has failed to match.
        if lead == 'N' and string[idx:idx + 3] == 'NaN':
            return parse_constant('NaN'), idx + 3
        if lead == 'I' and string[idx:idx + 8] == 'Infinity':
            return parse_constant('Infinity'), idx + 8
        if lead == '-' and string[idx:idx + 9] == '-Infinity':
            return parse_constant('-Infinity'), idx + 9
        raise StopIteration

    def scan_once(string, idx):
        try:
            return _scan_once(string, idx)
        finally:
            memo.clear()

    return scan_once
|
||||
|
||||
make_scanner = py_make_scanner
|
||||
@@ -58,11 +58,15 @@ def load_workbook(filename, use_iterators = False):
|
||||
|
||||
"""
|
||||
|
||||
if isinstance(filename, file):
|
||||
try:
|
||||
# fileobject must have been opened with 'rb' flag
|
||||
# it is required by zipfile
|
||||
if 'b' not in filename.mode:
|
||||
raise OpenModeError("File-object must be opened in binary mode")
|
||||
except AttributeError:
|
||||
# filename is not an object
|
||||
# it doesn't have mode attribute
|
||||
pass
|
||||
|
||||
try:
|
||||
archive = ZipFile(filename, 'r', ZIP_DEFLATED)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,639 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Support module for the xlrd package.
|
||||
#
|
||||
# <p>Portions copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# 2008-02-10 SJM BIFF2 BLANK record
|
||||
# 2008-02-08 SJM Preparation for Excel 2.0 support
|
||||
# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
|
||||
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
|
||||
# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
|
||||
# 2007-04-22 SJM Remove experimental "trimming" facility.
|
||||
|
||||
DEBUG = 0
|
||||
|
||||
from struct import unpack
|
||||
import sys
|
||||
from timemachine import *
|
||||
|
||||
class XLRDError(Exception):
    """Exception type declared by this support module; adds nothing to Exception."""
|
||||
|
||||
##
|
||||
# Parent of almost all other classes in the package. Defines a common "dump" method
|
||||
# for debugging.
|
||||
|
||||
class BaseObject(object):
    """Base class supplying a generic, recursive ``dump`` method for debugging."""

    # Attributes named here are always printed with %r, even when the value
    # is a list or dict (which dump() would otherwise summarise by length).
    _repr_these = []

    ##
    # @param f open file object, to which the dump is written
    # @param header text to write before the dump
    # @param footer text to write after the dump
    # @param indent number of leading spaces (for recursive calls)

    def dump(self, f=None, header=None, footer=None, indent=0):
        """Print every instance attribute, sorted by name, to f (default stderr)."""
        out = f
        if out is None:
            out = sys.stderr
        members = list(self.__dict__.items())
        members.sort()
        pad = " " * indent
        if header is not None:
            print >> out, header
        for attr, value in members:
            if getattr(value, 'dump', None) and attr != 'book':
                # The value knows how to dump itself: recurse, indented by 4.
                # The 'book' attribute is deliberately not followed.
                sub_header = "%s%s (%s object):" % (pad, attr, value.__class__.__name__)
                value.dump(out, header=sub_header, indent=indent+4)
                continue
            if attr not in self._repr_these and isinstance(value, (list, dict)):
                # Containers are summarised by type and length only.
                print >> out, "%s%s: %s, len = %d" % (pad, attr, type(value), len(value))
            else:
                print >> out, "%s%s: %r" % (pad, attr, value)
        if footer is not None:
            print >> out, footer
|
||||
|
||||
# Five consecutive integer codes 0..4; the trailing comment gives their meanings.
FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text
# Longer aliases for the date and number codes.
DATEFORMAT = FDT
NUMBERFORMAT = FNU

# Cell type codes: consecutive integers 0..6 in the order listed.
(
    XL_CELL_EMPTY,
    XL_CELL_TEXT,
    XL_CELL_NUMBER,
    XL_CELL_DATE,
    XL_CELL_BOOLEAN,
    XL_CELL_ERROR,
    XL_CELL_BLANK, # for use in debugging, gathering stats, etc
) = range(7)
|
||||
|
||||
# Maps an integer BIFF version number to its display text
# (0 is the "(not BIFF)" placeholder).
biff_text_from_num = {
    0: "(not BIFF)",
    20: "2.0",
    21: "2.1",
    30: "3",
    40: "4S",
    45: "4W",
    50: "5",
    70: "7",
    80: "8",
    85: "8X",
    }
|
||||
|
||||
##
# <p>This dictionary can be used to produce a text version of the internal codes
# that Excel uses for error cells. Here are its contents:
# <pre>
# 0x00: '#NULL!', # Intersection of two cell ranges is empty
# 0x07: '#DIV/0!', # Division by zero
# 0x0F: '#VALUE!', # Wrong type of operand
# 0x17: '#REF!', # Illegal or deleted cell reference
# 0x1D: '#NAME?', # Wrong function or range name
# 0x24: '#NUM!', # Value range overflow
# 0x2A: '#N/A', # Argument or function not available
# </pre></p>

error_text_from_code = {
    0x00: '#NULL!', # Intersection of two cell ranges is empty
    0x07: '#DIV/0!', # Division by zero
    0x0F: '#VALUE!', # Wrong type of operand
    0x17: '#REF!', # Illegal or deleted cell reference
    0x1D: '#NAME?', # Wrong function or range name
    0x24: '#NUM!', # Value range overflow
    # Excel spells this error without a trailing '!'.
    0x2A: '#N/A', # Argument or function not available
}
|
||||
|
||||
BIFF_FIRST_UNICODE = 80
|
||||
|
||||
XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
|
||||
XL_WORKBOOK_GLOBALS_4W = 0x100
|
||||
XL_WORKSHEET = WRKSHEET = 0x10
|
||||
|
||||
XL_BOUNDSHEET_WORKSHEET = 0x00
|
||||
XL_BOUNDSHEET_CHART = 0x02
|
||||
XL_BOUNDSHEET_VB_MODULE = 0x06
|
||||
|
||||
# XL_RK2 = 0x7e
|
||||
XL_ARRAY = 0x0221
|
||||
XL_ARRAY2 = 0x0021
|
||||
XL_BLANK = 0x0201
|
||||
XL_BLANK_B2 = 0x01
|
||||
XL_BOF = 0x809
|
||||
XL_BOOLERR = 0x205
|
||||
XL_BOOLERR_B2 = 0x5
|
||||
XL_BOUNDSHEET = 0x85
|
||||
XL_BUILTINFMTCOUNT = 0x56
|
||||
XL_CF = 0x01B1
|
||||
XL_CODEPAGE = 0x42
|
||||
XL_COLINFO = 0x7D
|
||||
XL_COLUMNDEFAULT = 0x20 # BIFF2 only
|
||||
XL_COLWIDTH = 0x24 # BIFF2 only
|
||||
XL_CONDFMT = 0x01B0
|
||||
XL_CONTINUE = 0x3c
|
||||
XL_COUNTRY = 0x8C
|
||||
XL_DATEMODE = 0x22
|
||||
XL_DEFAULTROWHEIGHT = 0x0225
|
||||
XL_DEFCOLWIDTH = 0x55
|
||||
XL_DIMENSION = 0x200
|
||||
XL_DIMENSION2 = 0x0
|
||||
XL_EFONT = 0x45
|
||||
XL_EOF = 0x0a
|
||||
XL_EXTERNNAME = 0x23
|
||||
XL_EXTERNSHEET = 0x17
|
||||
XL_EXTSST = 0xff
|
||||
XL_FEAT11 = 0x872
|
||||
XL_FILEPASS = 0x2f
|
||||
XL_FONT = 0x31
|
||||
XL_FONT_B3B4 = 0x231
|
||||
XL_FORMAT = 0x41e
|
||||
XL_FORMAT2 = 0x1E # BIFF2, BIFF3
|
||||
XL_FORMULA = 0x6
|
||||
XL_FORMULA3 = 0x206
|
||||
XL_FORMULA4 = 0x406
|
||||
XL_GCW = 0xab
|
||||
XL_INDEX = 0x20b
|
||||
XL_INTEGER = 0x2 # BIFF2 only
|
||||
XL_IXFE = 0x44 # BIFF2 only
|
||||
XL_LABEL = 0x204
|
||||
XL_LABEL_B2 = 0x04
|
||||
XL_LABELRANGES = 0x15f
|
||||
XL_LABELSST = 0xfd
|
||||
XL_MERGEDCELLS = 0xE5
|
||||
XL_MSO_DRAWING = 0x00EC
|
||||
XL_MSO_DRAWING_GROUP = 0x00EB
|
||||
XL_MSO_DRAWING_SELECTION = 0x00ED
|
||||
XL_MULRK = 0xbd
|
||||
XL_MULBLANK = 0xbe
|
||||
XL_NAME = 0x18
|
||||
XL_NOTE = 0x1c
|
||||
XL_NUMBER = 0x203
|
||||
XL_NUMBER_B2 = 0x3
|
||||
XL_OBJ = 0x5D
|
||||
XL_PALETTE = 0x92
|
||||
XL_RK = 0x27e
|
||||
XL_ROW = 0x208
|
||||
XL_ROW_B2 = 0x08
|
||||
XL_RSTRING = 0xd6
|
||||
XL_SHEETHDR = 0x8F # BIFF4W only
|
||||
XL_SHEETSOFFSET = 0x8E # BIFF4W only
|
||||
XL_SHRFMLA = 0x04bc
|
||||
XL_SST = 0xfc
|
||||
XL_STANDARDWIDTH = 0x99
|
||||
XL_STRING = 0x207
|
||||
XL_STRING_B2 = 0x7
|
||||
XL_STYLE = 0x293
|
||||
XL_SUPBOOK = 0x1AE
|
||||
XL_TABLEOP = 0x236
|
||||
XL_TABLEOP2 = 0x37
|
||||
XL_TABLEOP_B2 = 0x36
|
||||
XL_TXO = 0x1b6
|
||||
XL_UNCALCED = 0x5e
|
||||
XL_UNKNOWN = 0xffff
|
||||
XL_WINDOW2 = 0x023E
|
||||
XL_WRITEACCESS = 0x5C
|
||||
XL_XF = 0xe0
|
||||
XL_XF2 = 0x0043 # BIFF2 version of XF record
|
||||
XL_XF3 = 0x0243 # BIFF3 version of XF record
|
||||
XL_XF4 = 0x0443 # BIFF4 version of XF record
|
||||
|
||||
boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
|
||||
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
|
||||
|
||||
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
|
||||
|
||||
# Opcodes for which is_cell_opcode() answers true.
_cell_opcode_list = [
    XL_BOOLERR,
    XL_FORMULA,
    XL_FORMULA3,
    XL_FORMULA4,
    XL_LABEL,
    XL_LABELSST,
    XL_MULRK,
    XL_NUMBER,
    XL_RK,
    XL_RSTRING,
    ]
# The same opcodes as dictionary keys, so membership is a single hash lookup.
_cell_opcode_dict = {}
for _cell_opcode in _cell_opcode_list:
    _cell_opcode_dict[_cell_opcode] = 1
# is_cell_opcode(opcode) -> true if opcode is in the list above.
# dict.has_key is deprecated (and gone in Python 3); the bound __contains__
# method answers exactly the same question.
is_cell_opcode = _cell_opcode_dict.__contains__
|
||||
|
||||
# def fprintf(f, fmt, *vargs): f.write(fmt % vargs)
|
||||
|
||||
def fprintf(f, fmt, *vargs):
    """Write ``fmt % vargs`` to file object f via the print statement.

    When fmt ends with a newline, that newline is stripped and print supplies
    the line ending. Otherwise the text is printed with a trailing comma, so
    no newline is written.
    """
    if not fmt.endswith('\n'):
        print >> f, fmt % vargs,
        return
    print >> f, fmt[:-1] % vargs
|
||||
|
||||
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """Unpack bit fields of src into attributes of tgt_obj.

    manifest is a sequence of (shift, mask, attribute_name) triples; each
    attribute is set to ``(src & mask) >> shift``.
    """
    for entry in manifest:
        shift, mask, attr_name = entry
        field = (src & mask) >> shift
        local_setattr(tgt_obj, attr_name, field)
|
||||
|
||||
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """Like upkbits, but each extracted field is passed through local_int.

    manifest is a sequence of (shift, mask, attribute_name) triples; each
    attribute is set to ``local_int((src & mask) >> shift)``.
    """
    for entry in manifest:
        shift, mask, attr_name = entry
        field = local_int((src & mask) >> shift)
        local_setattr(tgt_obj, attr_name, field)
|
||||
|
||||
def unpack_string(data, pos, encoding, lenlen=1):
    """Decode the length-prefixed byte string found at data[pos].

    @param data byte string (anything sliceable that yields byte strings)
    @param pos offset of the length prefix
    @param encoding codec name used to decode the characters
    @param lenlen size of the length prefix in bytes: 1 ('B') or 2 ('<H')
    @return the decoded unicode string
    """
    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
    pos += lenlen
    # .decode() yields what unicode(bytes, encoding) yields on Python 2 and
    # also works on Python 3 bytes, where the unicode() builtin is missing.
    return data[pos:pos+nchars].decode(encoding)
|
||||
|
||||
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """Decode a byte string at data[pos] and report where it ended.

    @param data byte string (anything sliceable that yields byte strings)
    @param pos offset of the length prefix, or of the first character when
        known_len is given
    @param encoding codec name used to decode the characters
    @param lenlen size of the length prefix in bytes: 1 ('B') or 2 ('<H')
    @param known_len character count supplied by the caller; when not None
        no length prefix is read
    @return (decoded unicode string, offset just past the string)
    """
    if known_len is not None:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    else:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    newpos = pos + nchars
    # .decode() yields what unicode(bytes, encoding) yields on Python 2 and
    # also works on Python 3 bytes, where the unicode() builtin is missing.
    return (data[pos:newpos].decode(encoding), newpos)
|
||||
|
||||
def unpack_unicode(data, pos, lenlen=2):
    """Return the unicode string stored at data[pos].

    Layout read here: a character count (lenlen bytes), an options byte,
    an optional 2-byte field (options & 0x08) and an optional 4-byte field
    (options & 0x04), both skipped, then the characters.

    @param data byte string (anything sliceable that yields byte strings)
    @param pos offset of the character count
    @param lenlen size of the character count in bytes: 1 ('B') or 2 ('<H')
    @return the decoded unicode string
    """
    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return u""
    pos += lenlen
    options = data[pos]
    if not isinstance(options, int):
        # Indexing a Python 2 str gives a 1-char string; Python 3 bytes
        # indexing already gives the integer.
        options = ord(options)
    pos += 1
    # phonetic = options & 0x04
    # richtext = options & 0x08
    if options & 0x08:
        # Skip the 2-byte rich-text run count; its value is not needed here.
        pos += 2
    if options & 0x04:
        # Skip the 4-byte phonetic size; its value is not needed here.
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        return data[pos:pos+2*nchars].decode('utf_16_le')
    # Note: this is COMPRESSED (not ASCII!) encoding!!!
    # Merely returning the raw bytes would work OK 99.99% of the time
    # if the local codepage was cp1252 -- however this would rapidly go pear-shaped
    # for other codepages so we grit our Anglocentric teeth and return Unicode :-)
    return data[pos:pos+nchars].decode("latin_1")
|
||||
|
||||
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """Return (unicode_strg, updated value of pos).

    Layout read here: a character count (lenlen bytes, absent when known_len
    is given), an options byte, an optional 2-byte rich-text run count
    (options & 0x08), an optional 4-byte phonetic size (options & 0x04),
    the characters, then 4 bytes per rich-text run and the phonetic data.
    The returned position is just past all of that.

    @param data byte string (anything sliceable that yields byte strings)
    @param pos offset of the character count, or of the options byte when
        known_len is given
    @param lenlen size of the character count in bytes: 1 ('B') or 2 ('<H')
    @param known_len character count supplied by the caller
    """
    if known_len is not None:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    else:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (u"", pos)
    options = data[pos]
    if not isinstance(options, int):
        # Indexing a Python 2 str gives a 1-char string; Python 3 bytes
        # indexing already gives the integer.
        options = ord(options)
    pos += 1
    phonetic = options & 0x04
    richtext = options & 0x08
    if richtext:
        rt = unpack('<H', data[pos:pos+2])[0]
        pos += 2
    if phonetic:
        sz = unpack('<i', data[pos:pos+4])[0]
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        strg = data[pos:pos+2*nchars].decode('utf_16_le')
        pos += 2*nchars
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        strg = data[pos:pos+nchars].decode("latin_1")
        pos += nchars
    # Step over the trailing rich-text runs and phonetic block, if any.
    if richtext:
        pos += 4 * rt
    if phonetic:
        pos += sz
    return (strg, pos)
|
||||
|
||||
def unpack_cell_range_address_list_update_pos(
    output_list, data, pos, biff_version, addr_size=6):
    """Read a counted list of cell range addresses starting at data[pos].

    The list is a 2-byte count followed by that many addresses. Each address
    unpacks to (ra, rb, ca, cb) and is appended to output_list as
    (ra, rb+1, ca, cb+1).

    @param output_list list that is updated in situ
    @param data byte string holding the record data
    @param pos offset of the 2-byte count
    @param biff_version BIFF version number; below 80 only addr_size 6 is allowed
    @param addr_size bytes per address: 6 ('<HHBB') or 8 ('<HHHH')
    @return offset just past the last address
    """
    # output_list is updated in situ
    if biff_version < 80:
        assert addr_size == 6
    else:
        assert addr_size in (6, 8)
    n, = unpack("<H", data[pos:pos+2])
    pos += 2
    if addr_size == 6:
        fmt = "<HHBB"
    else:
        fmt = "<HHHH"
    remaining = n
    while remaining:
        ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
        output_list.append((ra, rb+1, ca, cb+1))
        pos += addr_size
        remaining -= 1
    return pos
|
||||
|
||||
_brecstrg = """\
|
||||
0000 DIMENSIONS_B2
|
||||
0001 BLANK_B2
|
||||
0002 INTEGER_B2_ONLY
|
||||
0003 NUMBER_B2
|
||||
0004 LABEL_B2
|
||||
0005 BOOLERR_B2
|
||||
0006 FORMULA
|
||||
0007 STRING_B2
|
||||
0008 ROW_B2
|
||||
0009 BOF_B2
|
||||
000A EOF
|
||||
000B INDEX_B2_ONLY
|
||||
000C CALCCOUNT
|
||||
000D CALCMODE
|
||||
000E PRECISION
|
||||
000F REFMODE
|
||||
0010 DELTA
|
||||
0011 ITERATION
|
||||
0012 PROTECT
|
||||
0013 PASSWORD
|
||||
0014 HEADER
|
||||
0015 FOOTER
|
||||
0016 EXTERNCOUNT
|
||||
0017 EXTERNSHEET
|
||||
0018 NAME_B2,5+
|
||||
0019 WINDOWPROTECT
|
||||
001A VERTICALPAGEBREAKS
|
||||
001B HORIZONTALPAGEBREAKS
|
||||
001C NOTE
|
||||
001D SELECTION
|
||||
001E FORMAT_B2-3
|
||||
001F BUILTINFMTCOUNT_B2
|
||||
0020 COLUMNDEFAULT_B2_ONLY
|
||||
0021 ARRAY_B2_ONLY
|
||||
0022 DATEMODE
|
||||
0023 EXTERNNAME
|
||||
0024 COLWIDTH_B2_ONLY
|
||||
0025 DEFAULTROWHEIGHT_B2_ONLY
|
||||
0026 LEFTMARGIN
|
||||
0027 RIGHTMARGIN
|
||||
0028 TOPMARGIN
|
||||
0029 BOTTOMMARGIN
|
||||
002A PRINTHEADERS
|
||||
002B PRINTGRIDLINES
|
||||
002F FILEPASS
|
||||
0031 FONT
|
||||
0032 FONT2_B2_ONLY
|
||||
0036 TABLEOP_B2
|
||||
0037 TABLEOP2_B2
|
||||
003C CONTINUE
|
||||
003D WINDOW1
|
||||
003E WINDOW2_B2
|
||||
0040 BACKUP
|
||||
0041 PANE
|
||||
0042 CODEPAGE
|
||||
0043 XF_B2
|
||||
0044 IXFE_B2_ONLY
|
||||
0045 EFONT_B2_ONLY
|
||||
004D PLS
|
||||
0051 DCONREF
|
||||
0055 DEFCOLWIDTH
|
||||
0056 BUILTINFMTCOUNT_B3-4
|
||||
0059 XCT
|
||||
005A CRN
|
||||
005B FILESHARING
|
||||
005C WRITEACCESS
|
||||
005D OBJECT
|
||||
005E UNCALCED
|
||||
005F SAVERECALC
|
||||
0063 OBJECTPROTECT
|
||||
007D COLINFO
|
||||
007E RK2_mythical_?
|
||||
0080 GUTS
|
||||
0081 WSBOOL
|
||||
0082 GRIDSET
|
||||
0083 HCENTER
|
||||
0084 VCENTER
|
||||
0085 BOUNDSHEET
|
||||
0086 WRITEPROT
|
||||
008C COUNTRY
|
||||
008D HIDEOBJ
|
||||
008E SHEETSOFFSET
|
||||
008F SHEETHDR
|
||||
0090 SORT
|
||||
0092 PALETTE
|
||||
0099 STANDARDWIDTH
|
||||
009B FILTERMODE
|
||||
009C FNGROUPCOUNT
|
||||
009D AUTOFILTERINFO
|
||||
009E AUTOFILTER
|
||||
00A0 SCL
|
||||
00A1 SETUP
|
||||
00AB GCW
|
||||
00BD MULRK
|
||||
00BE MULBLANK
|
||||
00C1 MMS
|
||||
00D6 RSTRING
|
||||
00D7 DBCELL
|
||||
00DA BOOKBOOL
|
||||
00DD SCENPROTECT
|
||||
00E0 XF
|
||||
00E1 INTERFACEHDR
|
||||
00E2 INTERFACEEND
|
||||
00E5 MERGEDCELLS
|
||||
00E9 BITMAP
|
||||
00EB MSO_DRAWING_GROUP
|
||||
00EC MSO_DRAWING
|
||||
00ED MSO_DRAWING_SELECTION
|
||||
00EF PHONETIC
|
||||
00FC SST
|
||||
00FD LABELSST
|
||||
00FF EXTSST
|
||||
013D TABID
|
||||
015F LABELRANGES
|
||||
0160 USESELFS
|
||||
0161 DSF
|
||||
01AE SUPBOOK
|
||||
01AF PROTECTIONREV4
|
||||
01B0 CONDFMT
|
||||
01B1 CF
|
||||
01B2 DVAL
|
||||
01B6 TXO
|
||||
01B7 REFRESHALL
|
||||
01B8 HLINK
|
||||
01BC PASSWORDREV4
|
||||
01BE DV
|
||||
01C0 XL9FILE
|
||||
01C1 RECALCID
|
||||
0200 DIMENSIONS
|
||||
0201 BLANK
|
||||
0203 NUMBER
|
||||
0204 LABEL
|
||||
0205 BOOLERR
|
||||
0206 FORMULA_B3
|
||||
0207 STRING
|
||||
0208 ROW
|
||||
0209 BOF
|
||||
020B INDEX_B3+
|
||||
0218 NAME
|
||||
0221 ARRAY
|
||||
0223 EXTERNNAME_B3-4
|
||||
0225 DEFAULTROWHEIGHT
|
||||
0231 FONT_B3B4
|
||||
0236 TABLEOP
|
||||
023E WINDOW2
|
||||
0243 XF_B3
|
||||
027E RK
|
||||
0293 STYLE
|
||||
0406 FORMULA_B4
|
||||
0409 BOF
|
||||
041E FORMAT
|
||||
0443 XF_B4
|
||||
04BC SHRFMLA
|
||||
0800 QUICKTIP
|
||||
0809 BOF
|
||||
0862 SHEETLAYOUT
|
||||
0867 SHEETPROTECTION
|
||||
0868 RANGEPROTECTION
|
||||
"""
|
||||
|
||||
# Build {record number: record name} from _brecstrg, whose lines each hold a
# hexadecimal record number and a name separated by whitespace.
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Remove every temporary (including _numh) so none lingers in the module namespace.
del _buff, _numh, _name, _brecstrg
|
||||
|
||||
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """Write a hex + character dump of strg[ofs:ofs+dlen] to fout.

    Up to 16 bytes go on each output line: an optional offset prefix, the
    bytes in hex, then the bytes as characters, with NUL shown as '~' and
    anything else outside ' '..'~' shown as '?'.

    @param strg byte string to dump
    @param ofs offset of the first byte to dump
    @param dlen number of bytes to dump (clipped to the end of strg)
    @param base offset shown for the first dumped byte
    @param fout file object that receives the dump
    @param unnumbered if true, omit the offset prefix
    """
    endpos = min(ofs + dlen, len(strg))
    num_prefix = ''
    pos = ofs
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            # Slice came back with an unexpected size: report and give up.
            fprintf(
                sys.stdout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, endpos, pos, endsub, substrg)
            break
        hexd = ''.join(["%02x " % ord(c) for c in substrg])
        shown = []
        for c in substrg:
            if c == '\0':
                shown.append('~')
            elif ' ' <= c <= '~':
                shown.append(c)
            else:
                shown.append('?')
        chard = ''.join(shown)
        if not unnumbered:
            num_prefix = "%5d: " % (base+pos-ofs)
        fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
        pos = endsub
|
||||
|
||||
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """Write a record-by-record dump of a stream of records to fout.

    Each record is read as a 4-byte header ('<HH': record code, data length)
    followed by that many data bytes. For every record a header line is
    written and the data is passed to hex_char_dump. Headers whose code and
    length are both zero are not dumped; they are counted and reported as
    "zero bytes skipped".

    @param mem byte string containing the stream
    @param stream_offset offset in mem of the first record
    @param stream_len length of the stream in bytes
    @param base offset shown for the first byte of the stream
    @param fout file object that receives the dump
    @param unnumbered if true, omit the offset prefix on each line
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    # Added to a position in mem to get the offset that is displayed.
    adj = base - stream_offset
    # Number of zero bytes in the run currently being skipped (0 = no run);
    # savpos remembers where that run started.
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            if mem[pos:] == '\0' * (stream_end - pos):
                # Everything from here on is zero bytes: count it all and stop.
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break
            if dummies:
                # Extend the run already in progress.
                dummies += 4
            else:
                # Start a new run of zero bytes.
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # A real record ends the run of zero bytes: report the run first.
                if numbered:
                    num_prefix = "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
                dummies = 0
            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        # The stream ended while a run of zero bytes was still pending.
        if numbered:
            num_prefix = "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
    if pos < stream_end:
        # Fewer than 4 bytes remain, so they cannot form a record header.
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        # The last record's declared length ran past the end of the stream.
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
|
||||
|
||||
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """Count the records in a stream by name and print the tallies to fout.

    Records are read as a 4-byte header ('<HH': record code, data length)
    followed by the data. Codes missing from biff_rec_name_dict are counted
    under "Unknown_0x<code>"; all-zero headers are counted under
    "<Dummy (zero)>", unless the rest of mem consists of exactly the zero
    bytes remaining in the stream, which ends the scan.

    @param mem byte string containing the stream
    @param stream_offset offset in mem of the first record
    @param stream_len length of the stream in bytes
    @param fout file object that receives one "count name" line per name
    """
    stream_end = stream_offset + stream_len
    tally = {}
    pos = stream_offset
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc != 0 or length != 0:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        else:
            if mem[pos:] == '\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        tally[recname] = tally.get(recname, 0) + 1
        pos += length + 4
    slist = list(tally.items())
    slist.sort()
    for recname, count in slist:
        print >> fout, "%8d %s" % (count, recname)
|
||||
|
||||
# Maps a codepage number to the name of a Python codec.
# Entries marked "guess" are flagged as unverified by the original author.
encoding_from_codepage = {
    1200 : 'utf_16_le',
    10000: 'mac_roman',
    10006: 'mac_greek', # guess
    10007: 'mac_cyrillic', # guess
    10029: 'mac_latin2', # guess
    10079: 'mac_iceland', # guess
    10081: 'mac_turkish', # guess
    32768: 'mac_roman',
    32769: 'cp1252',
    }
|
||||
# some more guessing, for Indic scripts
|
||||
# codepage 57000 range:
|
||||
# 2 Devanagari [0]
|
||||
# 3 Bengali [1]
|
||||
# 4 Tamil [5]
|
||||
# 5 Telugu [6]
|
||||
# 6 Assamese [1] c.f. Bengali
|
||||
# 7 Oriya [4]
|
||||
# 8 Kannada [7]
|
||||
# 9 Malayalam [8]
|
||||
# 10 Gujarati [3]
|
||||
# 11 Gurmukhi [2]
|
||||
@@ -0,0 +1,358 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Implements the minimal functionality required
|
||||
# to extract a "Workbook" or "Book" stream (as one big string)
|
||||
# from an OLE2 Compound Document file.
|
||||
# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer]
|
||||
# 2007-09-08 SJM Warning message if sector sizes are extremely large.
|
||||
# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
|
||||
# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
|
||||
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
from timemachine import *
|
||||
|
||||
##
|
||||
# Magic cookie that should appear in the first 8 bytes of the file.
|
||||
SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
|
||||
|
||||
EOCSID = -2
|
||||
FREESID = -1
|
||||
SATSID = -3
|
||||
MSATSID = -4
|
||||
|
||||
class CompDocError(Exception):
    """Raised by this module when the compound document cannot be processed."""
|
||||
|
||||
class DirNode(object):
    """One directory entry of a compound document, unpacked from its raw bytes.

    Attributes set by __init__: DID, name, etype, colour, left_DID,
    right_DID, root_DID, first_SID, tot_size, tsinfo, plus children and
    parent, which start empty / -1 and are filled in later.
    """

    def __init__(self, DID, dent, DEBUG=0):
        """Unpack a directory entry.

        @param DID index of this entry in the directory
        @param dent the 128-byte directory entry
        @param DEBUG if true, dump the entry after unpacking (passed on to dump)
        """
        # dent is the 128-byte directory entry
        self.DID = DID
        # (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
        # self.root_DID,
        # self.first_SID,
        # self.tot_size) = \
        # unpack('<HBBiii16x4x8x8xii4x', dent[64:128])
        (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
            self.root_DID) = \
            unpack('<HBBiii', dent[64:80])
        (self.first_SID, self.tot_size) = \
            unpack('<ii', dent[116:124])
        if cbufsize == 0:
            self.name = u''
        else:
            # cbufsize counts the name's bytes including a 2-byte terminator.
            # .decode() gives what unicode(bytes, codec) gives on Python 2
            # and also works on Python 3 bytes.
            self.name = dent[0:cbufsize-2].decode('utf_16_le') # omit the trailing U+0000
        self.children = [] # filled in later
        self.parent = -1 # indicates orphan; fixed up later
        self.tsinfo = unpack('<IIII', dent[100:116])
        if DEBUG:
            self.dump(DEBUG)

    def dump(self, DEBUG=1):
        """Print a one-line summary to stdout; with DEBUG == 2 also the timestamps."""
        # Each print is given a single parenthesised string, so the output is
        # the same whether print is a statement (Python 2) or a function.
        print(
            "DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d"
            % (self.DID, self.name, self.etype, self.left_DID,
            self.right_DID, self.root_DID, self.parent, self.children, self.first_SID, self.tot_size)
            )
        if DEBUG == 2:
            # cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
            print("timestamp info %s" % (self.tsinfo,))
|
||||
|
||||
def _build_family_tree(dirlist, parent_DID, child_DID):
|
||||
if child_DID < 0: return
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].left_DID)
|
||||
dirlist[parent_DID].children.append(child_DID)
|
||||
dirlist[child_DID].parent = parent_DID
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].right_DID)
|
||||
if dirlist[child_DID].etype == 1: # storage
|
||||
_build_family_tree(dirlist, child_DID, dirlist[child_DID].root_DID)
|
||||
|
||||
##
|
||||
# Compound document handler.
|
||||
# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
# only operation it needs to support is slicing.
|
||||
|
||||
class CompDoc(object):
|
||||
|
||||
def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
|
||||
self.logfile = logfile
|
||||
if mem[0:8] != SIGNATURE:
|
||||
raise CompDocError('Not an OLE2 compound document')
|
||||
if mem[28:30] != '\xFE\xFF':
|
||||
raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
|
||||
revision, version = unpack('<HH', mem[24:28])
|
||||
if DEBUG:
|
||||
print >> logfile, "\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision)
|
||||
self.mem = mem
|
||||
ssz, sssz = unpack('<HH', mem[30:34])
|
||||
if ssz > 20: # allows for 2**20 bytes i.e. 1MB
|
||||
print >> logfile, \
|
||||
"WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \
|
||||
% ssz
|
||||
ssz = 9
|
||||
if sssz > ssz:
|
||||
print >> logfile, \
|
||||
"WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \
|
||||
% sssz
|
||||
sssz = 6
|
||||
self.sec_size = sec_size = 1 << ssz
|
||||
self.short_sec_size = 1 << sssz
|
||||
(
|
||||
SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
|
||||
SSAT_first_sec_sid, SSAT_tot_secs,
|
||||
MSAT_first_sec_sid, MSAT_tot_secs,
|
||||
# ) = unpack('<ii4xiiiii', mem[44:76])
|
||||
) = unpack('<iiiiiiii', mem[44:76])
|
||||
mem_data_len = len(mem) - 512
|
||||
mem_data_secs, left_over = divmod(mem_data_len, sec_size)
|
||||
if left_over:
|
||||
#### raise CompDocError("Not a whole number of sectors")
|
||||
print >> logfile, \
|
||||
"WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \
|
||||
% (len(mem), sec_size)
|
||||
if DEBUG:
|
||||
print >> logfile, 'sec sizes', ssz, sssz, sec_size, self.short_sec_size
|
||||
print >> logfile, "mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs)
|
||||
print >> logfile, "SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \
|
||||
% (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,)
|
||||
print >> logfile, "SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,)
|
||||
print >> logfile, "MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,)
|
||||
nent = int_floor_div(sec_size, 4) # number of SID entries in a sector
|
||||
fmt = "<%di" % nent
|
||||
trunc_warned = 0
|
||||
#
|
||||
# === build the MSAT ===
|
||||
#
|
||||
MSAT = list(unpack('<109i', mem[76:512]))
|
||||
sid = MSAT_first_sec_sid
|
||||
while sid >= 0:
|
||||
if sid >= mem_data_secs:
|
||||
raise CompDocError(
|
||||
"MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
|
||||
)
|
||||
offset = 512 + sec_size * sid
|
||||
news = list(unpack(fmt, mem[offset:offset+sec_size]))
|
||||
sid = news.pop()
|
||||
MSAT.extend(news)
|
||||
if DEBUG:
|
||||
print >> logfile, "MSAT: len =", len(MSAT)
|
||||
print >> logfile, MSAT
|
||||
#
|
||||
# === build the SAT ===
|
||||
#
|
||||
self.SAT = []
|
||||
for msid in MSAT:
|
||||
if msid == FREESID: continue
|
||||
if msid >= mem_data_secs:
|
||||
if not trunc_warned:
|
||||
print >> logfile, "WARNING *** File is truncated, or OLE2 MSAT is corrupt!!"
|
||||
print >> logfile, \
|
||||
"INFO: Trying to access sector %d but only %d available" \
|
||||
% (msid, mem_data_secs)
|
||||
trunc_warned = 1
|
||||
continue
|
||||
offset = 512 + sec_size * msid
|
||||
news = list(unpack(fmt, mem[offset:offset+sec_size]))
|
||||
self.SAT.extend(news)
|
||||
if DEBUG:
|
||||
print >> logfile, "SAT: len =", len(self.SAT)
|
||||
print >> logfile, self.SAT
|
||||
# print >> logfile, "SAT ",
|
||||
# for i, s in enumerate(self.SAT):
|
||||
# print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s)
|
||||
# print >> logfile, "%d:%d " % (i, s),
|
||||
print
|
||||
|
||||
# === build the directory ===
|
||||
#
|
||||
dbytes = self._get_stream(
|
||||
self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
|
||||
name="directory")
|
||||
dirlist = []
|
||||
did = -1
|
||||
for pos in xrange(0, len(dbytes), 128):
|
||||
did += 1
|
||||
dirlist.append(DirNode(did, dbytes[pos:pos+128], 0))
|
||||
self.dirlist = dirlist
|
||||
_build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
|
||||
if DEBUG:
|
||||
for d in dirlist:
|
||||
d.dump(DEBUG)
|
||||
#
|
||||
# === get the SSCS ===
|
||||
#
|
||||
sscs_dir = self.dirlist[0]
|
||||
assert sscs_dir.etype == 5 # root entry
|
||||
if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
|
||||
# Problem reported by Frank Hoffsuemmer: some software was
|
||||
# writing -1 instead of -2 (EOCSID) for the first_SID
|
||||
# when the SCCS was empty. Not having EOCSID caused assertion
|
||||
# failure in _get_stream.
|
||||
# Solution: avoid calling _get_stream in any case when the
|
||||
# SCSS appears to be empty.
|
||||
self.SSCS = ""
|
||||
else:
|
||||
self.SSCS = self._get_stream(
|
||||
self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
|
||||
sscs_dir.tot_size, name="SSCS")
|
||||
# if DEBUG: print >> logfile, "SSCS", repr(self.SSCS)
|
||||
#
|
||||
# === build the SSAT ===
|
||||
#
|
||||
self.SSAT = []
|
||||
if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
|
||||
print >> logfile, \
|
||||
"WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero"
|
||||
if sscs_dir.tot_size > 0:
|
||||
sid = SSAT_first_sec_sid
|
||||
nsecs = SSAT_tot_secs
|
||||
while sid >= 0 and nsecs > 0:
|
||||
nsecs -= 1
|
||||
start_pos = 512 + sid * sec_size
|
||||
news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
|
||||
self.SSAT.extend(news)
|
||||
sid = self.SAT[sid]
|
||||
# assert SSAT_tot_secs == 0 or sid == EOCSID
|
||||
if DEBUG: print >> logfile, "SSAT last sid %d; remaining sectors %d" % (sid, nsecs)
|
||||
assert nsecs == 0 and sid == EOCSID
|
||||
if DEBUG: print >> logfile, "SSAT", self.SSAT
|
||||
|
||||
def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
|
||||
# print >> self.logfile, "_get_stream", base, sec_size, start_sid, size
|
||||
sectors = []
|
||||
s = start_sid
|
||||
if size is None:
|
||||
# nothing to check against
|
||||
while s >= 0:
|
||||
start_pos = base + s * sec_size
|
||||
sectors.append(mem[start_pos:start_pos+sec_size])
|
||||
try:
|
||||
s = sat[s]
|
||||
except IndexError:
|
||||
raise CompDocError(
|
||||
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
|
||||
(name, s)
|
||||
)
|
||||
assert s == EOCSID
|
||||
else:
|
||||
todo = size
|
||||
while s >= 0:
|
||||
start_pos = base + s * sec_size
|
||||
grab = sec_size
|
||||
if grab > todo:
|
||||
grab = todo
|
||||
todo -= grab
|
||||
sectors.append(mem[start_pos:start_pos+grab])
|
||||
try:
|
||||
s = sat[s]
|
||||
except IndexError:
|
||||
raise CompDocError(
|
||||
"OLE2 stream %r: sector allocation table invalid entry (%d)" %
|
||||
(name, s)
|
||||
)
|
||||
assert s == EOCSID
|
||||
if todo != 0:
|
||||
print >> self.logfile, \
|
||||
"WARNING *** OLE2 stream %r: expected size %d, actual size %d" \
|
||||
% (name, size, size - todo)
|
||||
return ''.join(sectors)
|
||||
|
||||
def _dir_search(self, path, storage_DID=0):
|
||||
# Return matching DirNode instance, or None
|
||||
head = path[0]
|
||||
tail = path[1:]
|
||||
dl = self.dirlist
|
||||
for child in dl[storage_DID].children:
|
||||
if dl[child].name.lower() == head.lower():
|
||||
et = dl[child].etype
|
||||
if et == 2:
|
||||
return dl[child]
|
||||
if et == 1:
|
||||
if not tail:
|
||||
raise CompDocError("Requested component is a 'storage'")
|
||||
return self._dir_search(tail, child)
|
||||
dl[child].dump(1)
|
||||
raise CompDocError("Requested stream is not a 'user stream'")
|
||||
return None
|
||||
|
||||
##
|
||||
# Interrogate the compound document's directory; return the stream as a string if found, otherwise
|
||||
# return None.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def get_named_stream(self, qname):
|
||||
d = self._dir_search(qname.split("/"))
|
||||
if d is None:
|
||||
return None
|
||||
if d.tot_size >= self.min_size_std_stream:
|
||||
return self._get_stream(
|
||||
self.mem, 512, self.SAT, self.sec_size, d.first_SID,
|
||||
d.tot_size, name=qname)
|
||||
else:
|
||||
return self._get_stream(
|
||||
self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
|
||||
d.tot_size, name=qname + " (from SSCS)")
|
||||
|
||||
##
|
||||
# Interrogate the compound document's directory.
|
||||
# If the named stream is not found, (None, 0, 0) will be returned.
|
||||
# If the named stream is found and is contiguous within the original byte sequence ("mem")
|
||||
# used when the document was opened,
|
||||
# then (mem, offset_to_start_of_stream, length_of_stream) is returned.
|
||||
# Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def locate_named_stream(self, qname):
    """Locate the stream called ``qname`` without copying it if possible.

    Returns a 3-tuple:

    * ``(None, 0, 0)`` when the stream is not found;
    * whatever ``_locate_stream`` returns for a stream of at least
      ``self.min_size_std_stream`` bytes;
    * ``(new_string, 0, length)`` for a smaller stream, which is always
      extracted from ``self.SSCS`` with ``_get_stream``.

    The original ended with a second ``return (None, 0, 0)`` that could
    never execute because both arms of the preceding if/else returned;
    that dead statement has been removed.
    """
    node = self._dir_search(qname.split("/"))
    if node is None:
        return (None, 0, 0)
    if node.tot_size >= self.min_size_std_stream:
        return self._locate_stream(
            self.mem, 512, self.SAT, self.sec_size, node.first_SID, node.tot_size)
    data = self._get_stream(
        self.SSCS, 0, self.SSAT, self.short_sec_size, node.first_SID,
        node.tot_size, qname + " (from SSCS)")
    return (data, 0, node.tot_size)
|
||||
|
||||
def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
    """Follow a sector chain and report where the stream lives in ``mem``.

    Starting at ``start_sid``, each sector's successor is read from
    ``sat`` until a negative SID ends the chain.  Runs of consecutive SIDs
    are merged into (start, end) byte ranges computed as
    ``base + sid * sec_size``.

    Returns ``(mem, start_offset, size)`` when the whole chain is one
    contiguous run, otherwise ``(joined_fragments, 0, size)``.

    Raises CompDocError when ``start_sid`` is negative, or when the chain
    revisits a sector.  The revisit check is new: a cyclic chain in a
    corrupt file previously looped forever while ``slices`` kept growing.
    """
    if start_sid < 0:
        raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
    slices = []
    visited = {}
    prev_sid = -99  # dummy previous SID; never one less than a real SID
    start_pos = -9999
    end_pos = -8888
    sid = start_sid
    while sid >= 0:
        if visited.get(sid):
            raise CompDocError(
                "_locate_stream: sector chain revisits SID %d" % sid)
        visited[sid] = 1
        if sid == prev_sid + 1:
            # Contiguous with the previous sector: extend the current run.
            end_pos += sec_size
        else:
            # Start a new run, first saving the previous one (if any).
            if prev_sid >= 0:
                slices.append((start_pos, end_pos))
            start_pos = base + sid * sec_size
            end_pos = start_pos + sec_size
        prev_sid = sid
        sid = sat[sid]
    assert sid == EOCSID
    if not slices:
        # The stream is contiguous ... just what we like!
        return (mem, start_pos, size)
    slices.append((start_pos, end_pos))
    return (''.join([mem[lo:hi] for lo, hi in slices]), 0, size)
|
||||
|
||||
# ==========================================================================================
|
||||
@@ -0,0 +1,69 @@
|
||||
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv='Content-Type' content='text/html; charset=us-ascii' />
|
||||
<title>The compdoc Module</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>The compdoc Module</h1>
|
||||
<p>Implements the minimal functionality required
|
||||
to extract a "Workbook" or "Book" stream (as one big string)
|
||||
from an OLE2 Compound Document file.
|
||||
</p><p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
<p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
<h2>Module Contents</h2>
|
||||
<dl>
|
||||
<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Compound document handler.</p>
|
||||
<dl>
|
||||
<dt><i>mem</i></dt>
|
||||
<dd>
|
||||
The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
only operation it needs to support is slicing.</dd>
|
||||
</dl><br />
|
||||
<p>For more information about this class, see <a href='#compdoc.CompDoc-class'><i>The CompDoc Class</i></a>.</p>
|
||||
</dd>
|
||||
<dt><a id='compdoc.SIGNATURE-variable' name='compdoc.SIGNATURE-variable'><b>SIGNATURE</b></a> (variable) [<a href='#compdoc.SIGNATURE-variable'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Magic cookie that should appear in the first 8 bytes of the file.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<h2><a id='compdoc.CompDoc-class' name='compdoc.CompDoc-class'>The CompDoc Class</a></h2>
|
||||
<dl>
|
||||
<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Compound document handler.</p>
|
||||
<dl>
|
||||
<dt><i>mem</i></dt>
|
||||
<dd>
|
||||
The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
only operation it needs to support is slicing.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
<dt><a id='compdoc.CompDoc.get_named_stream-method' name='compdoc.CompDoc.get_named_stream-method'><b>get_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.get_named_stream-method'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Interrogate the compound document's directory; return the stream as a string if found, otherwise
|
||||
return None.</p>
|
||||
<dl>
|
||||
<dt><i>qname</i></dt>
|
||||
<dd>
|
||||
Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
<dt><a id='compdoc.CompDoc.locate_named_stream-method' name='compdoc.CompDoc.locate_named_stream-method'><b>locate_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.locate_named_stream-method'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Interrogate the compound document's directory.
|
||||
If the named stream is not found, (None, 0, 0) will be returned.
|
||||
If the named stream is found and is contiguous within the original byte sequence ("mem")
|
||||
used when the document was opened,
|
||||
then (mem, offset_to_start_of_stream, length_of_stream) is returned.
|
||||
Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.</p>
|
||||
<dl>
|
||||
<dt><i>qname</i></dt>
|
||||
<dd>
|
||||
Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
</dl>
|
||||
</body></html>
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -0,0 +1,178 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Module/script example of the xlrd API for extracting information
|
||||
# about named references, named constants, etc.
|
||||
#
|
||||
# <p>Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
import xlrd
|
||||
import sys
|
||||
import glob
|
||||
|
||||
def scope_as_string(book, scope):
    """Return a human-readable description of a name's scope value.

    A value in ``0 <= scope < book.nsheets`` is reported as a sheet number
    together with the sheet's name; -1 is "Global", -2 is "Macro/VBA";
    anything else is reported as unknown.
    """
    if 0 <= scope < book.nsheets:
        sheet_name = book.sheet_names()[scope]
        return "sheet #%d (%r)" % (scope, sheet_name)
    if scope == -2:
        label = "Macro/VBA"
    elif scope == -1:
        label = "Global"
    else:
        label = "Unknown scope value (%r)" % scope
    return label
|
||||
|
||||
def do_scope_query(book, scope_strg, show_contents=0, f=sys.stdout):
    """Show every name object in ``book.name_obj_list`` that is in a scope.

    book -- Book object whose name_obj_list is scanned.
    scope_strg -- An integer scope as text, "*" for all scopes, or
        otherwise a sheet name (its index is looked up and echoed to f).
    show_contents -- Passed through to show_name_object.
    f -- Open output file handle.
    """
    try:
        wanted_scope = int(scope_strg)
    except ValueError:
        if scope_strg != "*":
            # so assume it's a sheet name ...
            wanted_scope = book.sheet_names().index(scope_strg)
            print >> f, "%r => %d" % (scope_strg, wanted_scope)
        else:
            wanted_scope = None # means "all"
    for nobj in book.name_obj_list:
        if wanted_scope is not None and nobj.scope != wanted_scope:
            continue
        show_name_object(book, nobj, show_contents, f)
|
||||
|
||||
def show_name_details(book, name, show_contents=0, f=sys.stdout):
    """
    Show every name object registered under a name, in any scope.

    book -- Book object obtained from xlrd.open_workbook().
    name -- The name that's being investigated.
    show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells
    f -- Open output file handle.
    """
    # Excel names are case-insensitive, so the map is probed in lower case.
    matches = book.name_map.get(name.lower())
    if matches:
        for nobj in matches:
            show_name_object(book, nobj, show_contents, f)
        return
    print >> f, "%r: unknown name" % name
|
||||
|
||||
def show_name_details_in_scope(
    book, name, scope_strg, show_contents=0, f=sys.stdout,
    ):
    """Show the name object visible for ``name`` in one scope.

    book -- Book object whose name_and_scope_map is consulted.
    name -- The name that's being investigated (case-insensitive).
    scope_strg -- An integer scope as text, or otherwise a sheet name
        (its index is looked up and echoed to f).
    show_contents -- Passed through to show_name_object.
    f -- Open output file handle.

    The requested scope is tried first; if the name is not found there,
    the global scope (-1) is tried once before giving up.
    """
    try:
        scope = int(scope_strg)
    except ValueError:
        # so assume it's a sheet name ...
        scope = book.sheet_names().index(scope_strg)
        print >> f, "%r => %d" % (scope_strg, scope)
    key_name = name.lower() # Excel names are case-insensitive.
    nobj = book.name_and_scope_map.get((key_name, scope))
    while not nobj:
        print >> f, "Name %r not found in scope %d" % (name, scope)
        if scope == -1:
            return
        scope = -1 # Try again with global scope
        nobj = book.name_and_scope_map.get((key_name, scope))
    print >> f, "Name %r found in scope %d" % (name, scope)
    show_name_object(book, nobj, show_contents, f)
|
||||
|
||||
def showable_cell_value(celltype, cellvalue, datemode):
    """Return a printable form of a cell value, based on its cell type.

    Date cells are converted with xlrd.xldate_as_tuple; if that raises
    xlrd.XLDateError the result is "ExceptionName:message" instead.
    Error cells are mapped through xlrd.error_text_from_code, with a
    hex placeholder for unknown codes.  Other values are returned as-is.
    """
    if celltype == xlrd.XL_CELL_DATE:
        try:
            return xlrd.xldate_as_tuple(cellvalue, datemode)
        except xlrd.XLDateError:
            exc_class, exc_value = sys.exc_info()[:2]
            return "%s:%s" % (exc_class.__name__, exc_value)
    if celltype == xlrd.XL_CELL_ERROR:
        fallback = '<Unknown error code 0x%02x>' % cellvalue
        return xlrd.error_text_from_code.get(cellvalue, fallback)
    return cellvalue
|
||||
|
||||
def show_name_object(book, nobj, show_contents=0, f=sys.stdout):
    """Print one name object: its name, scope and formula result.

    book -- Book object that the name belongs to.
    nobj -- The name object to show.
    show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells
    f -- Open output file handle.

    For a result of kind xlrd.oREL or xlrd.oREF each range is listed; for
    xlrd.oREF the referenced cells are listed too when show_contents is
    non-zero.
    """
    print >> f, "\nName: %r, scope: %r (%s)" \
        % (nobj.name, nobj.scope, scope_as_string(book, nobj.scope))
    res = nobj.result
    print >> f, "Formula eval result: %r" % res
    if res is None:
        return
    # result should be an instance of the Operand class
    kind = res.kind
    ranges = res.value
    if kind >= 0:
        # A scalar, or unknown ... you've seen all there is to see.
        return
    if kind == xlrd.oREL:
        # A list of Ref3D objects representing *relative* ranges
        for i, ref3d in enumerate(ranges):
            print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3drel(book, ref3d))
        return
    if kind != xlrd.oREF:
        return
    # A list of Ref3D objects
    for i, ref3d in enumerate(ranges):
        print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3d(book, ref3d))
        if not show_contents:
            continue
        datemode = book.datemode
        for shx in xrange(ref3d.shtxlo, ref3d.shtxhi):
            sh = book.sheet_by_index(shx)
            print >> f, " Sheet #%d (%s)" % (shx, sh.name)
            # Clip the referenced area to the part of the sheet that exists.
            rowlim = min(ref3d.rowxhi, sh.nrows)
            collim = min(ref3d.colxhi, sh.ncols)
            for rowx in xrange(ref3d.rowxlo, rowlim):
                for colx in xrange(ref3d.colxlo, collim):
                    cty = sh.cell_type(rowx, colx)
                    if cty == xlrd.XL_CELL_EMPTY and show_contents == 1:
                        continue
                    cval = sh.cell_value(rowx, colx)
                    sval = showable_cell_value(cty, cval, datemode)
                    print >> f, " (%3d,%3d) %-5s: %r" \
                        % (rowx, colx, xlrd.cellname(rowx, colx), sval)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line driver: open every workbook matching a glob pattern and
    # run one of the three query functions, chosen by the name/scope args.

    def usage():
        """Write the command-line help text to stdout."""
        text = """
usage: xlrdnameAIPdemo.py glob_pattern name scope show_contents

where:
"glob_pattern" designates a set of files
"name" is a name or '*' (all names)
"scope" is -1 (global) or a sheet number
or a sheet name or * (all scopes)
"show_contents" is one of 0 (no show),
1 (only non-empty cells), or 2 (all cells)

Examples (script name and glob_pattern arg omitted for brevity)
[Searching through book.name_obj_list]
* * 0 lists all names
* * 1 lists all names, showing referenced non-empty cells
* 1 0 lists all names local to the 2nd sheet
* Northern 0 lists all names local to the 'Northern' sheet
* -1 0 lists all names with global scope
[Initial direct access through book.name_map]
Sales * 0 lists all occurrences of "Sales" in any scope
[Direct access through book.name_and_scope_map]
Revenue -1 0 checks if "Revenue" exists in global scope

"""
        sys.stdout.write(text)

    # Exactly four arguments are required after the script name.
    if len(sys.argv) != 5:
        usage()
        sys.exit(0)
    arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls"
    arg_name = sys.argv[2] # see below
    arg_scope = sys.argv[3] # see below
    # 0: no show, 1: only non-empty cells, 2: all cells
    arg_show_contents = int(sys.argv[4])
    for fname in glob.glob(arg_pattern):
        book = xlrd.open_workbook(fname)
        if arg_name == "*":
            # Examine book.name_obj_list to find all names
            # in a given scope ("*" => all scopes)
            do_scope_query(book, arg_scope, arg_show_contents)
        elif arg_scope == "*":
            # Using book.name_map to find all usage of a name.
            show_name_details(book, arg_name, arg_show_contents)
        else:
            # Using book.name_and_scope_map to find which if any instances
            # of a name are visible in the given scope, which can be supplied
            # as -1 (global) or a sheet number or a sheet name.
            show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,77 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
"""
|
||||
Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
"""
|
||||
/*-
|
||||
* Copyright (c) 2001 David Giffin.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Based on the the Java version: Andrew Khan Copyright (c) 2000.
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* David Giffin <david@giffin.org>."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* David Giffin <david@giffin.org>."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,44 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# <p>Copyright © 2006-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# timemachine.py -- adaptation for earlier Pythons e.g. 2.1
|
||||
# usage: from timemachine import *
|
||||
|
||||
# 2008-02-08 SJM Generalised method of detecting IronPython
|
||||
|
||||
import sys
|
||||
|
||||
python_version = sys.version_info[:2] # e.g. version 2.4 -> (2, 4)
|
||||
|
||||
CAN_PICKLE_ARRAY = python_version >= (2, 5)
|
||||
CAN_SUBCLASS_BUILTIN = python_version >= (2, 2)
|
||||
|
||||
if sys.version.find("IronPython") >= 0:
|
||||
array_array = None
|
||||
else:
|
||||
from array import array as array_array
|
||||
|
||||
if python_version < (2, 2):
|
||||
class object:
|
||||
pass
|
||||
False = 0
|
||||
True = 1
|
||||
|
||||
def int_floor_div(x, y):
    """Return the floored quotient of x divided by y.

    Implemented with divmod rather than the ``//`` operator, in keeping
    with this module's stated aim of supporting earlier Pythons.
    """
    quotient, _remainder = divmod(x, y)
    return quotient
|
||||
|
||||
def intbool(x):
    """Return the truth value of x as the integer 1 or 0."""
    if not x:
        return 0
    return 1
|
||||
|
||||
if python_version < (2, 3):
|
||||
def sum(sequence, start=0):
    """Return ``start`` plus every item of ``sequence``, added in order.

    Stand-in for the builtin ``sum``; the enclosing guard defines it only
    when python_version < (2, 3).

    The original loop iterated over the undefined name ``aseq`` and so
    raised NameError on first use; it now iterates over ``sequence``.
    """
    tot = start
    for item in sequence:
        tot += item
    return tot
|
||||
@@ -0,0 +1,171 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
##
|
||||
# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
#
|
||||
# <p>Provides function(s) for dealing with Microsoft Excel ™ dates.</p>
|
||||
##
|
||||
|
||||
# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
|
||||
|
||||
# The conversion from days to (year, month, day) starts with
|
||||
# an integral "julian day number" aka JDN.
|
||||
# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
|
||||
# More importantly:
|
||||
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
|
||||
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
|
||||
|
||||
from timemachine import int_floor_div as ifd
|
||||
|
||||
_JDN_delta = (2415080 - 61, 2416482 - 1)
|
||||
assert _JDN_delta[1] - _JDN_delta[0] == 1462
|
||||
|
||||
class XLDateError(ValueError):
    """Base class for every date-conversion error raised by this module."""


class XLDateNegative(XLDateError):
    """The Excel date number is negative."""


class XLDateAmbiguous(XLDateError):
    """The 1900 leap-year problem (datemode 0, date before 1900-03-01)."""


class XLDateTooLarge(XLDateError):
    """The date falls in Gregorian year 10000 or later."""


class XLDateBadDatemode(XLDateError):
    """The datemode argument is neither 0 nor 1."""


class XLDateBadTuple(XLDateError):
    """A date or time tuple is out of range or has an invalid component."""
|
||||
|
||||
_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01
|
||||
|
||||
##
|
||||
# Convert an Excel number (presumed to represent a date, a datetime or a time) into
|
||||
# a tuple suitable for feeding to datetime or mx.DateTime constructors.
|
||||
# @param xldate The Excel number
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# <br>WARNING: when using this function to
|
||||
# interpret the contents of a workbook, you should pass in the Book.datemode
|
||||
# attribute of that workbook. Whether
|
||||
# the workbook has ever been anywhere near a Macintosh is irrelevant.
|
||||
# @return Gregorian (year, month, day, hour, minute, nearest_second).
|
||||
# <br>Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time;
|
||||
# (0, 0, 0, hour, minute, second) will be returned.
|
||||
# <br>Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
|
||||
# is zero.
|
||||
# @throws XLDateNegative xldate < 0.00
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateTooLarge Gregorian year 10000 or later
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateError Covers the 4 specific errors
|
||||
|
||||
def xldate_as_tuple(xldate, datemode):
    """Convert an Excel date number to a Gregorian tuple.

    xldate -- The Excel number (a date, a datetime or a time).
    datemode -- 0: 1900-based, 1: 1904-based.

    Returns (year, month, day, hour, minute, nearest_second).  When the
    whole-day part is zero the value is treated as a time only and
    (0, 0, 0, hour, minute, second) is returned.

    Raises XLDateBadDatemode, XLDateNegative, XLDateTooLarge or
    XLDateAmbiguous (all subclasses of XLDateError).
    """
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)
    if xldate == 0.00:
        return (0, 0, 0, 0, 0, 0)
    if xldate < 0.00:
        raise XLDateNegative(xldate)

    day_count = int(xldate)
    day_seconds = int(round((xldate - day_count) * 86400.0))
    assert 0 <= day_seconds <= 86400
    if day_seconds != 86400:
        total_minutes, second = divmod(day_seconds, 60)
        hour, minute = divmod(total_minutes, 60)
    else:
        # The fraction rounded up to a full day: midnight of the next day.
        day_count += 1
        hour = minute = second = 0

    if day_count >= _XLDAYS_TOO_LARGE[datemode]:
        raise XLDateTooLarge(xldate)
    if day_count == 0:
        return (0, 0, 0, hour, minute, second)
    if day_count < 61 and datemode == 0:
        raise XLDateAmbiguous(xldate)

    # Julian day number -> (year, month, day) using integer arithmetic only.
    jdn = day_count + _JDN_delta[datemode]
    yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
    mp = ifd(yreg % 1461, 4) * 535 + 333
    day = ifd(mp % 16384, 535) + 1
    mp >>= 14  # same as dividing by 16384
    year_base = ifd(yreg, 1461)
    if mp < 10:
        year, month = year_base - 4716, mp + 3
    else:
        year, month = year_base - 4715, mp - 9
    return (year, month, day, hour, minute, second)
|
||||
|
||||
# === conversions from date/time to xl numbers
|
||||
|
||||
def _leap(y):
    """Return 1 if y is a Gregorian leap year, otherwise 0."""
    if y % 400 == 0:
        return 1
    if y % 100 == 0:
        return 0
    if y % 4 == 0:
        return 1
    return 0
|
||||
|
||||
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
|
||||
|
||||
##
|
||||
# Convert a date tuple (year, month, day) to an Excel date.
|
||||
# @param year Gregorian year.
|
||||
# @param month 1 <= month <= 12
|
||||
# @param day 1 <= day <= last day of that (year, month)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s)
|
||||
# @throws XLDateError Covers the specific errors
|
||||
|
||||
def xldate_from_date_tuple(date_tuple, datemode):
    """Convert a date tuple (year, month, day) to an Excel date.

    date_tuple -- (year, month, day): Gregorian year, 1 <= month <= 12,
        1 <= day <= last day of that (year, month).
    datemode -- 0: 1900-based, 1: 1904-based.

    Returns the date as a float; (0, 0, 0) maps to 0.00.

    Raises XLDateBadDatemode, XLDateBadTuple or XLDateAmbiguous (all
    subclasses of XLDateError).

    The tuple used to be unpacked in the signature itself, which is a
    syntax error on Python 3 (PEP 3113).  It is now unpacked in the body;
    callers still pass one 3-item sequence positionally.
    """
    year, month, day = date_tuple

    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if year == 0 and month == 0 and day == 0:
        return 0.00

    if not (1900 <= year <= 9999):
        raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
    if not (1 <= month <= 12):
        raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
    if day < 1 \
    or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))):
        raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))

    # January and February are counted as months 10 and 11 of the
    # previous year, so that the month numbering starts at March.
    Yp = year + 4716
    M = month
    if M <= 2:
        Yp = Yp - 1
        Mp = M + 9
    else:
        Mp = M - 3
    jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \
        day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4)
    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
    return float(xldays)
|
||||
|
||||
##
|
||||
# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
|
||||
# @param hour 0 <= hour < 24
|
||||
# @param minute 0 <= minute < 60
|
||||
# @param second 0 <= second < 60
|
||||
# @throws XLDateBadTuple Out-of-range hour, minute, or second
|
||||
|
||||
def xldate_from_time_tuple(time_tuple):
    """Convert a time tuple to an Excel "date" value (fraction of a day).

    time_tuple -- (hour, minute, second) with 0 <= hour < 24,
        0 <= minute < 60 and 0 <= second < 60.

    Raises XLDateBadTuple for an out-of-range hour, minute or second.

    The tuple used to be unpacked in the signature itself, which is a
    syntax error on Python 3 (PEP 3113).  It is now unpacked in the body;
    callers still pass one 3-item sequence positionally.
    """
    hour, minute, second = time_tuple
    if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60:
        return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
    raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
|
||||
|
||||
##
|
||||
# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
|
||||
# For more details, refer to other xldate_from_*_tuple functions.
|
||||
# @param datetime_tuple (year, month, day, hour, minute, second)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
|
||||
def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """Convert (year, month, day, hour, minute, second) to an Excel date.

    The first three items go to xldate_from_date_tuple together with
    datemode (0: 1900-based, 1: 1904-based); the remaining items go to
    xldate_from_time_tuple.  The result is the sum of the two values.
    """
    date_part = xldate_from_date_tuple(datetime_tuple[:3], datemode)
    time_part = xldate_from_time_tuple(datetime_tuple[3:])
    return date_part + time_part
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,620 @@
|
||||
# Support module for the xlrd3 package.
|
||||
#
|
||||
# Portions copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd package, which is released under a
|
||||
# BSD-style licence.
|
||||
#
|
||||
# 2010-12-08 mozman refactoring for python 3
|
||||
# 2008-02-10 SJM BIFF2 BLANK record
|
||||
# 2008-02-08 SJM Preparation for Excel 2.0 support
|
||||
# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for
|
||||
# biff_dump & biff_count
|
||||
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
|
||||
# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
|
||||
# 2007-04-22 SJM Remove experimental "trimming" facility.
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
|
||||
encoding_from_codepage = {
|
||||
1200 : 'utf_16_le',
|
||||
10000: 'mac_roman',
|
||||
10006: 'mac_greek', # guess
|
||||
10007: 'mac_cyrillic', # guess
|
||||
10029: 'mac_latin2', # guess
|
||||
10079: 'mac_iceland', # guess
|
||||
10081: 'mac_turkish', # guess
|
||||
32768: 'mac_roman',
|
||||
32769: 'cp1252',
|
||||
}
|
||||
|
||||
# some more guessing, for Indic scripts
|
||||
# codepage 57000 range:
|
||||
# 2 Devanagari [0]
|
||||
# 3 Bengali [1]
|
||||
# 4 Tamil [5]
|
||||
# 5 Telegu [6]
|
||||
# 6 Assamese [1] c.f. Bengali
|
||||
# 7 Oriya [4]
|
||||
# 8 Kannada [7]
|
||||
# 9 Malayalam [8]
|
||||
# 10 Gujarati [3]
|
||||
# 11 Gurmukhi [2]
|
||||
|
||||
FUN = 0 # unknown
|
||||
FDT = 1 # date
|
||||
FNU = 2 # number
|
||||
FGE = 3 # general
|
||||
FTX = 4 # text
|
||||
|
||||
DATEFORMAT = FDT
|
||||
NUMBERFORMAT = FNU
|
||||
|
||||
XL_CELL_EMPTY = 0
|
||||
XL_CELL_TEXT = 1
|
||||
XL_CELL_NUMBER = 2
|
||||
XL_CELL_DATE = 3
|
||||
XL_CELL_BOOLEAN = 4
|
||||
XL_CELL_ERROR = 5
|
||||
XL_CELL_BLANK = 6 # for use in debugging, gathering stats, etc
|
||||
|
||||
biff_text_from_num = {
|
||||
0: "(not BIFF)",
|
||||
20: "2.0",
|
||||
21: "2.1",
|
||||
30: "3",
|
||||
40: "4S",
|
||||
45: "4W",
|
||||
50: "5",
|
||||
70: "7",
|
||||
80: "8",
|
||||
85: "8X",
|
||||
}
|
||||
|
||||
# This dictionary can be used to produce a text version of the internal codes
|
||||
# that Excel uses for error cells. Here are its contents:
|
||||
# Maps the internal code stored in an Excel error cell to the text Excel
# displays for it.  The entry for 0x2A used to read '#N/A!'; the literal
# Excel shows for that error is '#N/A', with no exclamation mark.
error_text_from_code = {
    0x00: '#NULL!',   # Intersection of two cell ranges is empty
    0x07: '#DIV/0!',  # Division by zero
    0x0F: '#VALUE!',  # Wrong type of operand
    0x17: '#REF!',    # Illegal or deleted cell reference
    0x1D: '#NAME?',   # Wrong function or range name
    0x24: '#NUM!',    # Value range overflow
    0x2A: '#N/A',     # Argument or function not available
}
|
||||
|
||||
BIFF_FIRST_UNICODE = 80
|
||||
|
||||
XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
|
||||
XL_WORKBOOK_GLOBALS_4W = 0x100
|
||||
XL_WORKSHEET = WRKSHEET = 0x10
|
||||
|
||||
XL_BOUNDSHEET_WORKSHEET = 0x00
|
||||
XL_BOUNDSHEET_CHART = 0x02
|
||||
XL_BOUNDSHEET_VB_MODULE = 0x06
|
||||
|
||||
# XL_RK2 = 0x7e
|
||||
XL_ARRAY = 0x0221
|
||||
XL_ARRAY2 = 0x0021
|
||||
XL_BLANK = 0x0201
|
||||
XL_BLANK_B2 = 0x01
|
||||
XL_BOF = 0x809
|
||||
XL_BOOLERR = 0x205
|
||||
XL_BOOLERR_B2 = 0x5
|
||||
XL_BOUNDSHEET = 0x85
|
||||
XL_BUILTINFMTCOUNT = 0x56
|
||||
XL_CF = 0x01B1
|
||||
XL_CODEPAGE = 0x42
|
||||
XL_COLINFO = 0x7D
|
||||
XL_COLUMNDEFAULT = 0x20 # BIFF2 only
|
||||
XL_COLWIDTH = 0x24 # BIFF2 only
|
||||
XL_CONDFMT = 0x01B0
|
||||
XL_CONTINUE = 0x3c
|
||||
XL_COUNTRY = 0x8C
|
||||
XL_DATEMODE = 0x22
|
||||
XL_DEFAULTROWHEIGHT = 0x0225
|
||||
XL_DEFCOLWIDTH = 0x55
|
||||
XL_DIMENSION = 0x200
|
||||
XL_DIMENSION2 = 0x0
|
||||
XL_EFONT = 0x45
|
||||
XL_EOF = 0x0a
|
||||
XL_EXTERNNAME = 0x23
|
||||
XL_EXTERNSHEET = 0x17
|
||||
XL_EXTSST = 0xff
|
||||
XL_FEAT11 = 0x872
|
||||
XL_FILEPASS = 0x2f
|
||||
XL_FONT = 0x31
|
||||
XL_FONT_B3B4 = 0x231
|
||||
XL_FORMAT = 0x41e
|
||||
XL_FORMAT2 = 0x1E # BIFF2, BIFF3
|
||||
XL_FORMULA = 0x6
|
||||
XL_FORMULA3 = 0x206
|
||||
XL_FORMULA4 = 0x406
|
||||
XL_GCW = 0xab
|
||||
XL_INDEX = 0x20b
|
||||
XL_INTEGER = 0x2 # BIFF2 only
|
||||
XL_IXFE = 0x44 # BIFF2 only
|
||||
XL_LABEL = 0x204
|
||||
XL_LABEL_B2 = 0x04
|
||||
XL_LABELRANGES = 0x15f
|
||||
XL_LABELSST = 0xfd
|
||||
XL_MERGEDCELLS = 0xE5
|
||||
XL_MSO_DRAWING = 0x00EC
|
||||
XL_MSO_DRAWING_GROUP = 0x00EB
|
||||
XL_MSO_DRAWING_SELECTION = 0x00ED
|
||||
XL_MULRK = 0xbd
|
||||
XL_MULBLANK = 0xbe
|
||||
XL_NAME = 0x18
|
||||
XL_NOTE = 0x1c
|
||||
XL_NUMBER = 0x203
|
||||
XL_NUMBER_B2 = 0x3
|
||||
XL_OBJ = 0x5D
|
||||
XL_PALETTE = 0x92
|
||||
XL_RK = 0x27e
|
||||
XL_ROW = 0x208
|
||||
XL_ROW_B2 = 0x08
|
||||
XL_RSTRING = 0xd6
|
||||
XL_SHEETHDR = 0x8F # BIFF4W only
|
||||
XL_SHEETSOFFSET = 0x8E # BIFF4W only
|
||||
XL_SHRFMLA = 0x04bc
|
||||
XL_SST = 0xfc
|
||||
XL_STANDARDWIDTH = 0x99
|
||||
XL_STRING = 0x207
|
||||
XL_STRING_B2 = 0x7
|
||||
XL_STYLE = 0x293
|
||||
XL_SUPBOOK = 0x1AE
|
||||
XL_TABLEOP = 0x236
|
||||
XL_TABLEOP2 = 0x37
|
||||
XL_TABLEOP_B2 = 0x36
|
||||
XL_TXO = 0x1b6
|
||||
XL_UNCALCED = 0x5e
|
||||
XL_UNKNOWN = 0xffff
|
||||
XL_WINDOW2 = 0x023E
|
||||
XL_WRITEACCESS = 0x5C
|
||||
XL_XF = 0xe0
|
||||
XL_XF2 = 0x0043 # BIFF2 version of XF record
|
||||
XL_XF3 = 0x0243 # BIFF3 version of XF record
|
||||
XL_XF4 = 0x0443 # BIFF4 version of XF record
|
||||
|
||||
boflen = {
|
||||
0x0809: 8,
|
||||
0x0409: 6,
|
||||
0x0209: 6,
|
||||
0x0009: 4,
|
||||
}
|
||||
|
||||
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
|
||||
|
||||
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
|
||||
|
||||
_cell_opcode_list = (
|
||||
XL_BOOLERR,
|
||||
XL_FORMULA,
|
||||
XL_FORMULA3,
|
||||
XL_FORMULA4,
|
||||
XL_LABEL,
|
||||
XL_LABELSST,
|
||||
XL_MULRK,
|
||||
XL_NUMBER,
|
||||
XL_RK,
|
||||
XL_RSTRING,
|
||||
)
|
||||
|
||||
biff_rec_name_dict = {
|
||||
0x0000: 'DIMENSIONS_B2',
|
||||
0x0001: 'BLANK_B2',
|
||||
0x0002: 'INTEGER_B2_ONLY',
|
||||
0x0003: 'NUMBER_B2',
|
||||
0x0004: 'LABEL_B2',
|
||||
0x0005: 'BOOLERR_B2',
|
||||
0x0006: 'FORMULA',
|
||||
0x0007: 'STRING_B2',
|
||||
0x0008: 'ROW_B2',
|
||||
0x0009: 'BOF_B2',
|
||||
0x000A: 'EOF',
|
||||
0x000B: 'INDEX_B2_ONLY',
|
||||
0x000C: 'CALCCOUNT',
|
||||
0x000D: 'CALCMODE',
|
||||
0x000E: 'PRECISION',
|
||||
0x000F: 'REFMODE',
|
||||
0x0010: 'DELTA',
|
||||
0x0011: 'ITERATION',
|
||||
0x0012: 'PROTECT',
|
||||
0x0013: 'PASSWORD',
|
||||
0x0014: 'HEADER',
|
||||
0x0015: 'FOOTER',
|
||||
0x0016: 'EXTERNCOUNT',
|
||||
0x0017: 'EXTERNSHEET',
|
||||
0x0018: 'NAME_B2,5+',
|
||||
0x0019: 'WINDOWPROTECT',
|
||||
0x001A: 'VERTICALPAGEBREAKS',
|
||||
0x001B: 'HORIZONTALPAGEBREAKS',
|
||||
0x001C: 'NOTE',
|
||||
0x001D: 'SELECTION',
|
||||
0x001E: 'FORMAT_B2-3',
|
||||
0x001F: 'BUILTINFMTCOUNT_B2',
|
||||
0x0020: 'COLUMNDEFAULT_B2_ONLY',
|
||||
0x0021: 'ARRAY_B2_ONLY',
|
||||
0x0022: 'DATEMODE',
|
||||
0x0023: 'EXTERNNAME',
|
||||
0x0024: 'COLWIDTH_B2_ONLY',
|
||||
0x0025: 'DEFAULTROWHEIGHT_B2_ONLY',
|
||||
0x0026: 'LEFTMARGIN',
|
||||
0x0027: 'RIGHTMARGIN',
|
||||
0x0028: 'TOPMARGIN',
|
||||
0x0029: 'BOTTOMMARGIN',
|
||||
0x002A: 'PRINTHEADERS',
|
||||
0x002B: 'PRINTGRIDLINES',
|
||||
0x002F: 'FILEPASS',
|
||||
0x0031: 'FONT',
|
||||
0x0032: 'FONT2_B2_ONLY',
|
||||
0x0036: 'TABLEOP_B2',
|
||||
0x0037: 'TABLEOP2_B2',
|
||||
0x003C: 'CONTINUE',
|
||||
0x003D: 'WINDOW1',
|
||||
0x003E: 'WINDOW2_B2',
|
||||
0x0040: 'BACKUP',
|
||||
0x0041: 'PANE',
|
||||
0x0042: 'CODEPAGE',
|
||||
0x0043: 'XF_B2',
|
||||
0x0044: 'IXFE_B2_ONLY',
|
||||
0x0045: 'EFONT_B2_ONLY',
|
||||
0x004D: 'PLS',
|
||||
0x0051: 'DCONREF',
|
||||
0x0055: 'DEFCOLWIDTH',
|
||||
0x0056: 'BUILTINFMTCOUNT_B3-4',
|
||||
0x0059: 'XCT',
|
||||
0x005A: 'CRN',
|
||||
0x005B: 'FILESHARING',
|
||||
0x005C: 'WRITEACCESS',
|
||||
0x005D: 'OBJECT',
|
||||
0x005E: 'UNCALCED',
|
||||
0x005F: 'SAVERECALC',
|
||||
0x0063: 'OBJECTPROTECT',
|
||||
0x007D: 'COLINFO',
|
||||
0x007E: 'RK2_mythical_?',
|
||||
0x0080: 'GUTS',
|
||||
0x0081: 'WSBOOL',
|
||||
0x0082: 'GRIDSET',
|
||||
0x0083: 'HCENTER',
|
||||
0x0084: 'VCENTER',
|
||||
0x0085: 'BOUNDSHEET',
|
||||
0x0086: 'WRITEPROT',
|
||||
0x008C: 'COUNTRY',
|
||||
0x008D: 'HIDEOBJ',
|
||||
0x008E: 'SHEETSOFFSET',
|
||||
0x008F: 'SHEETHDR',
|
||||
0x0090: 'SORT',
|
||||
0x0092: 'PALETTE',
|
||||
0x0099: 'STANDARDWIDTH',
|
||||
0x009B: 'FILTERMODE',
|
||||
0x009C: 'FNGROUPCOUNT',
|
||||
0x009D: 'AUTOFILTERINFO',
|
||||
0x009E: 'AUTOFILTER',
|
||||
0x00A0: 'SCL',
|
||||
0x00A1: 'SETUP',
|
||||
0x00AB: 'GCW',
|
||||
0x00BD: 'MULRK',
|
||||
0x00BE: 'MULBLANK',
|
||||
0x00C1: 'MMS',
|
||||
0x00D6: 'RSTRING',
|
||||
0x00D7: 'DBCELL',
|
||||
0x00DA: 'BOOKBOOL',
|
||||
0x00DD: 'SCENPROTECT',
|
||||
0x00E0: 'XF',
|
||||
0x00E1: 'INTERFACEHDR',
|
||||
0x00E2: 'INTERFACEEND',
|
||||
0x00E5: 'MERGEDCELLS',
|
||||
0x00E9: 'BITMAP',
|
||||
0x00EB: 'MSO_DRAWING_GROUP',
|
||||
0x00EC: 'MSO_DRAWING',
|
||||
0x00ED: 'MSO_DRAWING_SELECTION',
|
||||
0x00EF: 'PHONETIC',
|
||||
0x00FC: 'SST',
|
||||
0x00FD: 'LABELSST',
|
||||
0x00FF: 'EXTSST',
|
||||
0x013D: 'TABID',
|
||||
0x015F: 'LABELRANGES',
|
||||
0x0160: 'USESELFS',
|
||||
0x0161: 'DSF',
|
||||
0x01AE: 'SUPBOOK',
|
||||
0x01AF: 'PROTECTIONREV4',
|
||||
0x01B0: 'CONDFMT',
|
||||
0x01B1: 'CF',
|
||||
0x01B2: 'DVAL',
|
||||
0x01B6: 'TXO',
|
||||
0x01B7: 'REFRESHALL',
|
||||
0x01B8: 'HLINK',
|
||||
0x01BC: 'PASSWORDREV4',
|
||||
0x01BE: 'DV',
|
||||
0x01C0: 'XL9FILE',
|
||||
0x01C1: 'RECALCID',
|
||||
0x0200: 'DIMENSIONS',
|
||||
0x0201: 'BLANK',
|
||||
0x0203: 'NUMBER',
|
||||
0x0204: 'LABEL',
|
||||
0x0205: 'BOOLERR',
|
||||
0x0206: 'FORMULA_B3',
|
||||
0x0207: 'STRING',
|
||||
0x0208: 'ROW',
|
||||
0x0209: 'BOF',
|
||||
0x020B: 'INDEX_B3+',
|
||||
0x0218: 'NAME',
|
||||
0x0221: 'ARRAY',
|
||||
0x0223: 'EXTERNNAME_B3-4',
|
||||
0x0225: 'DEFAULTROWHEIGHT',
|
||||
0x0231: 'FONT_B3B4',
|
||||
0x0236: 'TABLEOP',
|
||||
0x023E: 'WINDOW2',
|
||||
0x0243: 'XF_B3',
|
||||
0x027E: 'RK',
|
||||
0x0293: 'STYLE',
|
||||
0x0406: 'FORMULA_B4',
|
||||
0x0409: 'BOF',
|
||||
0x041E: 'FORMAT',
|
||||
0x0443: 'XF_B4',
|
||||
0x04BC: 'SHRFMLA',
|
||||
0x0800: 'QUICKTIP',
|
||||
0x0809: 'BOF',
|
||||
0x0862: 'SHEETLAYOUT',
|
||||
0x0867: 'SHEETPROTECTION',
|
||||
0x0868: 'RANGEPROTECTION',
|
||||
}
|
||||
|
||||
class XLRDError(Exception):
    """Exception class defined by this module; adds nothing to Exception."""
|
||||
|
||||
class BaseObject:
    """
    Common ancestor of most classes in the package. Its only job is to
    supply a generic 'dump' method for debugging.
    """
    # Names of list/dict attributes that dump() prints in full (via repr)
    # instead of summarising as "type, len = n".
    _repr_these = []

    def dump(self, f=None, header=None, footer=None, indent=0):
        """
        Write every instance attribute, sorted by name, to a file.

        :param f: open file object, to which the dump is written
                  (sys.stderr is used when this is None)
        :param header: text to write before the dump
        :param footer: text to write after the dump
        :param indent: number of leading spaces (for recursive calls)
        """
        out = sys.stderr if f is None else f
        margin = " " * indent

        if header is not None:
            print(header, file=out)

        for name, val in sorted(self.__dict__.items()):
            # Anything that can dump itself is dumped recursively, except the
            # attribute called 'book', which is only printed via repr.
            if getattr(val, 'dump', None) and name != 'book':
                title = "%s%s (%s object):" % (margin, name, val.__class__.__name__)
                val.dump(out, header=title, indent=indent+4)
                continue
            summarise = (
                name not in self._repr_these
                and isinstance(val, (list, dict))
            )
            if summarise:
                print("%s%s: %s, len = %d" % (margin, name, type(val), len(val)), file=out)
            else:
                print("%s%s: %r" % (margin, name, val), file=out)

        if footer is not None:
            print(footer, file=out)
|
||||
|
||||
def fprintf(f, fmt, *vargs):
    """
    Write one %-formatted line to file object *f*.

    Trailing newlines are stripped from *fmt* before formatting; print()
    then supplies the single line terminator.
    """
    text = fmt.rstrip('\n') % vargs
    print(text, file=f)
|
||||
|
||||
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """
    Unpack bit fields of *src* into attributes of *tgt_obj*.

    :param tgt_obj: object that receives the attributes
    :param src: integer holding the packed bit fields
    :param manifest: iterable of (shift, mask, attribute_name) triples; each
                     attribute is set to ``(src & mask) >> shift``
    :param local_setattr: attribute setter (defaults to the builtin setattr)
    """
    for shift, mask, name in manifest:
        field = (src & mask) >> shift
        local_setattr(tgt_obj, name, field)
|
||||
|
||||
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """
    Unpack bit fields of *src* into attributes of *tgt_obj*, converting each
    extracted field with *local_int* before it is stored.

    :param tgt_obj: object that receives the attributes
    :param src: integer holding the packed bit fields
    :param manifest: iterable of (shift, mask, attribute_name) triples
    :param local_setattr: attribute setter (defaults to the builtin setattr)
    :param local_int: converter applied to ``(src & mask) >> shift``
    """
    for shift, mask, name in manifest:
        field = local_int((src & mask) >> shift)
        local_setattr(tgt_obj, name, field)
|
||||
|
||||
def unpack_string(data, pos, encoding, lenlen=1):
    """
    Decode a length-prefixed byte string found at data[pos:].

    :param data: bytes containing the string
    :param pos: offset of the length prefix
    :param encoding: codec used to decode the character bytes
    :param lenlen: size of the little-endian length prefix in bytes (1 or 2)
    :returns: the decoded string
    """
    length_fmt = '<' + 'BH'[lenlen-1]
    (nchars,) = unpack(length_fmt, data[pos:pos+lenlen])
    start = pos + lenlen
    return str(data[start:start+nchars], encoding)
|
||||
|
||||
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """
    Decode a byte string at data[pos:] and report where it ends.

    :param data: bytes containing the string
    :param pos: offset of the length prefix, or of the first character when
                *known_len* is given
    :param encoding: codec used to decode the character bytes
    :param lenlen: size of the little-endian length prefix in bytes (1 or 2)
    :param known_len: character count supplied by the caller; when given, no
                      length prefix is read
    :returns: (decoded string, offset just past the string)
    """
    if known_len is None:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len

    end = pos + nchars
    return (str(data[pos:end], encoding), end)
|
||||
|
||||
def unpack_unicode(data, pos, lenlen=2):
    """
    Return the unicode string stored at data[pos:].

    Layout read here: a little-endian character count (*lenlen* bytes), an
    options byte, optional richtext (2 bytes) and phonetic (4 bytes) header
    fields which are skipped, then the characters.

    :param data: bytes containing the string
    :param pos: offset of the character count
    :param lenlen: size of the character count in bytes (1 or 2)
    :returns: the decoded string
    """
    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return ""

    options = data[pos + lenlen]
    cursor = pos + lenlen + 1

    if options & 0x08:  # richtext
        cursor += 2
    if options & 0x04:  # phonetic
        cursor += 4

    if options & 0x01:
        # Uncompressed UTF-16-LE
        return str(data[cursor:cursor + 2*nchars], 'utf_16_le')

    # Note: this is COMPRESSED (not ASCII!) encoding!!!
    # Returning the raw bytes would only be right for a cp1252 local
    # codepage, so decode as latin_1 and return Unicode.
    return str(data[cursor:cursor + nchars], "latin_1")
|
||||
|
||||
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """
    Return (unicode_strg, updated value of pos).

    :param data: bytes containing the string
    :param pos: offset of the character count, or of the options byte when
                *known_len* is given
    :param lenlen: size of the little-endian character count (1 or 2 bytes)
    :param known_len: character count supplied by the caller; when given, no
                      count is read from *data*
    :returns: the decoded string and the offset just past the string,
              including any richtext / phonetic trailer it announces
    """
    if known_len is None:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len

    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return ("", pos)

    options = data[pos]
    pos += 1
    has_phonetic = options & 0x04
    has_richtext = options & 0x08

    # Bytes that follow the characters and must be skipped afterwards.
    trailer = 0
    if has_richtext:
        # 2-byte count; each unit occupies 4 bytes after the characters
        trailer += 4 * unpack('<H', data[pos:pos+2])[0]
        pos += 2
    if has_phonetic:
        # 4-byte size of the phonetic block after the characters
        trailer += unpack('<i', data[pos:pos+4])[0]
        pos += 4

    if options & 0x01:
        # Uncompressed UTF-16-LE
        nbytes, codec = 2 * nchars, 'utf_16_le'
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        nbytes, codec = nchars, "latin_1"

    strg = str(data[pos:pos+nbytes], codec)
    return (strg, pos + nbytes + trailer)
|
||||
|
||||
def unpack_cell_range_address_list_update_pos(
        output_list, data, pos, biff_version, addr_size=6):
    """
    Read a counted list of cell range addresses starting at data[pos:].

    Each address is stored as (first row, last row, first col, last col);
    it is appended to *output_list* (updated in situ) as
    (first row, last row + 1, first col, last col + 1).

    :param output_list: list that receives the 4-tuples
    :param data: bytes containing the list
    :param pos: offset of the 2-byte address count
    :param biff_version: below 80 only 6-byte addresses are accepted
    :param addr_size: 6 (columns stored as bytes) or 8 (columns as 16-bit)
    :returns: offset just past the last address
    """
    allowed_sizes = (6,) if biff_version < 80 else (6, 8)
    assert addr_size in allowed_sizes

    (count,) = unpack("<H", data[pos:pos+2])
    first = pos + 2
    stop = first + count * addr_size
    if count:
        fmt = "<HHBB" if addr_size == 6 else "<HHHH"
        for offset in range(first, stop, addr_size):
            row_lo, row_hi, col_lo, col_hi = unpack(fmt, data[offset:offset+addr_size])
            output_list.append((row_lo, row_hi+1, col_lo, col_hi+1))
    return stop
|
||||
|
||||
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a hex + character dump of strg[ofs:ofs+dlen], 16 bytes per line.

    :param strg: bytes-like object to dump (its items must be ints)
    :param ofs: offset of the first byte to dump
    :param dlen: number of bytes to dump (clipped to the end of *strg*)
    :param base: number shown as the position of the first dumped byte
    :param fout: file object receiving the dump and any diagnostic
    :param unnumbered: when true, lines carry no position prefix
    """
    endpos = min(ofs + dlen, len(strg))
    pos = ofs
    numbered = not unnumbered
    num_prefix = ''
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            # Inconsistent slice (e.g. negative offset): report it on the
            # same stream as the dump rather than unconditionally on stdout.
            print(
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r'
                % (ofs, dlen, base, endpos, pos, endsub, substrg),
                file=fout)
            break
        hexd = ''.join("%02x " % c for c in substrg)
        # NUL is shown as '~', other bytes outside ' '..'~' as '?'.
        chard = ''.join(
            '~' if c == 0 else (chr(c) if 0x20 <= c <= 0x7E else '?')
            for c in substrg)
        if numbered:
            num_prefix = "%5d: " % (base+pos-ofs)
        print("%s %-48s %s" % (num_prefix, hexd, chard), file=fout)
        pos = endsub
|
||||
|
||||
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout,
              unnumbered=False):
    """
    Write a record-by-record dump of the stream held in *mem*.

    The stream is read as a sequence of 4-byte headers (record code and
    length, both little-endian 16-bit) each followed by *length* data bytes.
    Runs of all-zero headers are summarised as "zero bytes skipped".

    :param mem: bytes-like object containing the stream
    :param stream_offset: offset of the stream within *mem*
    :param stream_len: length of the stream in bytes
    :param base: number shown as the position of the first stream byte
    :param fout: file object receiving the dump
    :param unnumbered: when true, lines carry no position prefix
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    adj = base - stream_offset
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # The slice is bytes, so it must be compared with a bytes
            # literal (a str never compares equal), and only the rest of
            # the stream is relevant, not whatever follows it in mem.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break

            if dummies:
                dummies += 4
            else:
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # Report the run of zero headers that has just ended.
                if numbered:
                    num_prefix = "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n",
                        num_prefix, dummies)
                dummies = 0

            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n",
                    num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        if numbered:
            num_prefix = "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)

    if pos < stream_end:
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
|
||||
|
||||
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """
    Count the records of each type in a stream and write the tally.

    The stream is read as a sequence of 4-byte headers (record code and
    length, both little-endian 16-bit) each followed by *length* data bytes.
    Counting stops early when the rest of the stream is all zero bytes.

    :param mem: bytes-like object containing the stream
    :param stream_offset: offset of the stream within *mem*
    :param stream_len: length of the stream in bytes
    :param fout: file object receiving one "count name" line per record
                 name, sorted by name
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    tally = {}
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # The slice is bytes, so it must be compared with a bytes
            # literal (a str never compares equal), and only the rest of
            # the stream is relevant, not whatever follows it in mem.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        else:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        tally[recname] = tally.get(recname, 0) + 1
        pos += length + 4
    for recname, count in sorted(tally.items()):
        fprintf(fout, "%8d %s", count, recname)
|
||||
@@ -0,0 +1,346 @@
|
||||
# Implements the minimal functionality required
|
||||
# to extract a "Workbook" or "Book" stream (as one big string)
|
||||
# from an OLE2 Compound Document file.
|
||||
#
|
||||
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd3 package, which is released under a BSD-style licence.
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer]
|
||||
# 2007-09-08 SJM Warning message if sector sizes are extremely large.
|
||||
# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
|
||||
# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
|
||||
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
|
||||
# Magic cookie that should appear in the first 8 bytes of the file.
|
||||
SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
|
||||
|
||||
EOCSID = -2  # value the chain-walking code asserts it finds at the end of a sector chain
|
||||
FREESID = -1  # MSAT entries with this value are skipped when the SAT is built
|
||||
SATSID = -3  # not referenced in the code shown here; presumably marks a sector used by the SAT -- TODO confirm
|
||||
MSATSID = -4  # not referenced in the code shown here; presumably marks a sector used by the MSAT -- TODO confirm
|
||||
|
||||
def int_floor_div(x, y):
    """Return the floor of x divided by y (the quotient part of divmod)."""
    return x // y
|
||||
|
||||
class CompDocError(Exception):
    """Raised when the data cannot be handled as an OLE2 compound document."""
|
||||
|
||||
class DirNode(object):
    """
    One entry of the compound document's directory.

    Fields taken from the 128-byte directory entry:
    bytes 0..cbufsize-3  name, UTF-16-LE (the trailing U+0000 is omitted)
    bytes 64..79         cbufsize, etype, colour, left_DID, right_DID, root_DID
    bytes 100..115       four unsigned 32-bit timestamp words (tsinfo)
    bytes 116..123       first_SID, tot_size
    """

    def __init__(self, DID, dent, DEBUG=0):
        """
        :param DID: index of this entry in the directory
        :param dent: the 128-byte directory entry
        :param DEBUG: when non-zero, the new node is dumped immediately
        """
        self.DID = DID
        head = unpack('<HBBiii', dent[64:80])
        cbufsize = head[0]
        (self.etype, self.colour, self.left_DID, self.right_DID,
            self.root_DID) = head[1:]
        self.first_SID, self.tot_size = unpack('<ii', dent[116:124])
        # cbufsize counts the bytes of the name including its terminator
        self.name = str(dent[0:cbufsize-2], 'utf_16_le') if cbufsize else ''
        self.children = []  # filled in later
        self.parent = -1  # indicates orphan; fixed up later
        self.tsinfo = unpack('<IIII', dent[100:116])
        if DEBUG:
            self.dump(DEBUG)

    def dump(self, DEBUG=1):
        """Print the node to stdout; DEBUG == 2 adds the timestamp words."""
        fields = (
            self.DID, self.name, self.etype, self.left_DID,
            self.right_DID, self.root_DID, self.parent, self.children,
            self.first_SID, self.tot_size,
        )
        print("DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d" % fields)
        if DEBUG == 2:
            # cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
            print("timestamp info", self.tsinfo)
|
||||
|
||||
def _build_family_tree(dirlist, parent_DID, child_DID):
|
||||
if child_DID < 0: return
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].left_DID)
|
||||
dirlist[parent_DID].children.append(child_DID)
|
||||
dirlist[child_DID].parent = parent_DID
|
||||
_build_family_tree(dirlist, parent_DID, dirlist[child_DID].right_DID)
|
||||
if dirlist[child_DID].etype == 1: # storage
|
||||
_build_family_tree(dirlist, child_DID, dirlist[child_DID].root_DID)
|
||||
|
||||
# Compound document handler.
|
||||
# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
# only operation it needs to support is slicing.
|
||||
|
||||
class CompDoc(object):
    """
    Compound document handler.

    :param mem: the raw contents of the file, as bytes or as an mmap.mmap()
                object. The only operation it needs to support is slicing.
    :param logfile: file object that receives warnings and debug output
    :param DEBUG: non-zero enables debug output; the value is also passed
                  to DirNode.dump for every directory entry
    """

    def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
        self.logfile = logfile
        if mem[0:8] != SIGNATURE:
            raise CompDocError('Not an OLE2 compound document')
        if mem[28:30] != b'\xFE\xFF':
            raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
        revision, version = unpack('<HH', mem[24:28])
        if DEBUG:
            print("\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision), file=logfile)
        self.mem = mem
        # Sector sizes are stored as powers of two.
        ssz, sssz = unpack('<HH', mem[30:34])
        if ssz > 20:  # allows for 2**20 bytes i.e. 1MB
            print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..."
                  % ssz, file=logfile)
            ssz = 9
        if sssz > ssz:
            print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..."
                  % sssz, file=logfile)
            sssz = 6
        self.sec_size = sec_size = 1 << ssz
        self.short_sec_size = 1 << sssz
        (
            SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
            SSAT_first_sec_sid, SSAT_tot_secs,
            MSAT_first_sec_sid, MSAT_tot_secs,
        ) = unpack('<iiiiiiii', mem[44:76])
        # Sector data starts after the 512-byte header.
        mem_data_len = len(mem) - 512
        mem_data_secs, left_over = divmod(mem_data_len, sec_size)
        if left_over:
            # Deliberately only a warning, not a CompDocError.
            print("WARNING *** file size (%d) not 512 + multiple of sector size (%d)"
                  % (len(mem), sec_size), file=logfile)
        if DEBUG:
            print('sec sizes', ssz, sssz, sec_size, self.short_sec_size, file=logfile)
            print("mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs), file=logfile)
            print("SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d"
                  % (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,), file=logfile)
            print("SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,), file=logfile)
            print("MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,), file=logfile)
        nent = int_floor_div(sec_size, 4)  # number of SID entries in a sector
        fmt = "<%di" % nent
        trunc_warned = 0
        #
        # === build the MSAT ===
        #
        # The first 109 entries live in the header; further entries are
        # chained through sectors whose last entry is the next sector's SID.
        MSAT = list(unpack('<109i', mem[76:512]))
        sid = MSAT_first_sec_sid
        while sid >= 0:
            if sid >= mem_data_secs:
                raise CompDocError(
                    "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
                    )
            offset = 512 + sec_size * sid
            news = list(unpack(fmt, mem[offset:offset+sec_size]))
            sid = news.pop()
            MSAT.extend(news)
        if DEBUG:
            print("MSAT: len =", len(MSAT), file=logfile)
            print(MSAT, file=logfile)
        #
        # === build the SAT ===
        #
        self.SAT = []
        for msid in MSAT:
            if msid == FREESID:
                continue
            if msid >= mem_data_secs:
                if not trunc_warned:
                    print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile)
                    print("INFO: Trying to access sector %d but only %d available"
                          % (msid, mem_data_secs), file=logfile)
                    trunc_warned = 1
                continue
            offset = 512 + sec_size * msid
            news = list(unpack(fmt, mem[offset:offset+sec_size]))
            self.SAT.extend(news)
        if DEBUG:
            print("SAT: len =", len(self.SAT), file=logfile)
            print(self.SAT, file=logfile)
            # Blank separator line; goes to logfile like all other debug output.
            print(file=logfile)
        #
        # === build the directory ===
        #
        dbytes = self._get_stream(
            self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
            name="directory")
        # One DirNode per 128-byte directory entry; DIDs number them from 0.
        dirlist = [
            DirNode(did, dbytes[pos:pos+128], 0)
            for did, pos in enumerate(range(0, len(dbytes), 128))
            ]
        self.dirlist = dirlist
        _build_family_tree(dirlist, 0, dirlist[0].root_DID)  # and stand well back ...
        if DEBUG:
            for d in dirlist:
                d.dump(DEBUG)
        #
        # === get the SSCS ===
        #
        sscs_dir = self.dirlist[0]
        assert sscs_dir.etype == 5  # root entry
        if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
            # Problem reported by Frank Hoffsuemmer: some software was
            # writing -1 instead of -2 (EOCSID) for the first_SID
            # when the SSCS was empty. Not having EOCSID caused assertion
            # failure in _get_stream.
            # Solution: avoid calling _get_stream in any case when the
            # SSCS appears to be empty.
            # Must be bytes: _get_stream slices it and joins with b''.
            self.SSCS = b""
        else:
            self.SSCS = self._get_stream(
                self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
                sscs_dir.tot_size, name="SSCS")
        #
        # === build the SSAT ===
        #
        self.SSAT = []
        if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
            print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile)
        if sscs_dir.tot_size > 0:
            sid = SSAT_first_sec_sid
            nsecs = SSAT_tot_secs
            while sid >= 0 and nsecs > 0:
                nsecs -= 1
                start_pos = 512 + sid * sec_size
                news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
                self.SSAT.extend(news)
                sid = self.SAT[sid]
            if DEBUG:
                print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile)
            assert nsecs == 0 and sid == EOCSID
        if DEBUG:
            print("SSAT", self.SSAT, file=logfile)

    def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
        """
        Assemble a stream by following its sector chain.

        :param mem: bytes-like object holding the sectors
        :param base: offset in *mem* of sector 0
        :param sat: allocation table; sat[s] is the sector that follows s
        :param sec_size: sector size in bytes
        :param start_sid: first sector of the stream
        :param size: expected stream length; None means take whole sectors
                     with nothing to check against
        :param name: stream name, used only in messages
        :returns: the stream contents as bytes
        :raises CompDocError: when the chain leads outside *sat*
        """
        sectors = []
        s = start_sid
        if size is None:
            # nothing to check against
            while s >= 0:
                start_pos = base + s * sec_size
                sectors.append(mem[start_pos:start_pos+sec_size])
                try:
                    s = sat[s]
                except IndexError:
                    raise CompDocError(
                        "OLE2 stream %r: sector allocation table invalid entry (%d)" %
                        (name, s)
                        )
            assert s == EOCSID
        else:
            todo = size
            while s >= 0:
                start_pos = base + s * sec_size
                # Never take more than what is still owed to reach *size*.
                grab = min(sec_size, todo)
                todo -= grab
                sectors.append(mem[start_pos:start_pos+grab])
                try:
                    s = sat[s]
                except IndexError:
                    raise CompDocError(
                        "OLE2 stream %r: sector allocation table invalid entry (%d)" %
                        (name, s)
                        )
            assert s == EOCSID
            if todo != 0:
                print("WARNING *** OLE2 stream %r: expected size %d, actual size %d"
                      % (name, size, size - todo), file=self.logfile)
        return b''.join(sectors)

    def _dir_search(self, path, storage_DID=0):
        """
        Return the matching DirNode instance, or None.

        :param path: list of name components; names are compared
                     case-insensitively
        :param storage_DID: DID of the storage whose children are searched
        :raises CompDocError: when the path ends on a storage, or the matched
                              entry is neither a storage (etype 1) nor a
                              user stream (etype 2)
        """
        head = path[0]
        tail = path[1:]
        dl = self.dirlist
        for child in dl[storage_DID].children:
            if dl[child].name.lower() == head.lower():
                et = dl[child].etype
                if et == 2:
                    return dl[child]
                if et == 1:
                    if not tail:
                        raise CompDocError("Requested component is a 'storage'")
                    return self._dir_search(tail, child)
                dl[child].dump(1)
                raise CompDocError("Requested stream is not a 'user stream'")
        return None

    def get_named_stream(self, qname):
        """
        Interrogate the compound document's directory; return the stream as
        bytes if found, otherwise return None.

        :param qname: name of the desired stream e.g. 'Workbook'; components
                      of a nested name are separated by "/"
        """
        d = self._dir_search(qname.split("/"))
        if d is None:
            return None
        if d.tot_size >= self.min_size_std_stream:
            return self._get_stream(
                self.mem, 512, self.SAT, self.sec_size, d.first_SID,
                d.tot_size, name=qname)
        # Streams below the threshold are stored inside the SSCS.
        return self._get_stream(
            self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
            d.tot_size, name=qname + " (from SSCS)")

    def locate_named_stream(self, qname):
        """
        Interrogate the compound document's directory.

        If the named stream is not found, (None, 0, 0) is returned.
        If the named stream is found and is contiguous within the original
        byte sequence ("mem") used when the document was opened, then
        (mem, offset_to_start_of_stream, length_of_stream) is returned.
        Otherwise a new bytes object is built from the fragments and
        (new_bytes, 0, length_of_stream) is returned.

        :param qname: name of the desired stream e.g. 'Workbook'; components
                      of a nested name are separated by "/"
        """
        d = self._dir_search(qname.split("/"))
        if d is None:
            return (None, 0, 0)
        if d.tot_size >= self.min_size_std_stream:
            return self._locate_stream(self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size)
        return (
            self._get_stream(
                self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
                d.tot_size, qname + " (from SSCS)"),
            0,
            d.tot_size
            )

    def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
        """
        Find a stream in *mem* without copying it when it is contiguous.

        :returns: (mem, start offset, size) for a contiguous stream,
                  otherwise (joined fragments, 0, size)
        :raises CompDocError: when *start_sid* is negative or the sector
                              chain leads outside *sat*
        """
        s = start_sid
        if s < 0:
            raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
        p = -99  # dummy previous SID
        start_pos = -9999
        end_pos = -8888
        slices = []
        while s >= 0:
            if s == p+1:
                # contiguous sectors
                end_pos += sec_size
            else:
                # start new slice
                if p >= 0:
                    # not first time
                    slices.append((start_pos, end_pos))
                start_pos = base + s * sec_size
                end_pos = start_pos + sec_size
            p = s
            try:
                s = sat[s]
            except IndexError:
                # Same meaningful error as _get_stream gives for a corrupt SAT.
                raise CompDocError(
                    "_locate_stream: sector allocation table invalid entry (%d)" % s
                    )
        assert s == EOCSID
        if not slices:
            # The stream is contiguous ... just what we like!
            return (mem, start_pos, size)
        slices.append((start_pos, end_pos))
        return (b''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,276 @@
|
||||
# Author: mozman <mozman@gmx.at>
|
||||
# Purpose: xfcell -- cell with convenient xf function
|
||||
# Created: 04.12.2010
|
||||
# Copyright (C) 2010, Manfred Moitzi
|
||||
# License: BSD-style licence
|
||||
|
||||
"""
|
||||
The XFCell() object contains the data for one cell.
|
||||
|
||||
WARNING: You don't call this class yourself. You access Cell objects
|
||||
via methods of the Sheet object(s) that you found in the Book object that
|
||||
was returned when you called xlrd.open_workbook("myfile.xls").
|
||||
|
||||
Cell objects have four attributes: `ctype` is an int, `value` (which depends
|
||||
on `ctype`), `xf_index` and `sheet`, a reference to the containing sheet. If
|
||||
**formatting_info** is not enabled when the workbook is opened, xf_index will
|
||||
be **None**.
|
||||
|
||||
The following table describes the types of cells and how their values
|
||||
are represented in Python.
|
||||
|
||||
=============== ===== ============ ==========================================
|
||||
Type symbol Const Python value Note
|
||||
=============== ===== ============ ==========================================
|
||||
XL_CELL_EMPTY 0 ""
|
||||
XL_CELL_TEXT 1 str
|
||||
XL_CELL_NUMBER 2 float
|
||||
XL_CELL_DATE 3 float
|
||||
XL_CELL_BOOLEAN 4 int 1 means TRUE, 0 means FALSE
|
||||
XL_CELL_ERROR 5 int representing internal Excel codes; for a
|
||||
text representation, refer to the supplied
|
||||
dictionary error_text_from_code
|
||||
XL_CELL_BLANK 6 "" this type will appear only when
|
||||
open_workbook(..., formatting_info=True)
|
||||
is used.
|
||||
=============== ===== ============ ==========================================
|
||||
"""
|
||||
|
||||
import datetime
|
||||
|
||||
from .xldate import xldate_as_tuple
|
||||
from .biffh import XL_CELL_DATE, BaseObject
|
||||
|
||||
class XFCell(BaseObject):
    """ Extended Cell() class with convenient methods for easy access of cell
    properties.

    Formatting attributes (background, font, format, alignment, border,
    protection) are taken from the cell's own XF record or from its parent
    style XF, depending on the XF's per-attribute flags.
    """
    __slots__ = ['sheet', 'ctype', 'value', 'xf']

    def __init__(self, ctype, value, xf_index=None, sheet=None):
        """
        :param ctype: cell type code
        :param value: cell value
        :param xf_index: index into the book's xf_list, or None when no
                         formatting information is available
        :param sheet: the containing sheet; needed whenever *xf_index* is
                      given because the XF is looked up through sheet.book
        """
        self.sheet = sheet
        self.ctype = ctype
        self.value = value

        if xf_index is not None:
            self.xf = self.book.xf_list[xf_index]
        else:
            self.xf = None

    def _own_or_parent(self, flag_name, attr_name):
        """ Return attribute *attr_name* from this cell's XF when the XF is a
        style XF or its flag *flag_name* is set, else from the parent style.
        """
        xf = self.xf
        flag = getattr(xf, flag_name)
        if (xf.is_style and flag == 0) or flag:
            return getattr(xf, attr_name)
        return getattr(self.parent_style, attr_name)

    @property
    def book(self):
        return self.sheet.book

    @property
    def has_xf(self):
        return (self.xf is not None)

    @property
    def xf_index(self):
        if self.has_xf:
            return self.xf.xf_index
        else:
            return None

    @property
    def parent_style(self):
        return self.book.xf_list[self.xf.parent_style_index]

    @property
    def is_datetime(self):
        return self.ctype == XL_CELL_DATE

    @property
    def has_date(self):
        """ True if the cell is a date cell whose value has a date part. """
        if self.is_datetime:
            # Values below 1.0 carry only a time of day; 1.0 itself is
            # already a whole day, so the boundary is inclusive.
            return self.value >= 1.
        return False

    def get_color(self, index):
        return self.book.colour_map[index]

    def datetime(self):
        """ Returns a datetime.datetime object if cell type is XL_CELL_DATE
        else raises a TypeError, and raises ValueError if the cell has
        no date value (only time value is present).
        """
        if self.is_datetime:
            if self.has_date:
                date = xldate_as_tuple(self.value, self.book.datemode)
                return datetime.datetime(*date)
            else:
                raise ValueError("Cell has no date value.")
        else:
            raise TypeError("Cell is not a XL_CELL_DATE.")

    def date(self):
        """ Returns a datetime.date object if cell type is XL_CELL_DATE
        else raises a **TypeError**. Raises **ValueError** if the cell
        doesn't have a date value (only time value is present).
        """
        dt = self.datetime()
        return dt.date()

    def time(self):
        """ Returns a datetime.time object if cell type is XL_CELL_DATE else
        raises a TypeError.
        """
        if self.is_datetime:
            date = xldate_as_tuple(self.value, self.book.datemode)
            return datetime.time(date[3], date[4], date[5])
        else:
            raise TypeError("Cell is not a XL_CELL_DATE.")

    #
    # access the XFBackground() class
    #

    @property
    def background(self):
        return self._own_or_parent('_background_flag', 'background')

    def background_color(self):
        """ Get cell background-color as 3-tuple. """
        # Use the resolved background so inherited settings are honoured,
        # as bordercolors() does for borders.
        color_index = self.background.background_colour_index
        return self.get_color(color_index)

    def fill_pattern(self):
        return self.background.fill_pattern

    def pattern_color(self):
        color_index = self.background.pattern_colour_index
        return self.get_color(color_index)

    #
    # access the Font() class
    #

    @property
    def font_index(self):
        return self._own_or_parent('_font_flag', 'font_index')

    @property
    def font(self):
        """ Get the Font() class. """
        # Use the resolved index, as format() does with format_key.
        return self.book.font_list[self.font_index]

    def font_color(self):
        """ Get cell foreground-color as 3-tuple. """
        return self.get_color(self.font.colour_index)

    #
    # access the Format() class
    #

    @property
    def format_key(self):
        return self._own_or_parent('_format_flag', 'format_key')

    @property
    def format(self):
        """ Get the Format() class. """
        return self.book.format_map[self.format_key]

    def format_str(self):
        """ Get the associated 'format_str'. """
        return self.format.format_str

    #
    # access the XFAlignment() class
    #

    @property
    def alignment(self):
        return self._own_or_parent('_alignment_flag', 'alignment')

    #
    # access the XFBorder() class
    #

    @property
    def border(self):
        return self._own_or_parent('_border_flag', 'border')

    def bordercolors(self):
        """ Get border color as dict of rgb-color-tuples. """
        border = self.border
        return {
            'top': self.get_color(border.top_colour_index),
            'bottom': self.get_color(border.bottom_colour_index),
            'left': self.get_color(border.left_colour_index),
            'right': self.get_color(border.right_colour_index),
            'diag': self.get_color(border.diag_colour_index),
        }

    def borderstyles(self):
        """ Get border styles as dict of ints. """
        border = self.border
        return {
            'top': border.top_line_style,
            'bottom': border.bottom_line_style,
            'left': border.left_line_style,
            'right': border.right_line_style,
            'diag': border.diag_line_style,
        }

    @property
    def has_up_diag(self):
        """ Draw a line across the cell from bottom left to top right. """
        return bool(self.border.diag_up)

    @property
    def has_down_diag(self):
        """ Draw a line across the cell from top left to bottom right. """
        return bool(self.border.diag_down)

    #
    # access the XFProtection() class
    #

    @property
    def protection(self):
        return self._own_or_parent('_protection_flag', 'protection')

    @property
    def is_cell_locked(self):
        return bool(self.protection.cell_locked)

    @property
    def is_formula_hidden(self):
        # Was reading cell_locked, which made this a duplicate of
        # is_cell_locked.
        return bool(self.protection.formula_hidden)
|
||||
@@ -0,0 +1,84 @@
|
||||
# Author: mozman <mozman@gmx.at>
|
||||
# Purpose: xfconst -- xf constants
|
||||
# Created: 05.12.2010
|
||||
# Copyright (C) 2010, Manfred Moitzi
|
||||
# License: BSD-style licence
|
||||
|
||||
# Horizontal alignment -- excelfileformat section 5.115.1 pg. 219
(
    HOR_ALIGN_GENERAL,                   # 0
    HOR_ALIGN_LEFT,                      # 1
    HOR_ALIGN_CENTRED,                   # 2
    HOR_ALIGN_RIGHT,                     # 3
    HOR_ALIGN_FILLED,                    # 4
    HOR_ALIGN_JUSTIFIED,                 # 5
    HOR_ALIGN_CENTRED_ACROSS_SELECTION,  # 6
    HOR_ALIGN_DISTRIBUTED,               # 7
) = range(8)

# Vertical alignment -- excelfileformat section 5.115.1 pg. 220
(
    VERT_ALIGN_TOP,          # 0
    VERT_ALIGN_CENTRED,      # 1
    VERT_ALIGN_BOTTOM,       # 2
    VERT_ALIGN_JUSTIFIED,    # 3
    VERT_ALIGN_DISTRIBUTED,  # 4
) = range(5)

# Text orientation -- excelfileformat section 5.115.1 pg. 220
(
    ORIENTATION_NONE,                 # 0
    ORIENTATION_STACKED,              # 1
    ORIENTATION_90_COUNTERCLOCKWISE,  # 2
    ORIENTATION_90_CLOCKWISE,         # 3
) = range(4)

# Text rotation -- excelfileformat section 5.115.1 pg. 220
ROTATION_NONE, ROTATION_STACKED = 0, 255
# other values:
# Value     Description
# =======   =================================
# 1-90      1 to 90 degrees counterclockwise
# 91-180    1 to 90 degrees clockwise
|
||||
|
||||
# Line styles for cell borders -- excelfileformat section 2.5.11 pg. 24
# Each entry is annotated with its value and a sketch of the line pattern.
(
    LS_NOLINE,                      # 0
    LS_THIN,                        # 1   solid line
    LS_MEDIUM,                      # 2   solid line
    LS_DASHED,                      # 3   - - - - - - - - -
    LS_DOTTED,                      # 4   .................
    LS_THICK,                       # 5   solid line
    LS_DOUBLE,                      # 6   =================
    LS_HAIR,                        # 7   very thin dotted
    LS_MEDIUM_DASHED,               # 8   - - - - - - - - -
    LS_THIN_DASH_DOTTED,            # 9   - . - . - . - . -
    LS_MEDIUM_DASH_DOTTED,          # 10  - . - . - . - . -
    LS_THIN_DASH_DOT_DOTTED,        # 11  - .. - .. - .. - .. -
    LS_MEDIUM_DASH_DOT_DOTTED,      # 12  - .. - .. - .. - .. -
    LS_SLANTED_MEDIUM_DASH_DOTTED,  # 13  slanted dash-dot pattern
) = range(14)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,167 @@
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
#
|
||||
# Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
|
||||
# This module is part of the xlrd3 package, which is released under a
|
||||
# BSD-style licence.
|
||||
#
|
||||
# Provides function(s) for dealing with Microsoft Excel ™ dates.
|
||||
#
|
||||
# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
|
||||
#
|
||||
# The conversion from days to (year, month, day) starts with
|
||||
# an integral "julian day number" aka JDN.
|
||||
# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
|
||||
# More importantly:
|
||||
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
|
||||
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
|
||||
|
||||
def ifd(x, y):
    """Integer floor divide: return the quotient part of ``divmod(x, y)``."""
    quotient, _remainder = divmod(x, y)
    return quotient


# Offset added to an Excel day number to obtain a Julian day number,
# indexed by datemode (0: 1900-based, 1: 1904-based).
_JDN_delta = (
    2415080 - 61,  # 1900-03-01 is day 61 (1900 system) and JDN 2415080
    2416482 - 1,   # 1904-01-02 is day 1 (1904 system) and JDN 2416482
)
# Sanity check: the two epochs are 1462 days apart.
assert _JDN_delta[1] - _JDN_delta[0] == 1462
|
||||
|
||||
class XLDateError(ValueError):
    """Base class covering all Excel date conversion errors in this module."""


class XLDateNegative(XLDateError):
    """The Excel date number is negative (xldate < 0.00)."""


class XLDateAmbiguous(XLDateError):
    """The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)."""


class XLDateTooLarge(XLDateError):
    """The date falls in Gregorian year 10000 or later."""


class XLDateBadDatemode(XLDateError):
    """The datemode argument is neither 0 nor 1."""


class XLDateBadTuple(XLDateError):
    """A date/time tuple is too early/late or has invalid component(s)."""


# First day number that is too large, indexed by datemode.
# This is equivalent to 10000-01-01; the 1904-based value is 1462 days lower.
_XLDAYS_TOO_LARGE = (
    2958466,
    2958466 - 1462,
)
|
||||
|
||||
def xldate_as_tuple(xldate, datemode):
    """Convert an Excel number into a Gregorian date/time tuple.

    The number is presumed to represent a date, a datetime or a time; the
    result is suitable for feeding to datetime or mx.DateTime constructors.

    Parameters:
        xldate   -- The Excel number.
        datemode -- 0: 1900-based, 1: 1904-based.
                    WARNING: when using this function to interpret the
                    contents of a workbook, you should pass in the
                    Book.datemode attribute of that workbook. Whether the
                    workbook has ever been anywhere near a Macintosh is
                    irrelevant.

    Returns:
        Gregorian (year, month, day, hour, minute, nearest_second).
        Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a
        time; (0, 0, 0, hour, minute, second) will be returned.
        Note: 1904-01-01 is not regarded as a valid date in the datemode 1
        system; its "serial number" is zero.

    Raises:
        XLDateNegative    -- xldate < 0.00
        XLDateAmbiguous   -- The 1900 leap-year problem
                             (datemode == 0 and 1.0 <= xldate < 61.0)
        XLDateTooLarge    -- Gregorian year 10000 or later
        XLDateBadDatemode -- datemode arg is neither 0 nor 1
        XLDateError       -- Covers the 4 specific errors
    """
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)
    if xldate == 0.00:
        return (0, 0, 0, 0, 0, 0)
    if xldate < 0.00:
        raise XLDateNegative(xldate)

    # Split into whole days and the time of day, rounded to whole seconds.
    xldays = int(xldate)
    day_fraction = xldate - xldays
    seconds = int(round(day_fraction * 86400.0))
    assert 0 <= seconds <= 86400
    if seconds == 86400:
        # Rounding reached a full day: carry it into the day count.
        xldays += 1
        hour = minute = second = 0
    else:
        hour, leftover = divmod(seconds, 3600)
        minute, second = divmod(leftover, 60)

    if xldays >= _XLDAYS_TOO_LARGE[datemode]:
        raise XLDateTooLarge(xldate)
    if xldays == 0:
        # Pure time value: no date part.
        return (0, 0, 0, hour, minute, second)
    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous(xldate)

    # Julian day number -> Gregorian (year, month, day).
    jdn = xldays + _JDN_delta[datemode]
    yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
    mp = ifd(yreg % 1461, 4) * 535 + 333
    d = ifd(mp % 16384, 535) + 1
    mp >>= 14  # same as mp //= 16384
    year_base = ifd(yreg, 1461)
    if mp >= 10:
        year, month = year_base - 4715, mp - 9
    else:
        year, month = year_base - 4716, mp + 3
    return (year, month, d, hour, minute, second)
|
||||
|
||||
# === conversions from date/time to xl numbers
|
||||
|
||||
def _leap(y):
    """Return 1 if y is a leap year under the Gregorian rules, else 0."""
    if y % 400 == 0:
        return 1
    if y % 100 == 0:
        return 0
    return 0 if y % 4 else 1


# Days per month, indexed by month number 1..12; index 0 is a None
# placeholder. February is listed with 28 days.
_days_in_month = (
    None,
    31, 28, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31,
)
|
||||
|
||||
def xldate_from_date_tuple(datetuple, datemode):
    """Convert a date tuple (year, month, day) to an Excel date.

    Parameters:
        datetuple -- (year, month, day) where
                     year is the Gregorian year,
                     1 <= month <= 12,
                     1 <= day <= last day of that (year, month).
        datemode  -- 0: 1900-based, 1: 1904-based.

    Returns:
        The Excel day number as a float; (0, 0, 0) yields 0.00.

    Raises:
        XLDateAmbiguous   -- The 1900 leap-year problem
                             (datemode == 0 and 1.0 <= xldate < 61.0)
        XLDateBadDatemode -- datemode arg is neither 0 nor 1
        XLDateBadTuple    -- (year, month, day) is too early/late or has
                             invalid component(s)
        XLDateError       -- Covers the specific errors
    """
    (year, month, day) = datetuple
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if year == 0 and month == 0 and day == 0:
        return 0.00

    ymd = (year, month, day)
    if not (1900 <= year <= 9999):
        raise XLDateBadTuple("Invalid year: %r" % (ymd,))
    if not (1 <= month <= 12):
        raise XLDateBadTuple("Invalid month: %r" % (ymd,))

    # A day beyond the table length is only valid for 29 February of a
    # leap year.
    if day < 1:
        bad_day = True
    elif day > _days_in_month[month]:
        bad_day = not (day == 29 and month == 2 and _leap(year))
    else:
        bad_day = False
    if bad_day:
        raise XLDateBadTuple("Invalid day: %r" % (ymd,))

    # January and February are counted as months 10 and 11 of the
    # previous year in the shifted calendar used for the JDN formula.
    shifted_year = year + 4716
    if month <= 2:
        Yp = shifted_year - 1
        Mp = month + 9
    else:
        Yp = shifted_year
        Mp = month - 3

    year_days = ifd(1461 * Yp, 4)
    month_days = ifd(979 * Mp + 16, 32)
    century_fix = ifd(ifd(Yp + 184, 100) * 3, 4)
    jdn = year_days + month_days + day - 1364 - century_fix

    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % (ymd,))
    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous("Before 1900-03-01: %r" % (ymd,))
    return float(xldays)
|
||||
|
||||
def xldate_from_time_tuple(timetuple):
    """Convert a time tuple to an Excel "date" value (fraction of a day).

    Parameters:
        timetuple -- (hour, minute, second) where
                     0 <= hour < 24, 0 <= minute < 60, 0 <= second < 60.

    Returns:
        The time of day expressed as a fraction of a day.

    Raises:
        XLDateBadTuple -- Out-of-range hour, minute, or second
    """
    (hour, minute, second) = timetuple
    in_range = 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60
    if not in_range:
        raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
    total_minutes = second / 60.0 + minute
    total_hours = total_minutes / 60.0 + hour
    return total_hours / 24.0
|
||||
|
||||
def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """Convert a datetime tuple to an Excel date value.

    The first three items go to xldate_from_date_tuple and the remaining
    items to xldate_from_time_tuple; refer to those functions for details.

    Parameters:
        datetime_tuple -- (year, month, day, hour, minute, second)
        datemode       -- 0: 1900-based, 1: 1904-based.

    Returns:
        The sum of the Excel day number and the fraction-of-day value.
    """
    day_number = xldate_from_date_tuple(datetime_tuple[:3], datemode)
    day_fraction = xldate_from_time_tuple(datetime_tuple[3:])
    return day_number + day_fraction
|
||||
@@ -4,7 +4,7 @@ from .antlr import ANTLRException
|
||||
|
||||
|
||||
class Formula(object):
|
||||
__slots__ = ["__init__", "__s", "__parser", "__sheet_refs", "__xcall_refs"]
|
||||
__slots__ = ["__s", "__parser", "__sheet_refs", "__xcall_refs"]
|
||||
|
||||
|
||||
def __init__(self, s):
|
||||
|
||||
+243
-41
@@ -1,19 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Tests for Tablib."""
|
||||
|
||||
import json
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] > 2:
|
||||
from tablib.packages import markup3 as markup
|
||||
else:
|
||||
from tablib.packages import markup
|
||||
|
||||
|
||||
|
||||
import os
|
||||
import tablib
|
||||
from tablib.compat import markup, unicode, is_py3
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -33,7 +29,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
self.george = ('George', 'Washington', 67)
|
||||
self.tom = ('Thomas', 'Jefferson', 50)
|
||||
|
||||
self.founders = tablib.Dataset(headers=self.headers)
|
||||
self.founders = tablib.Dataset(headers=self.headers, title='Founders')
|
||||
self.founders.append(self.john)
|
||||
self.founders.append(self.george)
|
||||
self.founders.append(self.tom)
|
||||
@@ -63,6 +59,18 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
self.assertRaises(tablib.InvalidDimensions, data.append, new_row)
|
||||
|
||||
def test_set_headers_with_incorrect_dimension(self):
|
||||
"""Verify headers correctly detects mismatch of number of
|
||||
headers and data.
|
||||
"""
|
||||
|
||||
data.append(self.john)
|
||||
|
||||
def set_header_callable():
|
||||
data.headers = ['first_name']
|
||||
|
||||
self.assertRaises(tablib.InvalidDimensions, set_header_callable)
|
||||
|
||||
|
||||
def test_add_column(self):
|
||||
"""Verify adding column works with/without headers."""
|
||||
@@ -72,17 +80,17 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
new_col = ['reitz', 'monke']
|
||||
|
||||
data.append(col=new_col)
|
||||
data.append_col(new_col)
|
||||
|
||||
self.assertEquals(data[0], ('kenneth', 'reitz'))
|
||||
self.assertEquals(data.width, 2)
|
||||
self.assertEqual(data[0], ('kenneth', 'reitz'))
|
||||
self.assertEqual(data.width, 2)
|
||||
|
||||
# With Headers
|
||||
data.headers = ('fname', 'lname')
|
||||
new_col = [21, 22]
|
||||
data.append(col=new_col, header='age')
|
||||
data.append_col(new_col, header='age')
|
||||
|
||||
self.assertEquals(data['age'], new_col)
|
||||
self.assertEqual(data['age'], new_col)
|
||||
|
||||
|
||||
def test_add_column_no_data_no_headers(self):
|
||||
@@ -90,30 +98,95 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
new_col = ('reitz', 'monke')
|
||||
|
||||
data.append(col=new_col)
|
||||
data.append_col(new_col)
|
||||
|
||||
self.assertEquals(data[0], tuple([new_col[0]]))
|
||||
self.assertEquals(data.width, 1)
|
||||
self.assertEquals(data.height, len(new_col))
|
||||
self.assertEqual(data[0], tuple([new_col[0]]))
|
||||
self.assertEqual(data.width, 1)
|
||||
self.assertEqual(data.height, len(new_col))
|
||||
|
||||
|
||||
def test_add_column_with_header_ignored(self):
|
||||
"""Verify append_col() ignores the header if data.headers has
|
||||
not previously been set
|
||||
"""
|
||||
|
||||
new_col = ('reitz', 'monke')
|
||||
|
||||
data.append_col(new_col, header='first_name')
|
||||
|
||||
self.assertEqual(data[0], tuple([new_col[0]]))
|
||||
self.assertEqual(data.width, 1)
|
||||
self.assertEqual(data.height, len(new_col))
|
||||
self.assertEqual(data.headers, None)
|
||||
|
||||
|
||||
def test_add_column_with_header_and_headers_only_exist(self):
|
||||
"""Verify append_col() with header correctly detects mismatch when
|
||||
headers exist but there is no existing row data
|
||||
"""
|
||||
|
||||
data.headers = ['first_name']
|
||||
#no data
|
||||
|
||||
new_col = ('allen')
|
||||
|
||||
def append_col_callable():
|
||||
data.append_col(new_col, header='middle_name')
|
||||
|
||||
self.assertRaises(tablib.InvalidDimensions, append_col_callable)
|
||||
|
||||
|
||||
def test_add_column_with_header_and_data_exists(self):
|
||||
"""Verify append_col() works when headers and rows exists"""
|
||||
|
||||
data.headers = self.headers
|
||||
data.append(self.john)
|
||||
|
||||
new_col = [10];
|
||||
|
||||
data.append_col(new_col, header='age')
|
||||
|
||||
self.assertEqual(data.height, 1)
|
||||
self.assertEqual(data.width, len(self.john) + 1)
|
||||
self.assertEqual(data['age'], new_col)
|
||||
self.assertEqual(len(data.headers), len(self.headers) + 1)
|
||||
|
||||
|
||||
def test_add_callable_column(self):
|
||||
"""Verify adding column with values specified as callable."""
|
||||
new_col = [lambda x: x[0]]
|
||||
self.founders.append(col=new_col, header='first_again')
|
||||
#
|
||||
# self.assertTrue(map(lambda x: x[0] == x[-1], self.founders))
|
||||
|
||||
new_col = lambda x: x[0]
|
||||
|
||||
self.founders.append_col(new_col, header='first_again')
|
||||
|
||||
|
||||
def test_header_slicing(self):
|
||||
"""Verify slicing by headers."""
|
||||
|
||||
self.assertEqual(self.founders['first_name'],
|
||||
[self.john[0], self.george[0], self.tom[0]])
|
||||
[self.john[0], self.george[0], self.tom[0]])
|
||||
|
||||
self.assertEqual(self.founders['last_name'],
|
||||
[self.john[1], self.george[1], self.tom[1]])
|
||||
[self.john[1], self.george[1], self.tom[1]])
|
||||
|
||||
self.assertEqual(self.founders['gpa'],
|
||||
[self.john[2], self.george[2], self.tom[2]])
|
||||
[self.john[2], self.george[2], self.tom[2]])
|
||||
|
||||
|
||||
def test_get_col(self):
|
||||
"""Verify getting columns by index"""
|
||||
|
||||
self.assertEqual(
|
||||
self.founders.get_col(list(self.headers).index('first_name')),
|
||||
[self.john[0], self.george[0], self.tom[0]])
|
||||
|
||||
self.assertEqual(
|
||||
self.founders.get_col(list(self.headers).index('last_name')),
|
||||
[self.john[1], self.george[1], self.tom[1]])
|
||||
|
||||
self.assertEqual(
|
||||
self.founders.get_col(list(self.headers).index('gpa')),
|
||||
[self.john[2], self.george[2], self.tom[2]])
|
||||
|
||||
|
||||
def test_data_slicing(self):
|
||||
@@ -173,10 +246,11 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
self.assertEqual(csv, self.founders.csv)
|
||||
|
||||
def test_tsv_export(self):
|
||||
"""Verify exporting dataset object as CSV."""
|
||||
|
||||
# Build up the csv string with headers first, followed by each row
|
||||
def test_tsv_export(self):
|
||||
"""Verify exporting dataset object as TSV."""
|
||||
|
||||
# Build up the tsv string with headers first, followed by each row
|
||||
tsv = ''
|
||||
for col in self.headers:
|
||||
tsv += col + '\t'
|
||||
@@ -190,8 +264,8 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
self.assertEqual(tsv, self.founders.tsv)
|
||||
|
||||
def test_html_export(self):
|
||||
|
||||
def test_html_export(self):
|
||||
"""HTML export"""
|
||||
|
||||
html = markup.page()
|
||||
@@ -211,10 +285,36 @@ class TablibTestCase(unittest.TestCase):
|
||||
self.assertEqual(html, self.founders.html)
|
||||
|
||||
|
||||
def test_unicode_append(self):
|
||||
"""Passes in a single unicode charecter and exports."""
|
||||
def test_html_export_none_value(self):
|
||||
"""HTML export"""
|
||||
|
||||
html = markup.page()
|
||||
html.table.open()
|
||||
html.thead.open()
|
||||
|
||||
html.tr(markup.oneliner.th(['foo','', 'bar']))
|
||||
html.thead.close()
|
||||
|
||||
html.tr(markup.oneliner.td(['foo','', 'bar']))
|
||||
|
||||
html.table.close()
|
||||
html = str(html)
|
||||
|
||||
headers = ['foo', None, 'bar'];
|
||||
d = tablib.Dataset(['foo', None, 'bar'], headers=headers)
|
||||
|
||||
self.assertEqual(html, d.html)
|
||||
|
||||
|
||||
def test_unicode_append(self):
|
||||
"""Passes in a single unicode character and exports."""
|
||||
|
||||
if is_py3:
|
||||
new_row = ('å', 'é')
|
||||
else:
|
||||
exec("new_row = (u'å', u'é')")
|
||||
|
||||
|
||||
new_row = ('å', 'é')
|
||||
data.append(new_row)
|
||||
|
||||
data.json
|
||||
@@ -228,7 +328,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def test_book_export_no_exceptions(self):
|
||||
"""Test that varoius exports don't error out."""
|
||||
"""Test that various exports don't error out."""
|
||||
|
||||
book = tablib.Databook()
|
||||
book.add_sheet(data)
|
||||
@@ -250,7 +350,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
data.json = _json
|
||||
|
||||
self.assertEqual(_json, data.json)
|
||||
self.assertEqual(json.loads(_json), json.loads(data.json))
|
||||
|
||||
|
||||
def test_json_import_book(self):
|
||||
@@ -264,7 +364,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
book.json = _json
|
||||
|
||||
self.assertEqual(_json, book.json)
|
||||
self.assertEqual(json.loads(_json), json.loads(book.json))
|
||||
|
||||
|
||||
def test_yaml_import_set(self):
|
||||
@@ -321,6 +421,22 @@ class TablibTestCase(unittest.TestCase):
|
||||
self.assertEqual(_csv, data.csv)
|
||||
|
||||
|
||||
def test_csv_import_set_with_newlines(self):
|
||||
"""Generate and import CSV set serialization when row values have
|
||||
newlines."""
|
||||
data.append(('Markdown\n=======',
|
||||
'A cool language\n\nwith paragraphs'))
|
||||
data.append(('reStructedText\n==============',
|
||||
'Another cool language\n\nwith paragraphs'))
|
||||
data.headers = ('title', 'body')
|
||||
|
||||
_csv = data.csv
|
||||
|
||||
data.csv = _csv
|
||||
|
||||
self.assertEqual(_csv, data.csv)
|
||||
|
||||
|
||||
def test_tsv_import_set(self):
|
||||
"""Generate and import TSV set serialization."""
|
||||
data.append(self.john)
|
||||
@@ -382,12 +498,14 @@ class TablibTestCase(unittest.TestCase):
|
||||
"""Test YAML format detection."""
|
||||
|
||||
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
|
||||
_tsv = 'foo\tbar'
|
||||
_bunk = (
|
||||
'¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
)
|
||||
|
||||
self.assertTrue(tablib.formats.yaml.detect(_yaml))
|
||||
self.assertFalse(tablib.formats.yaml.detect(_bunk))
|
||||
self.assertFalse(tablib.formats.yaml.detect(_tsv))
|
||||
|
||||
|
||||
def test_auto_format_detect(self):
|
||||
@@ -396,10 +514,12 @@ class TablibTestCase(unittest.TestCase):
|
||||
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
|
||||
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
|
||||
_csv = '1,2,3\n4,5,6\n7,8,9\n'
|
||||
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
|
||||
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
|
||||
self.assertEqual(tablib.detect(_yaml)[0], tablib.formats.yaml)
|
||||
self.assertEqual(tablib.detect(_csv)[0], tablib.formats.csv)
|
||||
self.assertEqual(tablib.detect(_tsv)[0], tablib.formats.tsv)
|
||||
self.assertEqual(tablib.detect(_json)[0], tablib.formats.json)
|
||||
self.assertEqual(tablib.detect(_bunk)[0], None)
|
||||
|
||||
@@ -420,7 +540,6 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def test_row_stacking(self):
|
||||
|
||||
"""Row stacking."""
|
||||
|
||||
to_join = tablib.Dataset(headers=self.founders.headers)
|
||||
@@ -428,7 +547,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
for row in self.founders:
|
||||
to_join.append(row=row)
|
||||
|
||||
row_stacked = self.founders.stack_rows(to_join)
|
||||
row_stacked = self.founders.stack(to_join)
|
||||
|
||||
for column in row_stacked.headers:
|
||||
|
||||
@@ -438,7 +557,6 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def test_column_stacking(self):
|
||||
|
||||
"""Column stacking"""
|
||||
|
||||
to_join = tablib.Dataset(headers=self.founders.headers)
|
||||
@@ -446,7 +564,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
for row in self.founders:
|
||||
to_join.append(row=row)
|
||||
|
||||
column_stacked = self.founders.stack_columns(to_join)
|
||||
column_stacked = self.founders.stack_cols(to_join)
|
||||
|
||||
for index, row in enumerate(column_stacked):
|
||||
|
||||
@@ -459,10 +577,10 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def test_sorting(self):
|
||||
|
||||
"""Sort columns."""
|
||||
|
||||
sorted_data = self.founders.sort(col="first_name")
|
||||
self.assertEqual(sorted_data.title, 'Founders')
|
||||
|
||||
first_row = sorted_data[0]
|
||||
second_row = sorted_data[2]
|
||||
@@ -517,5 +635,89 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
data.csv
|
||||
|
||||
def test_csv_column_select(self):
|
||||
"""Build up a CSV and test selecting a column"""
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.csv = self.founders.csv
|
||||
|
||||
headers = data.headers
|
||||
self.assertTrue(isinstance(headers[0], unicode))
|
||||
|
||||
orig_first_name = self.founders[self.headers[0]]
|
||||
csv_first_name = data[headers[0]]
|
||||
self.assertEqual(orig_first_name, csv_first_name)
|
||||
|
||||
|
||||
def test_csv_column_delete(self):
|
||||
"""Build up a CSV and test deleting a column"""
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.csv = self.founders.csv
|
||||
|
||||
target_header = data.headers[0]
|
||||
self.assertTrue(isinstance(target_header, unicode))
|
||||
|
||||
del data[target_header]
|
||||
|
||||
self.assertTrue(target_header not in data.headers)
|
||||
|
||||
def test_csv_column_sort(self):
|
||||
"""Build up a CSV and test sorting a column by name"""
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.csv = self.founders.csv
|
||||
|
||||
orig_target_header = self.founders.headers[1]
|
||||
target_header = data.headers[1]
|
||||
|
||||
self.founders.sort(orig_target_header)
|
||||
data.sort(target_header)
|
||||
|
||||
self.assertEqual(self.founders[orig_target_header], data[target_header])
|
||||
|
||||
def test_unicode_renders_markdown_table(self):
|
||||
# add another entry to test right field width for
|
||||
# integer
|
||||
self.founders.append(('Old', 'Man', 100500))
|
||||
|
||||
self.assertEqual(
|
||||
"""
|
||||
first_name|last_name |gpa
|
||||
----------|----------|------
|
||||
John |Adams |90
|
||||
George |Washington|67
|
||||
Thomas |Jefferson |50
|
||||
Old |Man |100500
|
||||
""".strip(),
|
||||
unicode(self.founders)
|
||||
)
|
||||
|
||||
|
||||
def test_databook_add_sheet_accepts_only_dataset_instances(self):
|
||||
class NotDataset(object):
|
||||
def append(self, item):
|
||||
pass
|
||||
|
||||
dataset = NotDataset()
|
||||
dataset.append(self.john)
|
||||
|
||||
self.assertRaises(tablib.InvalidDatasetType, book.add_sheet, dataset)
|
||||
|
||||
|
||||
def test_databook_add_sheet_accepts_dataset_subclasses(self):
|
||||
class DatasetSubclass(tablib.Dataset):
|
||||
pass
|
||||
|
||||
# just checking if subclass of tablib.Dataset can be added to Databook
|
||||
dataset = DatasetSubclass()
|
||||
dataset.append(self.john)
|
||||
dataset.append(self.tom)
|
||||
|
||||
try:
|
||||
book.add_sheet(dataset)
|
||||
except tablib.InvalidDatasetType:
|
||||
self.fail("Subclass of tablib.Dataset should be accepted by Databook.add_sheet")
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
# Tox (http://tox.testrun.org/) is a tool for running tests
|
||||
# in multiple virtualenvs. This configuration file will run the
|
||||
# test suite on all supported python versions. To use it, "pip install tox"
|
||||
# and then run "tox" from this directory.
|
||||
|
||||
[tox]
|
||||
envlist = py25,py26,py27,py3
|
||||
envlist = py26, py27, py32, py33, pypy
|
||||
|
||||
[testenv]
|
||||
commands=py.test --junitxml=junit-{envname}.xml
|
||||
deps = pytest
|
||||
|
||||
[testenv:py25]
|
||||
simplejson = pytest simplejson
|
||||
|
||||
[testenv:pypy]
|
||||
basepython=/usr/bin/pypy-c
|
||||
simplejson = pytest simplejson
|
||||
|
||||
[testenv:py3]
|
||||
basepython=/usr/bin/python3
|
||||
simplejson = pytest
|
||||
commands = python setup.py test
|
||||
deps =
|
||||
pytest
|
||||
PyYAML
|
||||
xlrd
|
||||
omnijson
|
||||
|
||||
Reference in New Issue
Block a user