From 6d097c0214f9d581a2c41e4c701cfaca3796f874 Mon Sep 17 00:00:00 2001 From: Claude Paroz Date: Sat, 16 May 2020 08:07:32 +0200 Subject: [PATCH] Fixes #465 - Allow importing 'ragged' .xlsx files (#466) --- HISTORY.md | 1 + src/tablib/formats/_xlsx.py | 2 ++ tests/files/ragged.xlsx | Bin 0 -> 4698 bytes tests/test_tablib.py | 7 +++++++ 4 files changed, 10 insertions(+) create mode 100644 tests/files/ragged.xlsx diff --git a/HISTORY.md b/HISTORY.md index de2ab9d..c7a1336 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -12,6 +12,7 @@ - Fixed minimal openpyxl dependency version to 2.6.0 (#457). - Dates from xls files are now read as Python datetime objects (#373). +- Allow import of "ragged" xlsx files (#465). ### Improvements diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index dffc192..e740373 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -92,6 +92,8 @@ class XLSXFormat: if (i == 0) and (headers): data.headers = row_vals else: + if i > 0 and len(row_vals) < data.width: + row_vals += [''] * (data.width - len(row_vals)) data.append(row_vals) dbook.add_sheet(data) diff --git a/tests/files/ragged.xlsx b/tests/files/ragged.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..339ec22b11798877814c3140bbefc26488eed673 GIT binary patch literal 4698 zcmai12RzjO|L5#^_MX`!WJflU?7er^aaNp}%@J|-st_`ZvuE}yq#+}F<%|$g#{W*g z-!Jw5efzy`eLj!Jy`Oil^?W^F+UgjX6ll1(xML`!<|0);Sh1*OvsztV$Ol+Om4%n z$2+gxed@m{*^&pbZPbuq1yI)RTCfrCfZ=J}65$ip5&bIWNxkkrYH39)M_Ws-Yz11D z)Q|>0=r+A=c=N{mb;9+xwM5G-#jqQtcq3}Qh9l)0%6oFS*?~Qck2x1NIx(^_miEsy zCL-%a@sH&57`{ueMpD^+$2z(@Ur~xvrE^pJ1ikNmxNsaGMl7AT5y3O5s&FceCvir1 z@M%D|edxW_z$c1+bVjZ2t4)dl12NT{mhCAHDt_Bx4Ne%0VZ#HB65p0SRr3yt)p7HI zsx46HR0^qXI@n!jgz#Fch>l-mum~WT-AqD)nPo_OUFZ)bS^7<#)n% zO@>&vmoSy@=va8x@r@zQ``;7ARj`#^QQ$Ltf&pKzI!OagZMt=5#-lv}h0(1|5!$KK zb3zmD4O0Y_)+P6Je%O5BM#Po|h5??ZEQ_Z{^7*ja-^Ej$5G~o1f2(rdL>5WpSr2SIt^Zh@NL(LEKRRc1;M@v)ST{H9&P$rg46Iz70?;pzWwH zCTu8gw=p<>QjxL9CtaN5FiYLt+S}lGXxDs7%cJ3u9ir$quyQrsm6T4^iLVNq;9{icXGt@SS( z$<~BFJnLGxGi6e;BI_%CMOmlUWgPFwDBjPq3EL$H;!U|B5P-Yj7Sxva<@6e6NrtT-jN<)I_cAHzy?WHb=;X5z7fnZ<6;J^n^}ku8--_aY{1)nXusiGBCYbWTFSeAmjW(V%WCQJ^rKK1xE8FMkeNZd0w$ey-Zrz;aSM?m^s6`RfP5lAzDny^?p zS{|ZL&7ca3v*qh5K|jbCH05WlZN~h-|LCmRVoocLSkr=N^6OI682V0=F{cn=D!a$o zR9iR9kxIz|`aJG{92JAgX9x{A(nDTk9R9iZ$$!Sc$H~FXLEz&3qV8WCO}o#qker5} zVJxEY3)!+oyGjnFK8q)z4X` z%nPvPR!ABqI_Qirx&qfYWJ(t47XyOmLL=mqh0LeDsm^B0tFfQa5jTOK-<&v*f<(+Q zx->OSnp%W3^af(IA}j=g8X}HNTV=@*Aje6@*11K;Ziu6J%T`6i#FNP4&5+P~x4AiY zS04&b&U5mj-Q@&AlOO^6LoI`_n@%n3hHE%8r^NzH=w>QFmrYnV}XgzUIv8F zeA7wZ&~nne0>2%C(pE>WA3)Kh9fPOog?P#U2m3>K5uayxaITuQtcd%jDVUfz?jI(J zeGQ-@&_qGR&A_4(*d)<3EgtxDUgbC=X`e45PO#o4Tyb3danpz46-a5G4()z#;5#~^9qgfI z;V;idauIylXwS#^$u(BiBq z#9dl$*Sg7(neUB@=^y%s88)c1lt}TGBa8CNz?6gQ6XM~+Y9aasK(%kU+v~~aFL`y& zX0DOb6f1~L66(2ww#DMx5M?rPYy#4!q4~<;h(ueM zKWg-L#&exm_6N;2PqSO|hZv1k9?sqyjyed+fCMTEKIQK#YFZjE4(O+4#_aQgLt7>H8&&LZjwNb-b zIZ+!a7pWNx=Z*y-f)I?&I`o_seGw5s?km|kJ$x9>iGr%y;IX23%^E&8W z2J&xrB47W^AVmLi%*P+(aN$v%{;+$lAO(^;B36R?1mg-g0R^Dg8a3*y+n?y{u=zq* zerN(tzgya^zj7)NE7PZ%|8ZmO<5Bivh)8y;8gnX~Cq) z@={+h+q>t2J>?9ybE~Q{n=w7LbP*X66td0mchzh>Yo+`=nnsuw8I5oUXJD~dd1s8g z7La?8F3TEL?mHOQTH*seiS+vcZnZm7LpSK0V1Y)&W~`V!&5l>xNelkeWZDT@S^;p_ zy3*s!6LRc40!m4aEHq|R|45xVxj$fWJn0q5l_2f91l~y&8v&_3UP>5G6~QTzpQMmN zkl5BQRZH>&oZQ#%Kqejs_xWr)!w7PWME-T00)G|)YMk1F!LAod6sC?;q9oB;V^T`9 z5q3&^v8o^S(~!?8rk5YdZ5M9Y=ZMU-rz9yAfC{Qka=|i_=k9ZKc_Q->USeFJEEr~m zhwh}FW_!mh6+J9+pmKT2WMGgIFiEY|!n`(mYi$JY!*o&#Z{LkVkyv~}pt?~+Z_2oT zH`}#SX!I0DMm```<*h+>yLNXPhq%J(nxMG}FZy#$bHb012*{XpLGUwuMwM)5W)D|2q%rnX**$kRmhQIWm~18-5$*d2pgc|PrO|Y*=pmwladnOF!|E>4xNQY^eMb= zeb;OF!+t9>lec<<-ywhC88fsfwOaO>=>4GhV^i}x?7~3d(*crX+qmhNlHRJd7@N&; zTA|DY{EcT}?CJF8A-;};@aY5`=o?PxD23*Ud=;MVdTg>bFToxo0LvuB_?Wc=zNX9>2 zujXLYnEHO0CpLeBM1k58dft0-KIQ~Jr3=^%3)TvD-*uZG&S`enhFrP#_)Q-(r4^K29W01%%z4HxI~&_~HUDKf?7p;d+eL#u1?RKWARXcU zpIXmM5$x{c;O=9k@9*K@ZGmdiI=x{{WL2C1&RF*-McpM?BC`))F5#-C0*yB{?A{Cb zf0~$stC_Nwv4p*|Y_0E?5=PXO?B~IIvB|UM`Gk1-3*?@Mi-nj#ANKCAYf_|ky1wYN z<2Y2cm1)Voc1T(}Z?Bc+mqo@kA$0;HFRXckMbkVur8Vt7>mR)vhl@SuYLBm5nwM{6>Iet)z zR``y$b?-HM;jR@~epP}3>3EwARfNg|^v^teb zrV~Fn-+CP!{o=;P@V4V<`$g2RT-am0JcPxs6ewiB5olAuD=fTTG_j~L`koxn#w70f87B&hUANj~e KQeoMJ&;JMT@P~~6 literal 0 HcmV?d00001 diff --git a/tests/test_tablib.py b/tests/test_tablib.py index ca57d00..3efc87f 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1015,6 +1015,13 @@ class XLSXTests(BaseTestCase): self.assertEqual(data.dict[0]['float'], 21.55) self.assertEqual(data.dict[0]['date/time'], date_time) + def test_xlsx_import_set_ragged(self): + """Import XLSX file when not all rows have the same length.""" + xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx' + with open(str(xlsx_source), mode='rb') as fh: + book = tablib.Databook().load(fh, 'xlsx') + self.assertEqual(book.sheets()[0].pop(), (1.0, '')) + def test_xlsx_wrong_char(self): """Bad characters are not silently ignored. We let the exception bubble up.""" from openpyxl.utils.exceptions import IllegalCharacterError