diff --git a/README.md b/README.md index b1b56d5..223a85a 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ Earth 6371 5973.6 Moon 1737 73.5 Mars 3390 641.85 ----- ------ ------------- + ``` The following tabular data types are supported: @@ -101,6 +102,7 @@ Sun 696000 1.9891e+09 Earth 6371 5973.6 Moon 1737 73.5 Mars 3390 641.85 + ``` If `headers="firstrow"`, then the first row of data is used: @@ -112,6 +114,7 @@ Name Age ------ ----- Alice 24 Bob 19 + ``` If `headers="keys"`, then the keys of a dictionary/dataframe, or column @@ -125,6 +128,7 @@ Name Age ------ ----- Alice 24 Bob 19 + ``` When data is a list of dictionaries, a dictionary can be passed as `headers` @@ -137,6 +141,7 @@ Name Age ------ ----- Alice 24 Bob 19 + ``` ### Row Indices @@ -154,6 +159,7 @@ or `showindex=False`. To add a custom row index column, pass 0 F 24 1 M 19 - - -- + ``` ### Table format @@ -210,6 +216,7 @@ item qty spam 42 eggs 451 bacon 0 + ``` `simple` is the default format (the default may change in future @@ -223,6 +230,7 @@ item qty spam 42 eggs 451 bacon 0 + ``` `github` follows the conventions of GitHub flavored Markdown. 
It @@ -230,11 +238,12 @@ corresponds to the `pipe` format without alignment colons: ```pycon >>> print(tabulate(table, headers, tablefmt="github")) -| item | qty | +| item | qty | |--------|-------| -| spam | 42 | -| eggs | 451 | -| bacon | 0 | +| spam | 42 | +| eggs | 451 | +| bacon | 0 | + ``` `grid` is like tables formatted by Emacs' @@ -252,6 +261,7 @@ corresponds to the `pipe` format without alignment colons: +--------+-------+ | bacon | 0 | +--------+-------+ + ``` `simple_grid` draws a grid using single-line box-drawing characters: @@ -333,6 +343,7 @@ corresponds to the `pipe` format without alignment colons: ├────────┼───────┤ │ bacon │ 0 │ ╘════════╧═══════╛ + ``` `colon_grid` is similar to `grid` but uses colons only to define @@ -437,6 +448,7 @@ similar the alignment specification of Pandoc `grid_tables`: spam | 42 eggs | 451 bacon | 0 + ``` `pretty` attempts to be close to the format emitted by the PrettyTables @@ -451,6 +463,7 @@ library: | eggs | 451 | | bacon | 0 | +-------+-----+ + ``` `psql` is like tables formatted by Postgres' psql cli: @@ -464,6 +477,7 @@ library: | eggs | 451 | | bacon | 0 | +--------+-------+ + ``` `pipe` follows the conventions of [PHP Markdown @@ -478,6 +492,7 @@ indicate column alignment: | spam | 42 | | eggs | 451 | | bacon | 0 | + ``` `asciidoc` formats data like a simple table of the @@ -488,11 +503,12 @@ format: >>> print(tabulate(table, headers, tablefmt="asciidoc")) [cols="8<,7>",options="header"] |==== -| item | qty -| spam | 42 -| eggs | 451 -| bacon | 0 +| item | qty +| spam | 42 +| eggs | 451 +| bacon | 0 |==== + ``` `orgtbl` follows the conventions of Emacs @@ -506,6 +522,7 @@ in the minor orgtbl-mode. Hence its name: | spam | 42 | | eggs | 451 | | bacon | 0 | + ``` `jira` follows the conventions of Atlassian Jira markup language: @@ -516,6 +533,7 @@ in the minor orgtbl-mode. 
Hence its name: | spam | 42 | | eggs | 451 | | bacon | 0 | + ``` `rst` formats data like a simple table of the @@ -531,6 +549,7 @@ spam 42 eggs 451 bacon 0 ====== ===== + ``` `mediawiki` format produces a table markup used in @@ -550,6 +569,7 @@ MediaWiki-based sites: |- | bacon || style="text-align: right;"| 0 |} + ``` `moinmoin` format produces a table markup used in @@ -557,20 +577,22 @@ MediaWiki-based sites: ```pycon >>> print(tabulate(table, headers, tablefmt="moinmoin")) -|| ''' item ''' || ''' quantity ''' || -|| spam || 41.999 || -|| eggs || 451 || -|| bacon || || +|| ''' item ''' || ''' qty ''' || +|| spam || 42 || +|| eggs || 451 || +|| bacon || 0 || + ``` `youtrack` format produces a table markup used in Youtrack tickets: ```pycon >>> print(tabulate(table, headers, tablefmt="youtrack")) -|| item || quantity || -| spam | 41.999 | -| eggs | 451 | -| bacon | | +|| item || qty || +| spam | 42 | +| eggs | 451 | +| bacon | 0 | + ``` `textile` format produces a table markup used in @@ -582,6 +604,7 @@ MediaWiki-based sites: |<. spam |>. 42 | |<. eggs |>. 451 | |<. bacon |>. 0 | + ``` `html` produces standard HTML markup as an html.escape'd str @@ -592,13 +615,16 @@ and a .str property so that the raw HTML remains accessible. ```pycon >>> print(tabulate(table, headers, tablefmt="html")) - + + +
item qty
spam 42
eggs 451
bacon 0
+ ``` `latex` format creates a `tabular` environment for LaTeX markup, @@ -616,6 +642,7 @@ correspondents: bacon & 0 \\ \hline \end{tabular} + ``` `latex_raw` behaves like `latex` but does not escape LaTeX commands and @@ -650,6 +677,7 @@ at a glance: 12345 1234.5 ---------- + ``` Compare this with a more common right alignment: @@ -663,6 +691,7 @@ Compare this with a more common right alignment: 12345 1234.5 ------ + ``` For `tabulate`, anything which can be parsed as a number is a number. @@ -671,7 +700,7 @@ comes in handy when reading a mixed table of text and numbers from a file: ```pycon ->>> import csv ; from StringIO import StringIO +>>> import csv; from io import StringIO >>> table = list(csv.reader(StringIO("spam, 42\neggs, 451\n"))) >>> table [['spam', ' 42'], ['eggs', ' 451']] @@ -680,16 +709,18 @@ file: spam 42 eggs 451 ---- ---- + ``` To disable this feature use `disable_numparse=True`. ```pycon ->>> print(tabulate.tabulate([["Ver1", "18.0"], ["Ver2","19.2"]], tablefmt="simple", disable_numparse=True)) +>>> print(tabulate([["Ver1", "18.0"], ["Ver2","19.2"]], tablefmt="simple", disable_numparse=True)) ---- ---- Ver1 18.0 Ver2 19.2 ---- ---- + ``` ### Custom column alignment @@ -704,6 +735,7 @@ Furthermore, you can define `colalign` for column-specific alignment as a list o 1 2 3 4 111 222 333 444 --- --- --- --- + ``` ### Custom header alignment @@ -714,11 +746,11 @@ Headers' alignment can be defined separately from columns'. Like for columns, yo ```pycon >>> print(tabulate([[1,2,3,4,5,6],[111,222,333,444,555,666]], colglobalalign = 'center', colalign = ('left',), headers = ['h','e','a','d','e','r'], headersglobalalign = 'right', headersalign = ('same','same','left','global','center'))) - h e a d e r --- --- --- --- --- --- 1 2 3 4 5 6 111 222 333 444 555 666 + ``` ### Number formatting @@ -732,6 +764,7 @@ columns of decimal numbers. 
Use `floatfmt` named argument: pi 3.1416 e 2.7183 -- ------ + ``` `floatfmt` argument can be a list or a tuple of format strings, one per @@ -742,6 +775,7 @@ column, in which case every column may have different number formatting: --- ----- ------- 0.1 0.123 0.12345 --- ----- ------- + ``` `intfmt` works similarly for integers @@ -752,6 +786,47 @@ column, in which case every column may have different number formatting: b 90,000 - ------ + +### Type Deduction and Missing Values + +When `tabulate` sees numerical data (with or without comma separators), it +attempts to align the column on the decimal point. However, if it observes +non-numerical data in the column, it aligns it to the left by default. If +data is missing in a column (either None or empty values), the remaining +data in the column is used to infer the type: + +```pycon +>>> from fractions import Fraction +>>> test_table = [ +... [None, "1.23423515351", Fraction(1, 3)], +... [Fraction(56789, 1000000), 12345.1, b"abc"], +... ["", b"", None], +... [Fraction(10000, 3), None, ""], +... ] +>>> print(tabulate(test_table, floatfmt=",.5g", missingval="?")) +------------ ----------- --- + ? 1.2342 1/3 + 0.056789 12,345 abc + ? +3,333.3 ? +------------ ----------- --- + +``` + +The deduced type (e.g. str, float) influences the rendering of any types +that have alternative representations. For example, since `Fraction` has +methods `__str__` and `__float__` defined (and hence is convertible to a +`float` and also has a `str` representation), the appropriate +representation is selected for the column's deduced type. In order to not +lose precision accidentally, types having both an `__int__` and +`__float__` representation will be considered a `float`.
+ +Therefore, if your table contains types convertible to int/float but you'd +*prefer* they be represented as strings, or your strings *might* all look +like numbers such as "1e23": either convert them to the desired +representation before you `tabulate`, or ensure that the column always +contains at least one other `str`. + ### Text formatting By default, `tabulate` removes leading and trailing whitespace from text @@ -802,6 +877,7 @@ a multiline cell, and headers with a multiline cell: ```pycon >>> table = [["eggs",451],["more\nspam",42]] >>> headers = ["item\nname", "qty"] + ``` `plain` tables: @@ -813,6 +889,7 @@ name eggs 451 more 42 spam + ``` `simple` tables: @@ -825,6 +902,7 @@ name eggs 451 more 42 spam + ``` `grid` tables: @@ -840,6 +918,7 @@ spam | more | 42 | | spam | | +--------+-------+ + ``` `fancy_grid` tables: @@ -855,6 +934,7 @@ spam │ more │ 42 │ │ spam │ │ ╘════════╧═══════╛ + ``` `pipe` tables: @@ -867,6 +947,7 @@ spam | eggs | 451 | | more | 42 | | spam | | + ``` `orgtbl` tables: @@ -879,18 +960,19 @@ spam | eggs | 451 | | more | 42 | | spam | | + ``` `jira` tables: ```pycon >>> print(tabulate(table, headers, tablefmt="jira")) -| item | qty | -| name | | -|:-------|------:| +|| item || qty || +|| name || || | eggs | 451 | | more | 42 | | spam | | + ``` `presto` tables: @@ -903,6 +985,7 @@ spam eggs | 451 more | 42 spam | + ``` `pretty` tables: @@ -917,6 +1000,7 @@ spam | more | 42 | | spam | | +------+-----+ + ``` `psql` tables: @@ -931,6 +1015,7 @@ spam | more | 42 | | spam | | +--------+-------+ + ``` `rst` tables: @@ -945,6 +1030,7 @@ eggs 451 more 42 spam ====== ===== + ``` Multiline cells are not well-supported for the other table formats. @@ -974,6 +1060,32 @@ the lines being wrapped would probably be significantly longer than this. | John Smith | Middle | | | Manager | +------------+---------+ + +``` + +Text is preferably wrapped on whitespaces and right after the hyphens in hyphenated words. 
+`break_long_words` (default: `True`): if true, words longer than the maximum column width are broken to ensure that no line is longer than that width. +If false, long words are not broken, and some lines may be longer than the maximum column width. +(Long words are put on a line by themselves, in order to minimize the amount by which the width is exceeded.) + +`break_on_hyphens` (default: `True`): if true, wrapping occurs preferably on whitespace and right after hyphens in compound words, as is customary in English. +If false, only whitespace is considered a potentially good place for line breaks. + +```pycon +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False)) ++------------+---------+ +| Name | Title | ++============+=========+ +| John Smith | Middle- | +| | Manager | ++------------+---------+ +>>> print(tabulate([["John Smith", "Middle-Manager"]], headers=["Name", "Title"], tablefmt="grid", maxcolwidths=[None, 5], break_long_words=False, break_on_hyphens=False)) ++------------+----------------+ +| Name | Title | ++============+================+ +| John Smith | Middle-Manager | ++------------+----------------+ ``` ### Adding Separating lines @@ -1011,12 +1123,12 @@ table, however, ANSI escape sequences are not removed so the original styling is Some terminals support a special grouping of ANSI escape sequences that are intended to display hyperlinks much in the same way they are shown in browsers. These are handled just as mentioned before: non-printable -ANSI escape sequences are removed prior to string length calculation. The only diifference with escaped +ANSI escape sequences are removed prior to string length calculation. The only difference with escaped hyperlinks is that column width will be based on the length of the URL _text_ rather than the URL itself (terminals would show this text).
For example: >>> len('\x1b]8;;https://example.com\x1b\\example\x1b]8;;\x1b\\') # display length is 7, showing 'example' - 45 + 40 Usage of the command line utility @@ -1136,6 +1248,13 @@ tox -e lint See `tox.ini` file to learn how to use to test individual Python versions. +To test the "doctest" examples and their outputs in `README.md`: + +```shell +python3 -m pip install pytest-doctestplus[md] +python3 -m doctest README.md +``` + Contributors ------------ diff --git a/pyproject.toml b/pyproject.toml index 4144f9b..d13e92d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,16 @@ [build-system] -requires = ["setuptools>=61.2.0", "setuptools_scm[toml]>=3.4.3"] +requires = ["setuptools>=77.0.3", "setuptools_scm[toml]>=3.4.3"] build-backend = "setuptools.build_meta" [project] name = "tabulate" authors = [{name = "Sergey Astanin", email = "s.astanin@gmail.com"}] -license = {text = "MIT"} +license = "MIT" +license-files = ["LICENSE"] description = "Pretty-print tabular data" readme = "README.md" classifiers = [ "Development Status :: 4 - Beta", - "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 0d249ac..e100c09 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -33,6 +33,15 @@ def _is_file(f): # minimum extra space in headers MIN_PADDING = 2 +# Whether or not to preserve leading/trailing whitespace in data. +PRESERVE_WHITESPACE = False + +# TextWrapper breaks words longer than 'width'. +_BREAK_LONG_WORDS = True +# TextWrapper is breaking hyphenated words. 
+_BREAK_ON_HYPHENS = True + + _DEFAULT_FLOATFMT = "g" _DEFAULT_INTFMT = "" _DEFAULT_MISSINGVAL = "" @@ -737,6 +746,7 @@ def escape_empty(val): "pretty": "pretty", "psql": "psql", "rst": "rst", + "github": "github", "outline": "outline", "simple_outline": "simple_outline", "rounded_outline": "rounded_outline", @@ -876,25 +886,55 @@ def _isconvertible(conv, string): def _isnumber(string): - """ + """Detects if something *could* be considered a numeric value, vs. just a string. + + This promotes types convertible to both int and float to be considered + a float. Note that, iff *all* values appear to be some form of numeric + value such as eg. "1e2", they would be considered numbers! + + The exception is things that appear to be numbers but overflow to + +/-inf, eg. "1e23456"; we'll have to exclude them explicitly. + + >>> _isnumber(123) + True + >>> _isnumber(123.45) + True >>> _isnumber("123.45") True >>> _isnumber("123") True >>> _isnumber("spam") False - >>> _isnumber("123e45678") + >>> _isnumber("123e45") + True + >>> _isnumber("123e45678") # evaluates equal to 'inf', but ... isn't False >>> _isnumber("inf") True + >>> from fractions import Fraction + >>> _isnumber(Fraction(1,3)) + True + """ - if not _isconvertible(float, string): - return False - elif isinstance(string, (str, bytes)) and ( - math.isinf(float(string)) or math.isnan(float(string)) - ): - return string.lower() in ["inf", "-inf", "nan"] - return True + return ( + # fast path + type(string) in (float, int) + # covers 'NaN', +/- 'inf', and eg. '1e2', as well as any type + # convertible to int/float. + or ( + _isconvertible(float, string) + and ( + # some other type convertible to float + not isinstance(string, (str, bytes)) + # or, a numeric string eg. 
"1e1...", "NaN", ..., but isn't + # just an over/underflow + or ( + not (math.isinf(float(string)) or math.isnan(float(string))) + or string.lower() in ["inf", "-inf", "nan"] + ) + ) + ) + ) def _isint(string, inttype=int): @@ -1547,7 +1587,12 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): field_names = [field.name for field in dataclasses.fields(rows[0])] if headers == "keys": headers = field_names - rows = [[getattr(row, f) for f in field_names] for row in rows] + rows = [ + [getattr(row, f) for f in field_names] + if not _is_separating_line(row) + else row + for row in rows + ] elif headers == "keys" and len(rows) > 0: # keys are column indices @@ -1593,7 +1638,7 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers, headers_pad -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True): +def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1610,14 +1655,8 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True): continue if width is not None: - wrapper = _CustomTextWrap(width=width) - # Cast based on our internal type handling. Any future custom - # formatting of types (such as datetimes) may need to be more - # explicit than just `str` of the object. Also doesn't work for - # custom floatfmt/intfmt, nor with any missing/blank cells. 
- casted_cell = ( - str(cell) if _isnumber(cell) else _type(cell, numparse)(cell) - ) + wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens) + casted_cell = str(cell) wrapped = [ "\n".join(wrapper.wrap(line)) for line in casted_cell.splitlines() @@ -1675,6 +1714,8 @@ def tabulate( headersalign=None, rowalign=None, maxheadercolwidths=None, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, ): """Format a fixed width table for pretty printing. @@ -2217,7 +2258,7 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses + list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens ) if maxheadercolwidths is not None: @@ -2231,7 +2272,7 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses + [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens )[0] # empty values in the first column of RST tables should be escaped (issue #82) diff --git a/test/test_api.py b/test/test_api.py index 062573c..f35d09a 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -56,6 +56,8 @@ def test_tabulate_signature(): ("headersalign", None), ("rowalign", None), ("maxheadercolwidths", None), + ("break_long_words", True), + ("break_on_hyphens", True), ] _check_signature(tabulate, expected_sig) diff --git a/test/test_input.py b/test/test_input.py index b910a34..8368770 100644 --- a/test/test_input.py +++ b/test/test_input.py @@ -1,6 +1,6 @@ """Test support of the various forms of tabular data.""" -from tabulate import tabulate +from tabulate import tabulate, SEPARATING_LINE from common import assert_equal, assert_in, raises, skip try: @@ -520,6 +520,28 @@ def 
test_py37orlater_list_of_dataclasses_headers(): skip("test_py37orlater_list_of_dataclasses_headers is skipped") +def test_py37orlater_list_of_dataclasses_with_separating_line(): + "Input: a list of dataclasses with a separating line" + try: + from dataclasses import make_dataclass + + Person = make_dataclass("Person", ["name", "age", "height"]) + ld = [Person("Alice", 23, 169.5), SEPARATING_LINE, Person("Bob", 27, 175.0)] + result = tabulate(ld, headers="keys") + expected = "\n".join( + [ + "name age height", + "------ ----- --------", + "Alice 23 169.5", + "------ ----- --------", + "Bob 27 175", + ] + ) + assert_equal(expected, result) + except ImportError: + skip("test_py37orlater_list_of_dataclasses_keys is skipped") + + def test_list_bytes(): "Input: a list of bytes. (issue #192)" lb = [["你好".encode()], ["你好"]] diff --git a/test/test_output.py b/test/test_output.py index e3d369a..12dfc3a 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -509,6 +509,23 @@ def test_github(): assert_equal(expected, result) +def test_github_multiline(): + "Output: github with multiline cells with headers" + table = [[2, "foo\nbar"]] + headers = ("more\nspam eggs", "more spam\n& eggs") + expected = "\n".join( + [ + "| more | more spam |", + "| spam eggs | & eggs |", + "|-------------|-------------|", + "| 2 | foo |", + "| | bar |", + ] + ) + result = tabulate(table, headers, tablefmt="github") + assert_equal(expected, result) + + def test_grid(): "Output: grid with headers" expected = "\n".join( @@ -3302,3 +3319,32 @@ def test_preserve_whitespace(): expected = "\n".join(["h1 h2 h3", "---- ---- ----", "foo bar foo"]) result = tabulate(test_table, table_headers, preserve_whitespace=False) assert_equal(expected, result) + +def test_break_long_words(): + "Output: Default table output, with breakwords true." 
+ table_headers = ["h1", "h2", "h3"] + test_table = [[" foo1", " bar2 ", "foo3"]] + + # Table is not wrapped on 3 letters due to long word + expected = "h1 h2 h3\n---- ---- ----\nfoo1 bar2 foo3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=False) + assert_equal(expected, result) + + # Table max width is 3 letters + expected = "h1 h2 h3\n---- ---- ----\nf ba foo\noo1 r2 3" + result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=True) + assert_equal(expected, result) + +def test_break_on_hyphens(): + "Output: Default table output, with break on hyphens true." + table_headers = ["h1", "h2", "h3"] + test_table = [[" foo-bar", " bar-bar ", "foo-foo"]] + # Table max width is 5; with break_on_hyphens=False, hyphens are not preferred break points + expected = "h1 h2 h3\n---- ---- -----\nfoo bar- foo-f\n-bar bar oo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=False) + assert_equal(expected, result) + + # With break_on_hyphens=True, long cells are broken right after the hyphens + expected = "h1 h2 h3\n---- ---- ----\nfoo- bar- foo-\nbar bar foo" + result = tabulate(test_table, table_headers, maxcolwidths=5, break_on_hyphens=True) + assert_equal(expected, result) diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 8c0a6cc..46dd818 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -220,3 +220,27 @@ def test_wrap_datetime(): ] expected = "\n".join(expected) assert_equal(expected, result) + + +def test_wrap_optional_bool_strs(): + """TextWrapper: Show that str bools and None can be wrapped without crashing""" + data = [ + ["First Entry", "True"], + ["Second Entry", None], + ] + headers = ["Title", "When"] + result = tabulate(data, headers=headers, tablefmt="grid", maxcolwidths=[7, 5]) + + expected = [ + "+---------+--------+", + "| Title | When |", + "+=========+========+", + "| First | True |", + "| Entry | |", + "+---------+--------+", + "| Second | None |", + "| Entry | |", + "+---------+--------+", + ]
expected = "\n".join(expected) + assert_equal(expected, result)