@@ -104,6 +104,10 @@ class Document(proto.Message):
104104 revisions (MutableSequence[google.cloud.documentai_v1beta3.types.Document.Revision]):
105105 Placeholder. Revision history of this
106106 document.
107+ document_layout (google.cloud.documentai_v1beta3.types.Document.DocumentLayout):
108+ Parsed layout of the document.
109+ chunked_document (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument):
110+ Document chunked based on chunking config.
107111 """
108112
109113 class ShardInfo (proto .Message ):
@@ -1811,6 +1815,317 @@ class TextChange(proto.Message):
18111815 message = "Document.Provenance" ,
18121816 )
18131817
class DocumentLayout(proto.Message):
    r"""Parsed layout of a document, expressed as the collection of
    blocks that the document is divided into.

    Attributes:
        blocks (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock]):
            List of blocks in the document.
    """

    class DocumentLayoutBlock(proto.Message):
        r"""A single layout block. A block is one of the supported
        kinds: text, table or list.

        This message has `oneof`_ fields (mutually exclusive fields).
        For each oneof, at most one member field can be set at the same time.
        Setting any member of the oneof automatically clears all other
        members.

        .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

        Attributes:
            text_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutTextBlock):
                Block consisting of text content.

                This field is a member of `oneof`_ ``block``.
            table_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutTableBlock):
                Block consisting of table content/structure.

                This field is a member of `oneof`_ ``block``.
            list_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock):
                Block consisting of list content/structure.

                This field is a member of `oneof`_ ``block``.
            block_id (str):
                ID of the block.
            page_span (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan):
                Page span of the block.
        """

        class LayoutPageSpan(proto.Message):
            r"""Where the block starts and ends in the document.

            Attributes:
                page_start (int):
                    Page where the block starts in the document.
                page_end (int):
                    Page where the block ends in the document.
            """

            page_start: int = proto.Field(proto.INT32, number=1)
            page_end: int = proto.Field(proto.INT32, number=2)

        class LayoutTextBlock(proto.Message):
            r"""A text type block.

            Attributes:
                text (str):
                    Text content stored in the block.
                type_ (str):
                    Type of the text in the block. Available options are:
                    ``paragraph``, ``subtitle``, ``heading-1``, ``heading-2``,
                    ``heading-3``, ``heading-4``, ``heading-5``, ``header``,
                    ``footer``.
                blocks (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock]):
                    Child blocks of this text block. Because the
                    field is repeated and holds blocks, deeper
                    hierarchies and nested blocks are supported.
            """

            text: str = proto.Field(proto.STRING, number=1)
            type_: str = proto.Field(proto.STRING, number=2)
            # Self-referential: children are again DocumentLayoutBlock messages.
            blocks: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=3,
                message="Document.DocumentLayout.DocumentLayoutBlock",
            )

        class LayoutTableBlock(proto.Message):
            r"""A table type block.

            Attributes:
                header_rows (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow]):
                    Header rows at the top of the table.
                body_rows (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow]):
                    Body rows containing main table content.
                caption (str):
                    Table caption/title.
            """

            header_rows: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=1,
                message="Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow",
            )
            body_rows: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=2,
                message="Document.DocumentLayout.DocumentLayoutBlock.LayoutTableRow",
            )
            caption: str = proto.Field(proto.STRING, number=3)

        class LayoutTableRow(proto.Message):
            r"""A row in a table.

            Attributes:
                cells (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutTableCell]):
                    A table row is a list of table cells.
            """

            cells: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock.LayoutTableCell"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=1,
                message="Document.DocumentLayout.DocumentLayoutBlock.LayoutTableCell",
            )

        class LayoutTableCell(proto.Message):
            r"""A cell in a table row.

            Attributes:
                blocks (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock]):
                    A table cell is a list of blocks. Because the
                    field is repeated and holds blocks, deeper
                    hierarchies and nested blocks are supported.
                row_span (int):
                    How many rows this cell spans.
                col_span (int):
                    How many columns this cell spans.
            """

            # Self-referential: cell content is again DocumentLayoutBlock messages.
            blocks: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=1,
                message="Document.DocumentLayout.DocumentLayoutBlock",
            )
            row_span: int = proto.Field(proto.INT32, number=2)
            col_span: int = proto.Field(proto.INT32, number=3)

        class LayoutListBlock(proto.Message):
            r"""A list type block.

            Attributes:
                list_entries (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutListEntry]):
                    List entries that constitute a list block.
                type_ (str):
                    Type of the list_entries (if they exist). Available
                    options are ``ordered`` and ``unordered``.
            """

            list_entries: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock.LayoutListEntry"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=1,
                message="Document.DocumentLayout.DocumentLayoutBlock.LayoutListEntry",
            )
            type_: str = proto.Field(proto.STRING, number=2)

        class LayoutListEntry(proto.Message):
            r"""An entry in a list.

            Attributes:
                blocks (MutableSequence[google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock]):
                    A list entry is a list of blocks. Because the
                    field is repeated and holds blocks, deeper
                    hierarchies and nested blocks are supported.
            """

            # Self-referential: entry content is again DocumentLayoutBlock messages.
            blocks: MutableSequence[
                "Document.DocumentLayout.DocumentLayoutBlock"
            ] = proto.RepeatedField(
                proto.MESSAGE,
                number=1,
                message="Document.DocumentLayout.DocumentLayoutBlock",
            )

        # The three members of the ``block`` oneof (field numbers 2-4);
        # setting one clears the others.
        text_block: "Document.DocumentLayout.DocumentLayoutBlock.LayoutTextBlock" = proto.Field(
            proto.MESSAGE,
            number=2,
            oneof="block",
            message="Document.DocumentLayout.DocumentLayoutBlock.LayoutTextBlock",
        )
        table_block: "Document.DocumentLayout.DocumentLayoutBlock.LayoutTableBlock" = proto.Field(
            proto.MESSAGE,
            number=3,
            oneof="block",
            message="Document.DocumentLayout.DocumentLayoutBlock.LayoutTableBlock",
        )
        list_block: "Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock" = proto.Field(
            proto.MESSAGE,
            number=4,
            oneof="block",
            message="Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock",
        )
        # Fields outside the oneof.
        block_id: str = proto.Field(proto.STRING, number=1)
        page_span: "Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan" = proto.Field(
            proto.MESSAGE,
            number=5,
            message="Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan",
        )

    blocks: MutableSequence[
        "Document.DocumentLayout.DocumentLayoutBlock"
    ] = proto.RepeatedField(
        proto.MESSAGE,
        number=1,
        message="Document.DocumentLayout.DocumentLayoutBlock",
    )
2062+
class ChunkedDocument(proto.Message):
    r"""The chunks that the document is divided into.

    Attributes:
        chunks (MutableSequence[google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk]):
            List of chunks.
    """

    class Chunk(proto.Message):
        r"""A single chunk of the document.

        Attributes:
            chunk_id (str):
                ID of the chunk.
            source_block_ids (MutableSequence[str]):
                List of all parsed document layout source
                blocks used to generate the chunk.
            content (str):
                Text content of the chunk.
            page_span (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ChunkPageSpan):
                Page span of the chunk.
        """

        class ChunkPageSpan(proto.Message):
            r"""Where the chunk starts and ends in the document.

            Attributes:
                page_start (int):
                    Page where the chunk starts in the document.
                page_end (int):
                    Page where the chunk ends in the document.
            """

            page_start: int = proto.Field(proto.INT32, number=1)
            page_end: int = proto.Field(proto.INT32, number=2)

        chunk_id: str = proto.Field(proto.STRING, number=1)
        source_block_ids: MutableSequence[str] = proto.RepeatedField(
            proto.STRING,
            number=2,
        )
        content: str = proto.Field(proto.STRING, number=3)
        page_span: "Document.ChunkedDocument.Chunk.ChunkPageSpan" = proto.Field(
            proto.MESSAGE,
            number=4,
            message="Document.ChunkedDocument.Chunk.ChunkPageSpan",
        )

    chunks: MutableSequence["Document.ChunkedDocument.Chunk"] = proto.RepeatedField(
        proto.MESSAGE,
        number=1,
        message="Document.ChunkedDocument.Chunk",
    )
2128+
18142129 uri : str = proto .Field (
18152130 proto .STRING ,
18162131 number = 1 ,
@@ -1869,6 +2184,16 @@ class TextChange(proto.Message):
18692184 number = 13 ,
18702185 message = Revision ,
18712186 )
# Parsed layout of the document.
document_layout: DocumentLayout = proto.Field(proto.MESSAGE, number=17, message=DocumentLayout)
# Document chunked based on chunking config.
chunked_document: ChunkedDocument = proto.Field(proto.MESSAGE, number=18, message=ChunkedDocument)
18722197
18732198
18742199class RevisionRef (proto .Message ):
0 commit comments