diff --git a/.coveragerc b/.coveragerc
index 742e899d..dda4f66a 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -22,7 +22,7 @@ omit =
google/cloud/__init__.py
[report]
-fail_under = 100
+fail_under = 99
show_missing = True
exclude_lines =
# Re-enable the standard pragma
diff --git a/.github/.Owlbot.lock.yaml b/.github/.OwlBot.lock.yaml
similarity index 84%
rename from .github/.Owlbot.lock.yaml
rename to .github/.OwlBot.lock.yaml
index 6d9822ca..ff729308 100644
--- a/.github/.Owlbot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -13,5 +13,4 @@
# limitations under the License.
docker:
image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
- digest: sha256:e7bb19d47c13839fe8c147e50e02e8b6cf5da8edd1af8b82208cd6f66cc2829c
-# created: 2022-07-05T18:31:20.838186805Z
\ No newline at end of file
+ digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6
diff --git a/.github/.Owlbot.yaml b/.github/.OwlBot.yaml
similarity index 90%
rename from .github/.Owlbot.yaml
rename to .github/.OwlBot.yaml
index ef9b33cb..c379bd30 100644
--- a/.github/.Owlbot.yaml
+++ b/.github/.OwlBot.yaml
@@ -15,4 +15,4 @@
docker:
image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-begin-after-commit-hash: be22498ce258bf2d5fe12fd696d3ad9a2b6c430e
\ No newline at end of file
+begin-after-commit-hash: 92006bb3cdc84677aa93c7f5235424ec2b157146
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index f77dfdea..280fec17 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python: ['3.7', '3.8', '3.9', '3.10']
+ python: ['3.7', '3.8', '3.9', '3.10', '3.11']
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -54,4 +54,4 @@ jobs:
- name: Report coverage results
run: |
coverage combine .coverage-results/.coverage*
- coverage report --show-missing --fail-under=95
+ coverage report --show-missing --fail-under=99
diff --git a/.gitignore b/.gitignore
index f7ae0606..b4243ced 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,7 +29,6 @@ pip-log.txt
.nox
.cache
.pytest_cache
-.mypy_cache/
# Mac
diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt
index a2639539..66a2172a 100644
--- a/.kokoro/requirements.txt
+++ b/.kokoro/requirements.txt
@@ -1,5 +1,5 @@
#
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile --allow-unsafe --generate-hashes requirements.in
@@ -116,7 +116,6 @@ commonmark==0.9.1 \
cryptography==39.0.1 \
--hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \
--hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \
- --hash=sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885 \
--hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \
--hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \
--hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \
@@ -127,7 +126,6 @@ cryptography==39.0.1 \
--hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \
--hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \
--hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \
- --hash=sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef \
--hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \
--hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \
--hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \
@@ -137,7 +135,9 @@ cryptography==39.0.1 \
--hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \
--hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \
--hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8
- # via gcp-releasetool
+ # via
+ # gcp-releasetool
+ # secretstorage
distlib==0.3.6 \
--hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \
--hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e
@@ -273,6 +273,12 @@ jaraco-classes==3.2.3 \
--hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \
--hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a
# via keyring
+jeepney==0.8.0 \
+ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \
+ --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755
+ # via
+ # keyring
+ # secretstorage
jinja2==3.1.2 \
--hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \
--hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61
@@ -438,6 +444,10 @@ rsa==4.9 \
--hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \
--hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21
# via google-auth
+secretstorage==3.3.3 \
+ --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \
+ --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99
+ # via keyring
six==1.16.0 \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
@@ -446,9 +456,9 @@ six==1.16.0 \
# gcp-docuploader
# google-auth
# python-dateutil
-twine==4.0.2 \
- --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \
- --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8
+twine==4.0.1 \
+ --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \
+ --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0
# via -r requirements.in
typing-extensions==4.4.0 \
--hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \
@@ -478,7 +488,7 @@ zipp==3.10.0 \
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:
-setuptools==67.4.0 \
- --hash=sha256:e5fd0a713141a4a105412233c63dc4e17ba0090c8e8334594ac790ec97792330 \
- --hash=sha256:f106dee1b506dee5102cc3f3e9e68137bbad6d47b616be7991714b0c62204251
+setuptools==65.5.1 \
+ --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \
+ --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f
# via -r requirements.in
diff --git a/.kokoro/samples/python3.11/common.cfg b/.kokoro/samples/python3.11/common.cfg
new file mode 100644
index 00000000..49c5b22e
--- /dev/null
+++ b/.kokoro/samples/python3.11/common.cfg
@@ -0,0 +1,40 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Build logs will be here
+action {
+ define_artifacts {
+ regex: "**/*sponge_log.xml"
+ }
+}
+
+# Specify which tests to run
+env_vars: {
+ key: "RUN_TESTS_SESSION"
+ value: "py-3.11"
+}
+
+# Declare the build-specific Cloud project.
+env_vars: {
+ key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ value: "python-docs-samples-tests-311"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-documentai-toolbox/.kokoro/test-samples.sh"
+}
+
+# Configure the docker image for kokoro-trampoline.
+env_vars: {
+ key: "TRAMPOLINE_IMAGE"
+ value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker"
+}
+
+# Download secrets for samples
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Download trampoline resources.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
+
+# Use the trampoline script to run in docker.
+build_file: "python-documentai-toolbox/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.11/continuous.cfg b/.kokoro/samples/python3.11/continuous.cfg
new file mode 100644
index 00000000..a1c8d975
--- /dev/null
+++ b/.kokoro/samples/python3.11/continuous.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.11/periodic-head.cfg b/.kokoro/samples/python3.11/periodic-head.cfg
new file mode 100644
index 00000000..060cdc8a
--- /dev/null
+++ b/.kokoro/samples/python3.11/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-documentai-toolbox/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.11/periodic.cfg b/.kokoro/samples/python3.11/periodic.cfg
new file mode 100644
index 00000000..71cd1e59
--- /dev/null
+++ b/.kokoro/samples/python3.11/periodic.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "False"
+}
diff --git a/.kokoro/samples/python3.11/presubmit.cfg b/.kokoro/samples/python3.11/presubmit.cfg
new file mode 100644
index 00000000..a1c8d975
--- /dev/null
+++ b/.kokoro/samples/python3.11/presubmit.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 46d23716..5405cc8f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
rev: 22.3.0
hooks:
- id: black
-- repo: https://gitlab.com/pycqa/flake8
+- repo: https://github.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38b1ced0..abf05b23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
# Changelog
+## [0.4.1-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.4.0-alpha...v0.4.1-alpha) (2023-03-21)
+
+
+### Miscellaneous Chores
+
+* Release 0.4.1-alpha ([#85](https://github.com/googleapis/python-documentai-toolbox/issues/85)) ([bc8d6c7](https://github.com/googleapis/python-documentai-toolbox/commit/bc8d6c75fdee7e3efd8138916a731a881cec8811))
+
## [0.4.0-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.3.0-alpha...v0.4.0-alpha) (2023-03-09)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 90624015..3ce9b08a 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
documentation.
- The feature must work fully on the following CPython versions:
- 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows.
+ 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows.
- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
@@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests.
- To run a single unit test::
- $ nox -s unit-3.10 -- -k
+ $ nox -s unit-3.11 -- -k
.. note::
@@ -225,11 +225,13 @@ We support:
- `Python 3.8`_
- `Python 3.9`_
- `Python 3.10`_
+- `Python 3.11`_
.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
.. _Python 3.9: https://docs.python.org/3.9/
.. _Python 3.10: https://docs.python.org/3.10/
+.. _Python 3.11: https://docs.python.org/3.11/
Supported versions can be found in our ``noxfile.py`` `config`_.
diff --git a/google/cloud/documentai_toolbox/utilities/utilities.py b/google/cloud/documentai_toolbox/utilities/utilities.py
index bcaacc88..51ec8e75 100644
--- a/google/cloud/documentai_toolbox/utilities/utilities.py
+++ b/google/cloud/documentai_toolbox/utilities/utilities.py
@@ -96,7 +96,8 @@ def print_gcs_document_tree(gcs_bucket_name: str, gcs_prefix: str) -> None:
if dir_size > FILES_TO_DISPLAY:
print("│ ....")
print(f"{FILENAME_TREE_LAST}{file_name}\n")
- elif idx <= FILES_TO_DISPLAY:
+ break
+ if idx <= FILES_TO_DISPLAY:
print(f"{FILENAME_TREE_MIDDLE}{file_name}")
@@ -163,7 +164,7 @@ def create_batches(
)
)
- if batch != []:
+ if batch:
# Append the last batch, which could be less than `batch_size`
batches.append(
documentai.BatchDocumentsInputConfig(
diff --git a/google/cloud/documentai_toolbox/version.py b/google/cloud/documentai_toolbox/version.py
index 7f088250..6beebb3f 100644
--- a/google/cloud/documentai_toolbox/version.py
+++ b/google/cloud/documentai_toolbox/version.py
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-__version__ = "0.4.0-alpha"
+__version__ = "0.4.1-alpha"
diff --git a/noxfile.py b/noxfile.py
index 5025a91e..91a45415 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -31,7 +31,7 @@
DEFAULT_PYTHON_VERSION = "3.8"
-UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"]
UNIT_TEST_STANDARD_DEPENDENCIES = [
"mock",
"asyncmock",
@@ -188,9 +188,9 @@ def unit(session):
def install_systemtest_dependencies(session, *constraints):
# Use pre-release gRPC for system tests.
- # Exclude version 1.49.0rc1 which has a known issue.
- # See https://github.com/grpc/grpc/pull/30642
- session.install("--pre", "grpcio!=1.49.0rc1")
+ # Exclude version 1.52.0rc1 which has a known issue.
+ # See https://github.com/grpc/grpc/issues/32163
+ session.install("--pre", "grpcio!=1.52.0rc1")
session.install(*SYSTEM_TEST_STANDARD_DEPENDENCIES, *constraints)
@@ -267,7 +267,7 @@ def cover(session):
test runs (not system test runs), and then erases coverage data.
"""
session.install("coverage", "pytest-cov")
- session.run("coverage", "report", "--show-missing", "--fail-under=95")
+ session.run("coverage", "report", "--show-missing", "--fail-under=99")
session.run("coverage", "erase")
@@ -345,9 +345,7 @@ def prerelease_deps(session):
unit_deps_all = UNIT_TEST_STANDARD_DEPENDENCIES + UNIT_TEST_EXTERNAL_DEPENDENCIES
session.install(*unit_deps_all)
system_deps_all = (
- SYSTEM_TEST_STANDARD_DEPENDENCIES
- + SYSTEM_TEST_EXTERNAL_DEPENDENCIES
- + SYSTEM_TEST_EXTRAS
+ SYSTEM_TEST_STANDARD_DEPENDENCIES + SYSTEM_TEST_EXTERNAL_DEPENDENCIES
)
session.install(*system_deps_all)
@@ -377,8 +375,8 @@ def prerelease_deps(session):
# dependency of grpc
"six",
"googleapis-common-protos",
- # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642
- "grpcio!=1.49.0rc1",
+ # Exclude version 1.52.0rc1 which has a known issue. See https://github.com/grpc/grpc/issues/32163
+ "grpcio!=1.52.0rc1",
"grpcio-status",
"google-api-core",
"proto-plus",
diff --git a/owlbot.py b/owlbot.py
index 2c343db8..327db1be 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -29,9 +29,13 @@
# ----------------------------------------------------------------------------
templated_files = common.py_library(
system_test_python_versions=["3.8"],
- cov_level=100,
+ cov_level=99,
intersphinx_dependencies={
"pandas": "https://pandas.pydata.org/pandas-docs/stable/"
},
)
s.move(templated_files, excludes=["docs/multiprocessing.rst", "README.rst"])
+
+# Run the "blacken" format session in every directory that has a noxfile.
+for noxfile in pathlib.Path(".").glob("**/noxfile.py"):
+ s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False)
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
index 91d51543..14ccab37 100644
--- a/samples/snippets/requirements-test.txt
+++ b/samples/snippets/requirements-test.txt
@@ -1,3 +1,3 @@
pytest==7.2.2
mock==5.0.1
-google-cloud-bigquery==3.6.0
+google-cloud-bigquery==3.7.0
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index e955a0c3..85734acd 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,4 +1,4 @@
-google-cloud-bigquery==3.6.0
+google-cloud-bigquery==3.7.0
google-cloud-documentai==2.14.0
google-cloud-storage==2.7.0
google-cloud-documentai-toolbox==0.1.1a0
diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py
index b533840a..91b59676 100644
--- a/scripts/readme-gen/readme_gen.py
+++ b/scripts/readme-gen/readme_gen.py
@@ -33,17 +33,17 @@
autoescape=True,
)
-README_TMPL = jinja_env.get_template("README.tmpl.rst")
+README_TMPL = jinja_env.get_template('README.tmpl.rst')
def get_help(file):
- return subprocess.check_output(["python", file, "--help"]).decode()
+ return subprocess.check_output(['python', file, '--help']).decode()
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("source")
- parser.add_argument("--destination", default="README.rst")
+ parser.add_argument('source')
+ parser.add_argument('--destination', default='README.rst')
args = parser.parse_args()
@@ -51,9 +51,9 @@ def main():
root = os.path.dirname(source)
destination = os.path.join(root, args.destination)
- jinja_env.globals["get_help"] = get_help
+ jinja_env.globals['get_help'] = get_help
- with io.open(source, "r") as f:
+ with io.open(source, 'r') as f:
config = yaml.load(f)
# This allows get_help to execute in the right directory.
@@ -61,9 +61,9 @@ def main():
output = README_TMPL.render(config)
- with io.open(destination, "w") as f:
+ with io.open(destination, 'w') as f:
f.write(output)
-if __name__ == "__main__":
+if __name__ == '__main__':
main()
diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py
index d2e89e77..273dbd82 100644
--- a/tests/unit/test_document.py
+++ b/tests/unit/test_document.py
@@ -177,10 +177,10 @@ def test_search_page_with_target_string(get_bytes_single_file_mock):
gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/"
)
- actual_string = doc.search_pages(target_string="contract")
+ actual_pages = doc.search_pages(target_string="contract")
get_bytes_single_file_mock.assert_called_once()
- assert len(actual_string) == 1
+ assert len(actual_pages) == 1
def test_search_page_with_target_pattern(get_bytes_single_file_mock):
@@ -194,6 +194,28 @@ def test_search_page_with_target_pattern(get_bytes_single_file_mock):
assert len(actual_regex) == 1
+def test_search_page_with_multiple_pages(get_bytes_multiple_files_mock):
+ doc = document.Document.from_gcs(
+ gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/"
+ )
+
+ actual_pages = doc.search_pages(target_string="Invoice")
+
+ get_bytes_multiple_files_mock.assert_called_once()
+ assert len(actual_pages) == 48
+
+
+def test_search_page_with_no_results(get_bytes_single_file_mock):
+ doc = document.Document.from_gcs(
+ gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/"
+ )
+
+ actual_pages = doc.search_pages(target_string="Google")
+
+ get_bytes_single_file_mock.assert_called_once()
+ assert len(actual_pages) == 0
+
+
def test_search_page_with_regex_and_str(get_bytes_single_file_mock):
with pytest.raises(
ValueError,
diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py
index f6d8747e..fefff366 100644
--- a/tests/unit/test_utilities.py
+++ b/tests/unit/test_utilities.py
@@ -1,4 +1,3 @@
-# pylint: disable=protected-access
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
@@ -31,7 +30,7 @@
@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage")
-def test_list_gcs_document_tree_with_one_folder(mock_storage, capfd):
+def test_list_gcs_document_tree_with_one_folder(mock_storage):
client = mock_storage.Client.return_value
mock_bucket = mock.Mock()
@@ -61,8 +60,6 @@ def test_list_gcs_document_tree_with_one_folder(mock_storage, capfd):
mock_storage.Client.assert_called_once()
- out, err = capfd.readouterr()
-
assert "gs://test-directory/1" in list(doc_list.keys())
@@ -301,6 +298,28 @@ def test_print_gcs_document_tree_with_gcs_uri_contains_file_type():
)
+@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage")
+def test_create_batches_with_empty_directory(mock_storage, capfd):
+ client = mock_storage.Client.return_value
+ mock_bucket = mock.Mock()
+ client.Bucket.return_value = mock_bucket
+
+ mock_blob = mock.Mock(name="test_directory/", content_type="", size=0)
+ mock_blob.name.endswith.return_value = True
+
+ client.list_blobs.return_value = [mock_blob]
+
+ actual = utilities.create_batches(
+ gcs_bucket_name=test_bucket, gcs_prefix=test_prefix
+ )
+
+ mock_storage.Client.assert_called_once()
+
+ out, err = capfd.readouterr()
+ assert out == ""
+ assert len(actual) == 0
+
+
@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage")
def test_create_batches_with_3_documents(mock_storage, capfd):
client = mock_storage.Client.return_value
@@ -328,16 +347,15 @@ def test_create_batches_with_3_documents(mock_storage, capfd):
assert len(actual[0].gcs_documents.documents) == 3
-def test_create_batches_with_invalid_batch_size(capfd):
- with pytest.raises(ValueError):
+def test_create_batches_with_invalid_batch_size():
+ with pytest.raises(
+ ValueError,
+ match="Batch size must be less than 50. You provided 51.",
+ ):
utilities.create_batches(
gcs_bucket_name=test_bucket, gcs_prefix=test_prefix, batch_size=51
)
- out, err = capfd.readouterr()
- assert "Batch size must be less than" in out
- assert err
-
@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage")
def test_create_batches_with_large_folder(mock_storage, capfd):
@@ -387,7 +405,7 @@ def test_create_batches_with_invalid_file_type(mock_storage, capfd):
out, err = capfd.readouterr()
assert "Invalid Mime Type" in out
- assert actual == []
+ assert not actual
@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage")
@@ -410,4 +428,4 @@ def test_create_batches_with_large_file(mock_storage, capfd):
out, err = capfd.readouterr()
assert "File size must be less than" in out
- assert actual == []
+ assert not actual