diff --git a/.coveragerc b/.coveragerc index 742e899d..dda4f66a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 100 +fail_under = 99 show_missing = True exclude_lines = # Re-enable the standard pragma diff --git a/.github/.Owlbot.lock.yaml b/.github/.OwlBot.lock.yaml similarity index 84% rename from .github/.Owlbot.lock.yaml rename to .github/.OwlBot.lock.yaml index 6d9822ca..ff729308 100644 --- a/.github/.Owlbot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e7bb19d47c13839fe8c147e50e02e8b6cf5da8edd1af8b82208cd6f66cc2829c -# created: 2022-07-05T18:31:20.838186805Z \ No newline at end of file + digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 diff --git a/.github/.Owlbot.yaml b/.github/.OwlBot.yaml similarity index 90% rename from .github/.Owlbot.yaml rename to .github/.OwlBot.yaml index ef9b33cb..c379bd30 100644 --- a/.github/.Owlbot.yaml +++ b/.github/.OwlBot.yaml @@ -15,4 +15,4 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest -begin-after-commit-hash: be22498ce258bf2d5fe12fd696d3ad9a2b6c430e \ No newline at end of file +begin-after-commit-hash: 92006bb3cdc84677aa93c7f5235424ec2b157146 diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index f77dfdea..280fec17 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.7', '3.8', '3.9', '3.10'] + python: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - name: Checkout uses: actions/checkout@v3 @@ -54,4 +54,4 @@ jobs: - name: Report coverage results run: | coverage combine .coverage-results/.coverage* - coverage report --show-missing --fail-under=95 + coverage report --show-missing --fail-under=99 diff --git a/.gitignore b/.gitignore 
index f7ae0606..b4243ced 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,6 @@ pip-log.txt .nox .cache .pytest_cache -.mypy_cache/ # Mac diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index a2639539..66a2172a 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in @@ -116,7 +116,6 @@ commonmark==0.9.1 \ cryptography==39.0.1 \ --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ - --hash=sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885 \ --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ @@ -127,7 +126,6 @@ cryptography==39.0.1 \ --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ - --hash=sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef \ --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ @@ -137,7 +135,9 @@ cryptography==39.0.1 \ --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 - # via 
gcp-releasetool + # via + # gcp-releasetool + # secretstorage distlib==0.3.6 \ --hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \ --hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e @@ -273,6 +273,12 @@ jaraco-classes==3.2.3 \ --hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a # via keyring +jeepney==0.8.0 \ + --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ + --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 + # via + # keyring + # secretstorage jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 @@ -438,6 +444,10 @@ rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 # via google-auth +secretstorage==3.3.3 \ + --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ + --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 + # via keyring six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 @@ -446,9 +456,9 @@ six==1.16.0 \ # gcp-docuploader # google-auth # python-dateutil -twine==4.0.2 \ - --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \ - --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8 +twine==4.0.1 \ + --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ + --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 # via -r requirements.in typing-extensions==4.4.0 \ 
--hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ @@ -478,7 +488,7 @@ zipp==3.10.0 \ # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==67.4.0 \ - --hash=sha256:e5fd0a713141a4a105412233c63dc4e17ba0090c8e8334594ac790ec97792330 \ - --hash=sha256:f106dee1b506dee5102cc3f3e9e68137bbad6d47b616be7991714b0c62204251 +setuptools==65.5.1 \ + --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \ + --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f # via -r requirements.in diff --git a/.kokoro/samples/python3.11/common.cfg b/.kokoro/samples/python3.11/common.cfg new file mode 100644 index 00000000..49c5b22e --- /dev/null +++ b/.kokoro/samples/python3.11/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.11" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-311" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai-toolbox/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-documentai-toolbox/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.11/continuous.cfg b/.kokoro/samples/python3.11/continuous.cfg new file mode 100644 index 00000000..a1c8d975 --- /dev/null +++ b/.kokoro/samples/python3.11/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.11/periodic-head.cfg b/.kokoro/samples/python3.11/periodic-head.cfg new file mode 100644 index 00000000..060cdc8a --- /dev/null +++ b/.kokoro/samples/python3.11/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai-toolbox/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.11/periodic.cfg b/.kokoro/samples/python3.11/periodic.cfg new file mode 100644 index 00000000..71cd1e59 --- /dev/null +++ b/.kokoro/samples/python3.11/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.11/presubmit.cfg b/.kokoro/samples/python3.11/presubmit.cfg new file mode 100644 index 00000000..a1c8d975 --- /dev/null +++ b/.kokoro/samples/python3.11/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 46d23716..5405cc8f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: rev: 22.3.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 38b1ced0..abf05b23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.4.1-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.4.0-alpha...v0.4.1-alpha) (2023-03-21) + + +### Miscellaneous Chores + +* Release 0.4.1-alpha ([#85](https://github.com/googleapis/python-documentai-toolbox/issues/85)) ([bc8d6c7](https://github.com/googleapis/python-documentai-toolbox/commit/bc8d6c75fdee7e3efd8138916a731a881cec8811)) + ## [0.4.0-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.3.0-alpha...v0.4.0-alpha) (2023-03-09) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 90624015..3ce9b08a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox <https://nox.readthedocs.io/en/latest/>`__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.10 -- -k <name of test> + $ nox -s unit-3.11 -- -k <name of test> .. note:: @@ -225,11 +225,13 @@ We support: - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ +- `Python 3.11`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ +.. _Python 3.11: https://docs.python.org/3.11/ Supported versions can be found in our ``noxfile.py`` `config`_. 
diff --git a/google/cloud/documentai_toolbox/utilities/utilities.py b/google/cloud/documentai_toolbox/utilities/utilities.py index bcaacc88..51ec8e75 100644 --- a/google/cloud/documentai_toolbox/utilities/utilities.py +++ b/google/cloud/documentai_toolbox/utilities/utilities.py @@ -96,7 +96,8 @@ def print_gcs_document_tree(gcs_bucket_name: str, gcs_prefix: str) -> None: if dir_size > FILES_TO_DISPLAY: print("│ ....") print(f"{FILENAME_TREE_LAST}{file_name}\n") - elif idx <= FILES_TO_DISPLAY: + break + if idx <= FILES_TO_DISPLAY: print(f"{FILENAME_TREE_MIDDLE}{file_name}") @@ -163,7 +164,7 @@ def create_batches( ) ) - if batch != []: + if batch: # Append the last batch, which could be less than `batch_size` batches.append( documentai.BatchDocumentsInputConfig( diff --git a/google/cloud/documentai_toolbox/version.py b/google/cloud/documentai_toolbox/version.py index 7f088250..6beebb3f 100644 --- a/google/cloud/documentai_toolbox/version.py +++ b/google/cloud/documentai_toolbox/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.4.0-alpha" +__version__ = "0.4.1-alpha" diff --git a/noxfile.py b/noxfile.py index 5025a91e..91a45415 100644 --- a/noxfile.py +++ b/noxfile.py @@ -31,7 +31,7 @@ DEFAULT_PYTHON_VERSION = "3.8" -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", @@ -188,9 +188,9 @@ def unit(session): def install_systemtest_dependencies(session, *constraints): # Use pre-release gRPC for system tests. - # Exclude version 1.49.0rc1 which has a known issue. - # See https://github.com/grpc/grpc/pull/30642 - session.install("--pre", "grpcio!=1.49.0rc1") + # Exclude version 1.52.0rc1 which has a known issue. 
+ # See https://github.com/grpc/grpc/issues/32163 + session.install("--pre", "grpcio!=1.52.0rc1") session.install(*SYSTEM_TEST_STANDARD_DEPENDENCIES, *constraints) @@ -267,7 +267,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=95") + session.run("coverage", "report", "--show-missing", "--fail-under=99") session.run("coverage", "erase") @@ -345,9 +345,7 @@ def prerelease_deps(session): unit_deps_all = UNIT_TEST_STANDARD_DEPENDENCIES + UNIT_TEST_EXTERNAL_DEPENDENCIES session.install(*unit_deps_all) system_deps_all = ( - SYSTEM_TEST_STANDARD_DEPENDENCIES - + SYSTEM_TEST_EXTERNAL_DEPENDENCIES - + SYSTEM_TEST_EXTRAS + SYSTEM_TEST_STANDARD_DEPENDENCIES + SYSTEM_TEST_EXTERNAL_DEPENDENCIES ) session.install(*system_deps_all) @@ -377,8 +375,8 @@ def prerelease_deps(session): # dependency of grpc "six", "googleapis-common-protos", - # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 - "grpcio!=1.49.0rc1", + # Exclude version 1.52.0rc1 which has a known issue. 
See https://github.com/grpc/grpc/issues/32163 + "grpcio!=1.52.0rc1", "grpcio-status", "google-api-core", "proto-plus", diff --git a/owlbot.py b/owlbot.py index 2c343db8..327db1be 100644 --- a/owlbot.py +++ b/owlbot.py @@ -29,9 +29,13 @@ # ---------------------------------------------------------------------------- templated_files = common.py_library( system_test_python_versions=["3.8"], - cov_level=100, + cov_level=99, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/" }, ) s.move(templated_files, excludes=["docs/multiprocessing.rst", "README.rst"]) + +# run format session for all directories which have a noxfile +for noxfile in pathlib.Path(".").glob("**/noxfile.py"): + s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 91d51543..14ccab37 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ pytest==7.2.2 mock==5.0.1 -google-cloud-bigquery==3.6.0 +google-cloud-bigquery==3.7.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e955a0c3..85734acd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==3.6.0 +google-cloud-bigquery==3.7.0 google-cloud-documentai==2.14.0 google-cloud-storage==2.7.0 google-cloud-documentai-toolbox==0.1.1a0 diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index b533840a..91b59676 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -33,17 +33,17 @@ autoescape=True, ) -README_TMPL = jinja_env.get_template("README.tmpl.rst") +README_TMPL = jinja_env.get_template('README.tmpl.rst') def get_help(file): - return subprocess.check_output(["python", file, "--help"]).decode() + return subprocess.check_output(['python', file, '--help']).decode() def main(): 
parser = argparse.ArgumentParser() - parser.add_argument("source") - parser.add_argument("--destination", default="README.rst") + parser.add_argument('source') + parser.add_argument('--destination', default='README.rst') args = parser.parse_args() @@ -51,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals["get_help"] = get_help + jinja_env.globals['get_help'] = get_help - with io.open(source, "r") as f: + with io.open(source, 'r') as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -61,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, "w") as f: + with io.open(destination, 'w') as f: f.write(output) -if __name__ == "__main__": +if __name__ == '__main__': main() diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index d2e89e77..273dbd82 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -177,10 +177,10 @@ def test_search_page_with_target_string(get_bytes_single_file_mock): gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/" ) - actual_string = doc.search_pages(target_string="contract") + actual_pages = doc.search_pages(target_string="contract") get_bytes_single_file_mock.assert_called_once() - assert len(actual_string) == 1 + assert len(actual_pages) == 1 def test_search_page_with_target_pattern(get_bytes_single_file_mock): @@ -194,6 +194,28 @@ def test_search_page_with_target_pattern(get_bytes_single_file_mock): assert len(actual_regex) == 1 +def test_search_page_with_multiple_pages(get_bytes_multiple_files_mock): + doc = document.Document.from_gcs( + gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/" + ) + + actual_pages = doc.search_pages(target_string="Invoice") + + get_bytes_multiple_files_mock.assert_called_once() + assert len(actual_pages) == 48 + + +def test_search_page_with_no_results(get_bytes_single_file_mock): + 
doc = document.Document.from_gcs( + gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0/" + ) + + actual_pages = doc.search_pages(target_string="Google") + + get_bytes_single_file_mock.assert_called_once() + assert len(actual_pages) == 0 + + def test_search_page_with_regex_and_str(get_bytes_single_file_mock): with pytest.raises( ValueError, diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py index f6d8747e..fefff366 100644 --- a/tests/unit/test_utilities.py +++ b/tests/unit/test_utilities.py @@ -1,4 +1,3 @@ -# pylint: disable=protected-access # -*- coding: utf-8 -*- # Copyright 2023 Google LLC # @@ -31,7 +30,7 @@ @mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") -def test_list_gcs_document_tree_with_one_folder(mock_storage, capfd): +def test_list_gcs_document_tree_with_one_folder(mock_storage): client = mock_storage.Client.return_value mock_bucket = mock.Mock() @@ -61,8 +60,6 @@ def test_list_gcs_document_tree_with_one_folder(mock_storage, capfd): mock_storage.Client.assert_called_once() - out, err = capfd.readouterr() - assert "gs://test-directory/1" in list(doc_list.keys()) @@ -301,6 +298,28 @@ def test_print_gcs_document_tree_with_gcs_uri_contains_file_type(): ) +@mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") +def test_create_batches_with_empty_directory(mock_storage, capfd): + client = mock_storage.Client.return_value + mock_bucket = mock.Mock() + client.Bucket.return_value = mock_bucket + + mock_blob = mock.Mock(name="test_directory/", content_type="", size=0) + mock_blob.name.endswith.return_value = True + + client.list_blobs.return_value = [mock_blob] + + actual = utilities.create_batches( + gcs_bucket_name=test_bucket, gcs_prefix=test_prefix + ) + + mock_storage.Client.assert_called_once() + + out, err = capfd.readouterr() + assert out == "" + assert len(actual) == 0 + + @mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") def 
test_create_batches_with_3_documents(mock_storage, capfd): client = mock_storage.Client.return_value @@ -328,16 +347,15 @@ def test_create_batches_with_3_documents(mock_storage, capfd): assert len(actual[0].gcs_documents.documents) == 3 -def test_create_batches_with_invalid_batch_size(capfd): - with pytest.raises(ValueError): +def test_create_batches_with_invalid_batch_size(): + with pytest.raises( + ValueError, + match="Batch size must be less than 50. You provided 51.", + ): utilities.create_batches( gcs_bucket_name=test_bucket, gcs_prefix=test_prefix, batch_size=51 ) - out, err = capfd.readouterr() - assert "Batch size must be less than" in out - assert err - @mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") def test_create_batches_with_large_folder(mock_storage, capfd): @@ -387,7 +405,7 @@ def test_create_batches_with_invalid_file_type(mock_storage, capfd): out, err = capfd.readouterr() assert "Invalid Mime Type" in out - assert actual == [] + assert not actual @mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") @@ -410,4 +428,4 @@ def test_create_batches_with_large_file(mock_storage, capfd): out, err = capfd.readouterr() assert "File size must be less than" in out - assert actual == [] + assert not actual