diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 511b7c28..186c3cd8 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -1,37 +1,33 @@ -name: CI Documentation +name: CI Documentation and Code style on: [push, pull_request] jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: max-parallel: 4 matrix: - python-version: [3.9] + python-version: [3.12] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Give permission to run scripts - run: chmod +x ./docs/scripts/doc8_style_check.sh - - name: Install Dependencies - run: pip install -e .[docs] - - - name: Check Sphinx Documentation build minimally - working-directory: ./docs - run: sphinx-build -E -W source build + run: ./configure --dev - - name: Check for documentation style errors - working-directory: ./docs - run: ./scripts/doc8_style_check.sh + - name: Check documentation and HTML for errors and dead links + run: make docs-check + - name: Check documentation for style errors + run: make doc8 + - name: Check for Code style errors + run: make check diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 95857301..ca12e50b 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -21,23 +21,26 @@ on: jobs: build-pypi-distribs: name: Build and publish library to PyPI - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: 3.12 - - name: Install pypa/build - run: python -m pip install build --user + - name: Install pypa/build and twine + run: python -m pip install --user build twine 
- name: Build a binary wheel and a source tarball run: python -m build --sdist --wheel --outdir dist/ + - name: Validate wheel and sdist for PyPI + run: python -m twine check dist/* + - name: Upload built archives - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: pypi_archives path: dist/* @@ -47,17 +50,17 @@ jobs: name: Create GH release needs: - build-pypi-distribs - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - name: Download built archives - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: pypi_archives path: dist - name: Create GH release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: draft: true files: dist/* @@ -67,11 +70,14 @@ jobs: name: Create PyPI release needs: - create-gh-release - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 + environment: pypi-publish + permissions: + id-token: write steps: - name: Download built archives - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: pypi_archives path: dist @@ -79,5 +85,3 @@ jobs: - name: Publish to PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 2d48196f..4818bb3a 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,5 @@ tcl # Ignore Jupyter Notebook related temp files .ipynb_checkpoints/ +/.ruff_cache/ +.env \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml index 8ab23688..683f3a82 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -26,4 +26,4 @@ python: - method: pip path: . 
extra_requirements: - - docs + - dev diff --git a/AUTHORS.rst b/AUTHORS.rst index d248703c..98d13aa7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -11,4 +11,5 @@ The following organizations or individuals have contributed to this repo: - Saravanan G @SaravananOffl - Sebastian Schuberth @sschuberth - Steven Esser @majurg +- Theodore Aptekarev @piiq - Thomas Druez @tdruez diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1b731573..dc638663 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,10 +1,72 @@ Release notes ============= +Version 32.4.2 - (2025-01-08) +----------------------------- + +- Support setting the tar archive filter in python3.14 + https://github.com/aboutcode-org/commoncode/issues/88 + +Version 32.4.1 - (2025-01-07) +----------------------------- + +- Update system.py for python3.14 + +Version 32.4.0 - (2025-10-22) +----------------------------- + +- Fix ``click`` 8.3.0 compatibility issues. + https://github.com/aboutcode-org/commoncode/pull/92 + +- Drop python 3.9 support and add python 3.14 + https://github.com/aboutcode-org/commoncode/pull/92 + +- Handle paths with non-utf-8 bytes + https://github.com/aboutcode-org/commoncode/pull/91 + +Version 32.3.0 - (2025-06-11) +----------------------------- + +- Fix ``click`` compatibility issues. 
+ https://github.com/aboutcode-org/commoncode/pull/89 + https://github.com/aboutcode-org/commoncode/pull/86 + +Version 32.2.1 - (2025-03-06) +----------------------------- + +- migrate ``os.listdir()`` to ``os.scandir()`` to increase performance +- Fix checksums for empty files + +Version 32.2.0 - (2025-02-15) +----------------------------- + +- Adjust as_unicode for breaking bs4 4.13 changes + See https://github.com/aboutcode-org/commoncode/issues/79 +- Add support for python 3.13 +- Drop deprecated CI runners and update CI runners to latest + +Version 32.1.0 - (2024-12-06) +----------------------------- + +- Compute file checksums from streaming the file content in chunks to avoid running out of memory +- Drop support for python 3.8 and add support for python 3.12 + +Version 32.0.0 - (2024-09-05) +----------------------------- + +- Add new optional argument to generate YAML test data files from a template +- Migrate URLs to new aboutcode.org org +- Drop support for Python 3.7 +- Replace charset_normalizer by chardet because of unstable behavior between minor versions + See https://github.com/jawah/charset_normalizer/issues/520 +- Adopt black and isort style + + Version 31.2.1 - (2024-05-16) ----------------------------- -- Remove ``commoncode.system.get_etc_os_release_info`` and replace it with ``commoncode.distro_os_release_parser``. +- Remove ``commoncode.system.get_etc_os_release_info`` and replace it with + ``commoncode.distro_os_release_parser``. 
Version 31.2.0 - (2024-05-16) diff --git a/MANIFEST.in b/MANIFEST.in index 8424cbea..44900097 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ graft src +graft docs +graft etc include *.LICENSE include NOTICE @@ -6,10 +8,19 @@ include *.ABOUT include *.toml include *.yml include *.rst +include *.png include setup.* include configure* include requirements* include .giti* +include .dockerignore +include .gitignore +include .readthedocs.yml +include manage.py +include Dockerfile* +include Makefile +include MANIFEST.in -global-exclude *.py[co] __pycache__ *.*~ +include .VERSION +global-exclude *.py[co] __pycache__ *.*~ diff --git a/Makefile b/Makefile index cc36c355..3041547b 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -13,31 +13,33 @@ PYTHON_EXE?=python3 VENV=venv ACTIVATE?=. ${VENV}/bin/activate; -dev: - @echo "-> Configure the development envt." 
- ./configure --dev -isort: - @echo "-> Apply isort changes to ensure proper imports ordering" - ${VENV}/bin/isort --sl -l 100 src tests setup.py +conf: + @echo "-> Install dependencies" + ./configure -black: - @echo "-> Apply black code formatter" - ${VENV}/bin/black -l 100 src tests setup.py +dev: + @echo "-> Configure and install development dependencies" + ./configure --dev doc8: @echo "-> Run doc8 validation" - @${ACTIVATE} doc8 --max-line-length 100 --ignore-path docs/_build/ --quiet docs/ + @${ACTIVATE} doc8 --quiet docs/ *.rst -valid: isort black +valid: + @echo "-> Run Ruff format" + @${ACTIVATE} ruff format + @echo "-> Run Ruff linter" + @${ACTIVATE} ruff check --fix check: - @echo "-> Run pycodestyle (PEP8) validation" - @${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,migrations,settings.py,.cache . - @echo "-> Run isort imports ordering validation" - @${ACTIVATE} isort --sl --check-only -l 100 setup.py src tests . - @echo "-> Run black validation" - @${ACTIVATE} black --check --check -l 100 src tests setup.py + @echo "-> Run Ruff linter validation (pycodestyle, bandit, isort, and more)" + @${ACTIVATE} ruff check + @echo "-> Run Ruff format validation" + @${ACTIVATE} ruff format --check + @$(MAKE) doc8 + @echo "-> Run ABOUT files validation" + @${ACTIVATE} about check etc/ clean: @echo "-> Clean the Python env" @@ -49,6 +51,10 @@ test: docs: rm -rf docs/_build/ - @${ACTIVATE} sphinx-build docs/ docs/_build/ + @${ACTIVATE} sphinx-build docs/source docs/_build/ + +docs-check: + @${ACTIVATE} sphinx-build -E -W -b html docs/source docs/_build/ + @${ACTIVATE} sphinx-build -E -W -b linkcheck docs/source docs/_build/ -.PHONY: conf dev check valid black isort clean test docs +.PHONY: conf dev check valid clean test docs docs-check diff --git a/NOTICE b/NOTICE index 2643d9b1..b5a2017b 100644 --- a/NOTICE +++ b/NOTICE @@ -2,6 +2,6 @@ # Copyright (c) nexB Inc. and others. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # diff --git a/README.rst b/README.rst index c2106da3..654b53e8 100644 --- a/README.rst +++ b/README.rst @@ -3,12 +3,12 @@ CommonCode - license: Apache-2.0 - copyright: copyright (c) nexB. Inc. and others -- homepage_url: https://github.com/nexB/commoncode +- homepage_url: https://github.com/aboutcode-org/commoncode - keywords: utilities, scancode-toolkit, commoncode Commoncode provides a set of common functions and utilities for handling various things like paths, dates, files and hashes. It started as library in scancode-toolkit. -Visit https://aboutcode.org and https://github.com/nexB/ for support and download. +Visit https://aboutcode.org and https://github.com/aboutcode-org/ for support and download. 
To install this package use:: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 02c7530c..df185c43 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,4 +1,3 @@ - ################################################################################ # We use Azure to run the full tests suites on multiple Python 3.x # on multiple Windows, macOS and Linux versions all on 64 bits @@ -6,113 +5,97 @@ ################################################################################ jobs: - -################################################################################ -# These jobs are using VMs and Azure-provided Pythons 3.8 -################################################################################ - - - template: etc/ci/azure-posix.yml - parameters: - job_name: ubuntu20_cpython - image_name: ubuntu-20.04 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - - template: etc/ci/azure-posix.yml - parameters: - job_name: ubuntu22_cpython - image_name: ubuntu-22.04 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - - template: etc/ci/azure-posix.yml - parameters: - job_name: macos11_cpython - image_name: macOS-11 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - - template: etc/ci/azure-posix.yml - parameters: - job_name: macos12_cpython - image_name: macOS-12 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - - template: etc/ci/azure-posix.yml - parameters: - job_name: macos13_cpython - image_name: macOS-13 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - - template: etc/ci/azure-win.yml - parameters: - job_name: win2019_cpython - image_name: windows-2019 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv\Scripts\pytest -n 2 -vvs 
- - - template: etc/ci/azure-win.yml - parameters: - job_name: win2022_cpython - image_name: windows-2022 - python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11'] - test_suites: - all: venv\Scripts\pytest -n 2 -vvs - - -################################################################################ -# Test using many version of Click to work around regressions in API -################################################################################ - - - - template: etc/ci/azure-posix.yml - parameters: - job_name: ubuntu20_test_all_supported_click_versions - image_name: ubuntu-20.04 - python_versions: ['3.7', '3.8', '3.9', '3.10'] - test_suites: - click_versions: | - for clk_ver in 8.1.3 8.1.2 8.1.1 8.1.0 8.0.4 8.0.2 8.0.3 8.0.1 7.1.2 7.1.1 7.1 6.7; - do - pip install click==$clk_ver; - venv/bin/pytest -vvs tests/test_cliutils_progressbar.py; - done - - -################################################################################ -# Tests using a plain pip install to get the latest of all wheels -################################################################################ - - - - template: etc/ci/azure-posix.yml - parameters: - job_name: ubuntu20_cpython_latest_from_pip - image_name: ubuntu-20.04 - python_versions: ['3.7', '3.8', '3.9', '3.10'] - test_suites: - all: venv/bin/pip install --upgrade-strategy eager --force-reinstall --upgrade -e . && venv/bin/pytest -n 2 -vvs - - - - template: etc/ci/azure-win.yml - parameters: - job_name: win2019_cpython_latest_from_pip - image_name: windows-2019 - python_versions: ['3.7', '3.8', '3.9', '3.10'] - test_suites: - all: venv\Scripts\pip install --upgrade-strategy eager --force-reinstall --upgrade -e . && venv\Scripts\pytest -n 2 -vvs - - - template: etc/ci/azure-posix.yml - parameters: - job_name: macos11_cpython_latest_from_pip - image_name: macos-11 - python_versions: ['3.7', '3.8', '3.9', '3.10'] - test_suites: - all: venv/bin/pip install --upgrade-strategy eager --force-reinstall --upgrade -e . 
&& venv/bin/pytest -n 2 -vvs + ################################################################################ + # These jobs are using VMs and Azure-provided Pythons 3.8 + ################################################################################ + + - template: etc/ci/azure-posix.yml + parameters: + job_name: ubuntu22_cpython + image_name: ubuntu-22.04 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv/bin/pytest -n 2 -vvs + + - template: etc/ci/azure-posix.yml + parameters: + job_name: ubuntu24_cpython + image_name: ubuntu-24.04 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv/bin/pytest -n 2 -vvs + + - template: etc/ci/azure-posix.yml + parameters: + job_name: macos14_cpython + image_name: macOS-14 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv/bin/pytest -n 2 -vvs + + - template: etc/ci/azure-posix.yml + parameters: + job_name: macos15_cpython + image_name: macOS-15 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv/bin/pytest -n 2 -vvs + + - template: etc/ci/azure-win.yml + parameters: + job_name: win2025_cpython + image_name: windows-2025 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv\Scripts\pytest -n 2 -vvs + + - template: etc/ci/azure-win.yml + parameters: + job_name: win2022_cpython + image_name: windows-2022 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: venv\Scripts\pytest -n 2 -vvs + + ################################################################################ + # Test using many version of Click to work around regressions in API + ################################################################################ + + - template: etc/ci/azure-posix.yml + parameters: + job_name: ubuntu24_test_all_supported_click_versions + image_name: ubuntu-24.04 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + 
click_versions: | + for clk_ver in 8.3.1 8.3.0 8.2.0 8.2.1 8.1.8 8.1.7 8.1.6 8.1.5 8.1.4 8.1.3 8.1.2 8.1.1 8.1.0 8.0.4 8.0.2 8.0.3 8.0.1 7.1.2 7.1.1 7.1 6.7; + do + venv/bin/pip install click==$clk_ver; + venv/bin/pytest -vvs tests/test_cliutils_progressbar.py; + done + + ################################################################################ + # Tests using a plain pip install to get the latest of all wheels + ################################################################################ + + - template: etc/ci/azure-posix.yml + parameters: + job_name: ubuntu24_cpython_latest_from_pip + image_name: ubuntu-24.04 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: | + venv/bin/pip install --upgrade-strategy eager --force-reinstall --upgrade -e . + venv/bin/pip freeze + venv/bin/pytest -n 2 -vvs + + - template: etc/ci/azure-win.yml + parameters: + job_name: win2022_cpython_latest_from_pip + image_name: windows-2022 + python_versions: ["3.10", "3.11", "3.12", "3.13", "3.14"] + test_suites: + all: | + venv\Scripts\pip install --upgrade-strategy eager --force-reinstall --upgrade -e . + venv\Scripts\pip freeze + venv\Scripts\pytest -n 2 -vvs diff --git a/commoncode.ABOUT b/commoncode.ABOUT index 80578504..30f0e029 100644 --- a/commoncode.ABOUT +++ b/commoncode.ABOUT @@ -4,10 +4,10 @@ description: Commoncode provides a set of common functions and utilities for handling various things like paths, dates, files and hashes. It started as library in scancode-toolkit. keywords: utilities, commoncode, scancode-toolkit -homepage_url: https://github.com/nexB/commoncode +homepage_url: https://github.com/aboutcode-org/commoncode holder: nexB. Inc. 
and others holder_contact: info@aboutcode.org -homepage_url: https://github.com/nexB/commoncode +homepage_url: https://github.com/aboutcode-org/commoncode license_expression: apache-2.0 name: commoncode package_url: pkg:pypi/commoncode diff --git a/configure b/configure index 926a894e..6d317d4c 100755 --- a/configure +++ b/configure @@ -3,7 +3,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/ for support or download. +# See https://github.com/aboutcode-org/ for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -29,14 +29,13 @@ CLI_ARGS=$1 # Requirement arguments passed to pip and used by default or with --dev. REQUIREMENTS="--editable . --constraint requirements.txt" -DEV_REQUIREMENTS="--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" -DOCS_REQUIREMENTS="--editable .[docs] --constraint requirements.txt" +DEV_REQUIREMENTS="--editable .[dev] --constraint requirements.txt --constraint requirements-dev.txt" # where we create a virtualenv VIRTUALENV_DIR=venv # Cleanable files and directories to delete with the --clean option -CLEANABLE="build dist venv .cache .eggs" +CLEANABLE="build dist venv .cache .eggs *.egg-info docs/_build/ pip-selfcheck.json" # extra arguments passed to pip PIP_EXTRA_ARGS=" " @@ -111,7 +110,7 @@ create_virtualenv() { fi $PYTHON_EXECUTABLE "$VIRTUALENV_PYZ" \ - --wheel embed --pip embed --setuptools embed \ + --pip embed --setuptools embed \ --seeder pip \ --never-download \ --no-periodic-update \ @@ -168,6 +167,7 @@ clean() { for cln in $CLEANABLE; do rm -rf "${CFG_ROOT_DIR:?}/${cln:?}"; done + find . 
-type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete set +e exit } @@ -185,7 +185,6 @@ while getopts :-: optchar; do help ) cli_help;; clean ) find_python && clean;; dev ) CFG_REQUIREMENTS="$DEV_REQUIREMENTS";; - docs ) CFG_REQUIREMENTS="$DOCS_REQUIREMENTS";; esac;; esac done diff --git a/configure.bat b/configure.bat index 5e95b311..15ab7015 100644 --- a/configure.bat +++ b/configure.bat @@ -4,7 +4,7 @@ @rem Copyright (c) nexB Inc. and others. All rights reserved. @rem SPDX-License-Identifier: Apache-2.0 @rem See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -@rem See https://github.com/nexB/ for support or download. +@rem See https://github.com/aboutcode-org/ for support or download. @rem See https://aboutcode.org for more information about nexB OSS projects. @@ -27,8 +27,7 @@ @rem # Requirement arguments passed to pip and used by default or with --dev. set "REQUIREMENTS=--editable . --constraint requirements.txt" -set "DEV_REQUIREMENTS=--editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" -set "DOCS_REQUIREMENTS=--editable .[docs] --constraint requirements.txt" +set "DEV_REQUIREMENTS=--editable .[dev] --constraint requirements.txt --constraint requirements-dev.txt" @rem # where we create a virtualenv set "VIRTUALENV_DIR=venv" @@ -76,9 +75,6 @@ if not "%1" == "" ( if "%1" EQU "--dev" ( set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" ) - if "%1" EQU "--docs" ( - set "CFG_REQUIREMENTS=%DOCS_REQUIREMENTS%" - ) shift goto again ) @@ -114,7 +110,7 @@ if not exist "%CFG_BIN_DIR%\python.exe" ( if exist "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ( %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ^ - --wheel embed --pip embed --setuptools embed ^ + --pip embed --setuptools embed ^ --seeder pip ^ --never-download ^ --no-periodic-update ^ @@ -130,7 +126,7 @@ if not exist "%CFG_BIN_DIR%\python.exe" ( ) ) %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ^ - --wheel embed 
--pip embed --setuptools embed ^ + --pip embed --setuptools embed ^ --seeder pip ^ --never-download ^ --no-periodic-update ^ diff --git a/docs/Makefile b/docs/Makefile index d0c3cbf1..94f686b2 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,8 +5,9 @@ # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build +SPHINXAUTOBUILD = sphinx-autobuild SOURCEDIR = source -BUILDDIR = build +BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @@ -14,6 +15,13 @@ help: .PHONY: help Makefile +# Run the development server using sphinx-autobuild +docs: + @echo + @echo "Starting up the docs server..." + @echo + $(SPHINXAUTOBUILD) --port 8000 --watch ${SOURCEDIR} $(SOURCEDIR) "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) + # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile diff --git a/docs/make.bat b/docs/make.bat index 6247f7e2..4a3c1a48 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -7,11 +7,16 @@ REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) +if "%SPHINXAUTOBUILD%" == "" ( + set SPHINXAUTOBUILD=sphinx-autobuild +) set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help +if "%1" == "docs" goto docs + %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. @@ -28,6 +33,13 @@ if errorlevel 9009 ( %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end +:docs +@echo +@echo Starting up the docs server... 
+@echo +%SPHINXAUTOBUILD% --port 8000 --watch %SOURCEDIR% %SOURCEDIR% %BUILDDIR%\html %SPHINXOPTS% %O% +goto end + :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% diff --git a/docs/scripts/doc8_style_check.sh b/docs/scripts/doc8_style_check.sh deleted file mode 100644 index 94163239..00000000 --- a/docs/scripts/doc8_style_check.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -# halt script on error -set -e -# Check for Style Code Violations -doc8 --max-line-length 100 source --ignore D000 --quiet \ No newline at end of file diff --git a/docs/scripts/sphinx_build_link_check.sh b/docs/scripts/sphinx_build_link_check.sh deleted file mode 100644 index c5426863..00000000 --- a/docs/scripts/sphinx_build_link_check.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -# halt script on error -set -e -# Build locally, and then check links -sphinx-build -E -W -b linkcheck source build \ No newline at end of file diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css index 9662d63a..5863ccf5 100644 --- a/docs/source/_static/theme_overrides.css +++ b/docs/source/_static/theme_overrides.css @@ -1,353 +1,26 @@ -body { - color: #000000; -} - -p { - margin-bottom: 10px; -} - -.wy-plain-list-disc, .rst-content .section ul, .rst-content .toctree-wrapper ul, article ul { - margin-bottom: 10px; -} - -.custom_header_01 { - color: #cc0000; - font-size: 22px; - font-weight: bold; - line-height: 50px; -} - -h1, h2, h3, h4, h5, h6 { - margin-bottom: 20px; - margin-top: 20px; -} - -h5 { - font-size: 18px; - color: #000000; - font-style: italic; - margin-bottom: 10px; -} - -h6 { - font-size: 15px; - color: #000000; - font-style: italic; - margin-bottom: 10px; -} - -/* custom admonitions */ -/* success */ -.custom-admonition-success .admonition-title { - color: #000000; - background: #ccffcc; - border-radius: 5px 5px 0px 0px; -} -div.custom-admonition-success.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #cccccc; - 
border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - -/* important */ -.custom-admonition-important .admonition-title { - color: #000000; - background: #ccffcc; - border-radius: 5px 5px 0px 0px; - border-bottom: solid 1px #000000; -} -div.custom-admonition-important.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #cccccc; - border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - -/* caution */ -.custom-admonition-caution .admonition-title { - color: #000000; - background: #ffff99; - border-radius: 5px 5px 0px 0px; - border-bottom: solid 1px #e8e8e8; -} -div.custom-admonition-caution.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #cccccc; - border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - -/* note */ -.custom-admonition-note .admonition-title { - color: #ffffff; - background: #006bb3; - border-radius: 5px 5px 0px 0px; -} -div.custom-admonition-note.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #cccccc; - border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - -/* todo */ -.custom-admonition-todo .admonition-title { - color: #000000; - background: #cce6ff; - border-radius: 5px 5px 0px 0px; - border-bottom: solid 1px #99ccff; -} -div.custom-admonition-todo.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #99ccff; - border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - -/* examples */ -.custom-admonition-examples .admonition-title { - color: #000000; - background: #ffe6cc; - border-radius: 5px 5px 0px 0px; - border-bottom: solid 1px #d8d8d8; -} -div.custom-admonition-examples.admonition { - color: #000000; - background: #ffffff; - border: solid 1px #cccccc; - border-radius: 5px; - box-shadow: 1px 1px 5px 3px #d8d8d8; - margin: 20px 0px 30px 0px; -} - +/* this is the 
container for the pages */ .wy-nav-content { max-width: 100%; - padding-right: 100px; - padding-left: 100px; - background-color: #f2f2f2; -} - -div.rst-content { - background-color: #ffffff; - border: solid 1px #e5e5e5; - padding: 20px 40px 20px 40px; -} - -.rst-content .guilabel { - border: 1px solid #ffff99; - background: #ffff99; - font-size: 100%; - font-weight: normal; - border-radius: 4px; - padding: 2px 0px; - margin: auto 2px; - vertical-align: middle; -} - -.rst-content kbd { - font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; - border: solid 1px #d8d8d8; - background-color: #f5f5f5; - padding: 0px 3px; - border-radius: 3px; -} - -.wy-nav-content-wrap a { - color: #0066cc; - text-decoration: none; -} -.wy-nav-content-wrap a:hover { - color: #0099cc; - text-decoration: underline; -} - -.wy-nav-top a { - color: #ffffff; -} - -/* Based on numerous similar approaches e.g., https://github.com/readthedocs/sphinx_rtd_theme/issues/117 and https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html -- but remove form-factor limits to enable table wrap on full-size and smallest-size form factors */ -.wy-table-responsive table td { - white-space: normal !important; -} - -.rst-content table.docutils td, -.rst-content table.docutils th { - padding: 5px 10px 5px 10px; -} -.rst-content table.docutils td p, -.rst-content table.docutils th p { - font-size: 14px; - margin-bottom: 0px; -} -.rst-content table.docutils td p cite, -.rst-content table.docutils th p cite { - font-size: 14px; - background-color: transparent; -} - -.colwidths-given th { - border: solid 1px #d8d8d8 !important; -} -.colwidths-given td { - border: solid 1px #d8d8d8 !important; -} - -/*handles single-tick inline code*/ -.wy-body-for-nav cite { - color: #000000; - background-color: transparent; - font-style: normal; - font-family: "Courier New"; - font-size: 13px; - padding: 3px 3px 3px 3px; -} - -.rst-content pre.literal-block, .rst-content 
div[class^="highlight"] pre, .rst-content .linenodiv pre { - font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; - font-size: 13px; - overflow: visible; - white-space: pre-wrap; - color: #000000; -} - -.rst-content pre.literal-block, .rst-content div[class^='highlight'] { - background-color: #f8f8f8; - border: solid 1px #e8e8e8; -} - -/* This enables inline code to wrap. */ -code, .rst-content tt, .rst-content code { - white-space: pre-wrap; - padding: 2px 3px 1px; - border-radius: 3px; - font-size: 13px; - background-color: #ffffff; -} - -/* use this added class for code blocks attached to bulleted list items */ -.highlight-top-margin { - margin-top: 20px !important; -} - -/* change color of inline code block */ -span.pre { - color: #e01e5a; -} - -.wy-body-for-nav blockquote { - margin: 1em 0; - padding-left: 1em; - border-left: 4px solid #ddd; - color: #000000; -} - -/* Fix the unwanted top and bottom padding inside a nested bulleted/numbered list */ -.rst-content .section ol p, .rst-content .section ul p { - margin-bottom: 0px; -} - -/* add spacing between bullets for legibility */ -.rst-content .section ol li, .rst-content .section ul li { - margin-bottom: 5px; -} - -.rst-content .section ol li:first-child, .rst-content .section ul li:first-child { - margin-top: 5px; -} - -/* but exclude the toctree bullets */ -.rst-content .toctree-wrapper ul li, .rst-content .toctree-wrapper ul li:first-child { + padding: 0px 40px 0px 0px; margin-top: 0px; - margin-bottom: 0px; } -/* remove extra space at bottom of multine list-table cell */ -.rst-content .line-block { - margin-left: 0px; - margin-bottom: 0px; - line-height: 24px; +.wy-nav-content-wrap { + border-right: solid 1px; } -/* fix extra vertical spacing in page toctree */ -.rst-content .toctree-wrapper ul li ul, article ul li ul { - margin-top: 0; - margin-bottom: 0; -} - -/* this is used by the genindex added via layout.html (see source/_templates/) to sidebar toc */ 
-.reference.internal.toc-index { - color: #d9d9d9; -} - -.reference.internal.toc-index.current { - background-color: #ffffff; - color: #000000; - font-weight: bold; -} - -.toc-index-div { - border-top: solid 1px #000000; - margin-top: 10px; - padding-top: 5px; -} - -.indextable ul li { - font-size: 14px; - margin-bottom: 5px; -} - -/* The next 2 fix the poor vertical spacing in genindex.html (the alphabetized index) */ -.indextable.genindextable { - margin-bottom: 20px; -} - -div.genindex-jumpbox { - margin-bottom: 10px; -} - -/* rst image classes */ - -.clear-both { - clear: both; - } - -.float-left { - float: left; - margin-right: 20px; -} - -img { - border: solid 1px #e8e8e8; -} - -/* These are custom and need to be defined in conf.py to access in all pages, e.g., '.. role:: red' */ -.img-title { - color: #000000; - /* neither padding nor margin works for vertical spacing bc it's a span -- line-height does, sort of */ - line-height: 3.0; - font-style: italic; - font-weight: 600; -} - -.img-title-para { - color: #000000; - margin-top: 20px; - margin-bottom: 0px; - font-style: italic; - font-weight: 500; -} - -.red { - color: red; +div.rst-content { + max-width: 1300px; + border: 0; + padding: 10px 80px 10px 80px; + margin-left: 50px; +} + +@media (max-width: 768px) { + div.rst-content { + max-width: 1300px; + border: 0; + padding: 0px 10px 10px 10px; + margin-left: 0px; + } } diff --git a/docs/source/conf.py b/docs/source/conf.py index 918d62c1..056ca6ea 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = "nexb-skeleton" -copyright = "nexB Inc. and others." +copyright = "nexB Inc., AboutCode and others." 
author = "AboutCode.org authors and contributors" @@ -30,6 +30,10 @@ extensions = [ "sphinx.ext.intersphinx", "sphinx_reredirects", + "sphinx_rtd_theme", + "sphinx_rtd_dark_mode", + "sphinx.ext.extlinks", + "sphinx_copybutton", ] @@ -39,11 +43,14 @@ # This points to aboutcode.readthedocs.io # In case of "undefined label" ERRORS check docs on intersphinx to troubleshoot -# Link was created at commit - https://github.com/nexB/aboutcode/commit/faea9fcf3248f8f198844fe34d43833224ac4a83 +# Link was created at commit - https://github.com/aboutcode-org/aboutcode/commit/faea9fcf3248f8f198844fe34d43833224ac4a83 intersphinx_mapping = { "aboutcode": ("https://aboutcode.readthedocs.io/en/latest/", None), - "scancode-workbench": ("https://scancode-workbench.readthedocs.io/en/develop/", None), + "scancode-workbench": ( + "https://scancode-workbench.readthedocs.io/en/develop/", + None, + ), } @@ -78,14 +85,17 @@ "conf_py_path": "/docs/source/", # path in the checkout to the docs root } -html_css_files = ["_static/theme_overrides.css"] +html_css_files = [ + "theme_overrides.css", +] # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. html_show_sphinx = True # Define CSS and HTML abbreviations used in .rst files. These are examples. -# .. role:: is used to refer to styles defined in _static/theme_overrides.css and is used like this: :red:`text` +# .. role:: is used to refer to styles defined in _static/theme_overrides.css +# and is used like this: :red:`text` rst_prolog = """ .. 
|psf| replace:: Python Software Foundation @@ -104,6 +114,4 @@ # -- Options for LaTeX output ------------------------------------------------- -latex_elements = { - 'classoptions': ',openany,oneside' -} \ No newline at end of file +latex_elements = {"classoptions": ",openany,oneside"} diff --git a/docs/source/contribute/contrib_doc.rst b/docs/source/contribute/contrib_doc.rst index 13882e10..2a719a52 100644 --- a/docs/source/contribute/contrib_doc.rst +++ b/docs/source/contribute/contrib_doc.rst @@ -8,109 +8,59 @@ Contributing to the Documentation Setup Local Build ----------------- -To get started, create or identify a working directory on your local machine. +To get started, check out and configure the repository for development:: -Open that directory and execute the following command in a terminal session:: + git clone https://github.com/aboutcode-org/<your-repo>.git - git clone https://github.com/nexB/skeleton.git + cd your-repo + ./configure --dev -That will create an ``/skeleton`` directory in your working directory. -Now you can install the dependencies in a virtualenv:: - - cd skeleton - ./configure --docs +(Or use "make dev") .. note:: - In case of windows, run ``configure --docs`` instead of this. - -Now, this will install the following prerequisites: - -- Sphinx -- sphinx_rtd_theme (the format theme used by ReadTheDocs) -- docs8 (style linter) + In case of windows, run ``configure --dev``. -These requirements are already present in setup.cfg and `./configure --docs` installs them. +This will install and configure all requirements for development including for docs development. -Now you can build the HTML documents locally:: +Now you can build the HTML documentation locally:: source venv/bin/activate - cd docs - make html - -Assuming that your Sphinx installation was successful, Sphinx should build a local instance of the -documentation .html files:: - - open build/html/index.html - -.. 
note:: - - In case this command did not work, for example on Ubuntu 18.04 you may get a message like “Couldn’t - get a file descriptor referring to the console”, try: - - :: - - see build/html/index.html + make docs -You now have a local build of the AboutCode documents. +This will build a local instance of the ``docs/_build`` directory:: -.. _contrib_doc_share_improvements: + open docs/_build/index.html -Share Document Improvements ---------------------------- - -Ensure that you have the latest files:: - - git pull - git status -Before commiting changes run Continious Integration Scripts locally to run tests. Refer -:ref:`doc_ci` for instructions on the same. +To validate the documentation style and content, use:: -Follow standard git procedures to upload your new and modified files. The following commands are -examples:: - - git status - git add source/index.rst - git add source/how-to-scan.rst - git status - git commit -m "New how-to document that explains how to scan" - git status - git push - git status - -The Scancode-Toolkit webhook with ReadTheDocs should rebuild the documentation after your -Pull Request is Merged. + source venv/bin/activate + make doc8 + make docs-check -Refer the `Pro Git Book `_ available online for Git tutorials -covering more complex topics on Branching, Merging, Rebasing etc. .. _doc_ci: Continuous Integration ---------------------- -The documentations are checked on every new commit through Travis-CI, so that common errors are -avoided and documentation standards are enforced. Travis-CI presently checks for these 3 aspects -of the documentation : +The documentation is checked on every new commit, so that common errors are avoided and +documentation standards are enforced. We check for these aspects of the documentation: 1. Successful Builds (By using ``sphinx-build``) -2. No Broken Links (By Using ``link-check``) -3. Linting Errors (By Using ``Doc8``) +2. No Broken Links (By Using ``linkcheck``) +3. 
Linting Errors (By Using ``doc8``) -So run these scripts at your local system before creating a Pull Request:: +You must run these scripts locally before creating a pull request:: - cd docs - ./scripts/sphinx_build_link_check.sh - ./scripts/doc8_style_check.sh + make doc8 + make docs-check -If you don't have permission to run the scripts, run:: - - chmod u+x ./scripts/doc8_style_check.sh .. _doc_style_docs8: -Style Checks Using ``Doc8`` +Style Checks Using ``doc8`` --------------------------- How To Run Style Tests @@ -118,8 +68,7 @@ How To Run Style Tests In the project root, run the following commands:: - $ cd docs - $ ./scripts/doc8_style_check.sh + make doc8 A sample output is:: @@ -143,11 +92,13 @@ A sample output is:: Now fix the errors and run again till there isn't any style error in the documentation. + What is Checked? ^^^^^^^^^^^^^^^^ PyCQA is an Organization for code quality tools (and plugins) for the Python programming language. -Doc8 is a sub-project of the same Organization. Refer this `README `_ for more details. +Doc8 is a sub-project of the same Organization. Refer this +`README `_ for more details. What is checked: @@ -164,16 +115,19 @@ What is checked: - no carriage returns (use UNIX newlines) - D004 - no newline at end of file - D005 + .. _doc_interspinx: Interspinx ---------- -ScanCode toolkit documentation uses `Intersphinx `_ +AboutCode documentation uses +`Intersphinx `_ to link to other Sphinx Documentations, to maintain links to other Aboutcode Projects. To link sections in the same documentation, standart reST labels are used. Refer -`Cross-Referencing `_ for more information. +`Cross-Referencing `_ +for more information. For example:: @@ -223,6 +177,7 @@ Intersphinx, and you link to that label, it will create a link to the local labe For more information, refer this tutorial named `Using Intersphinx `_. + .. _doc_style_conv: Style Conventions for the Documentaion @@ -230,7 +185,7 @@ Style Conventions for the Documentaion 1. 
Headings - (`Refer `_) + (`Refer `_) Normally, there are no heading levels assigned to certain characters as the structure is determined from the succession of headings. However, this convention is used in Python’s Style Guide for documenting which you may follow: @@ -303,12 +258,14 @@ Style Conventions for the Documentaion ``rst_snippets/warning_snippets/`` and then included to eliminate redundancy, as these are frequently used in multiple files. + Converting from Markdown ------------------------ -If you want to convert a ``.md`` file to a ``.rst`` file, this `tool `_ -does it pretty well. You'd still have to clean up and check for errors as this contains a lot of -bugs. But this is definitely better than converting everything by yourself. +If you want to convert a ``.md`` file to a ``.rst`` file, this +`tool `_ does it pretty well. +You will still have to clean up and check for errors as this contains a lot of bugs. But this is +definitely better than converting everything by yourself. This will be helpful in converting GitHub wiki's (Markdown Files) to reStructuredtext files for Sphinx/ReadTheDocs hosting. diff --git a/etc/scripts/check_thirdparty.py b/etc/scripts/check_thirdparty.py index b052f25b..65ae595e 100644 --- a/etc/scripts/check_thirdparty.py +++ b/etc/scripts/check_thirdparty.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import click @@ -42,8 +41,7 @@ def check_thirdparty_dir( """ Check a thirdparty directory for problems and print these on screen. 
""" - # check for problems - print(f"==> CHECK FOR PROBLEMS") + print("==> CHECK FOR PROBLEMS") utils_thirdparty.find_problems( dest_dir=dest, report_missing_sources=sdists, diff --git a/etc/scripts/fetch_thirdparty.py b/etc/scripts/fetch_thirdparty.py index eedf05c6..76a19a60 100644 --- a/etc/scripts/fetch_thirdparty.py +++ b/etc/scripts/fetch_thirdparty.py @@ -1,23 +1,21 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import itertools -import os import sys from collections import defaultdict import click -import utils_thirdparty import utils_requirements +import utils_thirdparty TRACE = False TRACE_DEEP = False @@ -109,7 +107,8 @@ @click.option( "--use-cached-index", is_flag=True, - help="Use on disk cached PyPI indexes list of packages and versions and do not refetch if present.", + help="Use on disk cached PyPI indexes list of packages and versions and " + "do not refetch if present.", ) @click.option( "--sdist-only", @@ -120,7 +119,7 @@ show_default=False, multiple=True, help="Package name(s) that come only in sdist format (no wheels). " - "The command will not fail and exit if no wheel exists for these names", + "The command will not fail and exit if no wheel exists for these names", ) @click.option( "--wheel-only", @@ -131,7 +130,7 @@ show_default=False, multiple=True, help="Package name(s) that come only in wheel format (no sdist). 
" - "The command will not fail and exit if no sdist exists for these names", + "The command will not fail and exit if no sdist exists for these names", ) @click.option( "--no-dist", @@ -142,7 +141,7 @@ show_default=False, multiple=True, help="Package name(s) that do not come either in wheel or sdist format. " - "The command will not fail and exit if no distribution exists for these names", + "The command will not fail and exit if no distribution exists for these names", ) @click.help_option("-h", "--help") def fetch_thirdparty( @@ -248,7 +247,6 @@ def fetch_thirdparty( print(f"Processing: {name} @ {version}") if wheels: for environment in environments: - if TRACE: print(f" ==> Fetching wheel for envt: {environment}") @@ -262,11 +260,9 @@ def fetch_thirdparty( if not fetched: wheels_or_sdist_not_found[f"{name}=={version}"].append(environment) if TRACE: - print(f" NOT FOUND") + print(" NOT FOUND") - if (sdists or - (f"{name}=={version}" in wheels_or_sdist_not_found and name in sdist_only) - ): + if sdists or (f"{name}=={version}" in wheels_or_sdist_not_found and name in sdist_only): if TRACE: print(f" ==> Fetching sdist: {name}=={version}") @@ -279,17 +275,17 @@ def fetch_thirdparty( if not fetched: wheels_or_sdist_not_found[f"{name}=={version}"].append("sdist") if TRACE: - print(f" NOT FOUND") + print(" NOT FOUND") mia = [] for nv, dists in wheels_or_sdist_not_found.items(): name, _, version = nv.partition("==") if name in no_dist: continue - sdist_missing = sdists and "sdist" in dists and not name in wheel_only + sdist_missing = sdists and "sdist" in dists and name not in wheel_only if sdist_missing: mia.append(f"SDist missing: {nv} {dists}") - wheels_missing = wheels and any(d for d in dists if d != "sdist") and not name in sdist_only + wheels_missing = wheels and any(d for d in dists if d != "sdist") and name not in sdist_only if wheels_missing: mia.append(f"Wheels missing: {nv} {dists}") @@ -298,12 +294,12 @@ def fetch_thirdparty( print(m) raise Exception(mia) - 
print(f"==> FETCHING OR CREATING ABOUT AND LICENSE FILES") + print("==> FETCHING OR CREATING ABOUT AND LICENSE FILES") utils_thirdparty.fetch_abouts_and_licenses(dest_dir=dest_dir, use_cached_index=use_cached_index) utils_thirdparty.clean_about_files(dest_dir=dest_dir) # check for problems - print(f"==> CHECK FOR PROBLEMS") + print("==> CHECK FOR PROBLEMS") utils_thirdparty.find_problems( dest_dir=dest_dir, report_missing_sources=sdists, diff --git a/etc/scripts/gen_pypi_simple.py b/etc/scripts/gen_pypi_simple.py index 214d90dc..89d06265 100644 --- a/etc/scripts/gen_pypi_simple.py +++ b/etc/scripts/gen_pypi_simple.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # SPDX-License-Identifier: BSD-2-Clause-Views AND MIT # Copyright (c) 2010 David Wolever . All rights reserved. @@ -69,7 +68,6 @@ def get_package_name_from_filename(filename): raise InvalidDistributionFilename(filename) elif filename.endswith(wheel_ext): - wheel_info = get_wheel_from_filename(filename) if not wheel_info: @@ -133,7 +131,7 @@ def build_links_package_index(packages_by_package_name, base_url): Return an HTML document as string which is a links index of all packages """ document = [] - header = f""" + header = """ Links for all packages @@ -178,13 +176,13 @@ def simple_index_entry(self, base_url): def build_pypi_index(directory, base_url="https://thirdparty.aboutcode.org/pypi"): """ - Using a ``directory`` directory of wheels and sdists, create the a PyPI - simple directory index at ``directory``/simple/ populated with the proper - PyPI simple index directory structure crafted using symlinks. + Create the a PyPI simple directory index using a ``directory`` directory of wheels and sdists in + the direvctory at ``directory``/simple/ populated with the proper PyPI simple index directory + structure crafted using symlinks. - WARNING: The ``directory``/simple/ directory is removed if it exists. 
- NOTE: in addition to the a PyPI simple index.html there is also a links.html - index file generated which is suitable to use with pip's --find-links + WARNING: The ``directory``/simple/ directory is removed if it exists. NOTE: in addition to the a + PyPI simple index.html there is also a links.html index file generated which is suitable to use + with pip's --find-links """ directory = Path(directory) @@ -200,11 +198,10 @@ def build_pypi_index(directory, base_url="https://thirdparty.aboutcode.org/pypi" simple_html_index = [ "", "PyPI Simple Index", - '' '', + '', ] for pkg_file in directory.iterdir(): - pkg_filename = pkg_file.name if ( diff --git a/etc/scripts/gen_requirements.py b/etc/scripts/gen_requirements.py index 07e26f77..1b879442 100644 --- a/etc/scripts/gen_requirements.py +++ b/etc/scripts/gen_requirements.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import argparse @@ -34,7 +33,8 @@ def gen_requirements(): type=pathlib.Path, required=True, metavar="DIR", - help="Path to the 'site-packages' directory where wheels are installed such as lib/python3.6/site-packages", + help="Path to the 'site-packages' directory where wheels are installed " + "such as lib/python3.12/site-packages", ) parser.add_argument( "-r", diff --git a/etc/scripts/gen_requirements_dev.py b/etc/scripts/gen_requirements_dev.py index 12cc06d3..85482056 100644 --- a/etc/scripts/gen_requirements_dev.py +++ b/etc/scripts/gen_requirements_dev.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. 
All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import argparse @@ -36,7 +35,8 @@ def gen_dev_requirements(): type=pathlib.Path, required=True, metavar="DIR", - help='Path to the "site-packages" directory where wheels are installed such as lib/python3.6/site-packages', + help="Path to the 'site-packages' directory where wheels are installed " + "such as lib/python3.12/site-packages", ) parser.add_argument( "-d", diff --git a/etc/scripts/test_utils_pip_compatibility_tags.py b/etc/scripts/test_utils_pip_compatibility_tags.py index 98187c56..0e9c360a 100644 --- a/etc/scripts/test_utils_pip_compatibility_tags.py +++ b/etc/scripts/test_utils_pip_compatibility_tags.py @@ -1,4 +1,5 @@ -"""Generate and work with PEP 425 Compatibility Tags. +""" +Generate and work with PEP 425 Compatibility Tags. copied from pip-20.3.1 pip/tests/unit/test_utils_compatibility_tags.py download_url: https://raw.githubusercontent.com/pypa/pip/20.3.1/tests/unit/test_utils_compatibility_tags.py @@ -25,8 +26,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -from unittest.mock import patch import sysconfig +from unittest.mock import patch import pytest @@ -51,7 +52,7 @@ def test_version_info_to_nodot(version_info, expected): assert actual == expected -class Testcompatibility_tags(object): +class Testcompatibility_tags: def mock_get_config_var(self, **kwd): """ Patch sysconfig.get_config_var for arbitrary keys. 
@@ -82,7 +83,7 @@ def test_no_hyphen_tag(self): assert "-" not in tag.platform -class TestManylinux2010Tags(object): +class TestManylinux2010Tags: @pytest.mark.parametrize( "manylinux2010,manylinux1", [ @@ -105,7 +106,7 @@ def test_manylinux2010_implies_manylinux1(self, manylinux2010, manylinux1): assert arches[:2] == [manylinux2010, manylinux1] -class TestManylinux2014Tags(object): +class TestManylinux2014Tags: @pytest.mark.parametrize( "manylinuxA,manylinuxB", [ diff --git a/etc/scripts/update_skeleton.py b/etc/scripts/update_skeleton.py new file mode 100644 index 00000000..374c06f2 --- /dev/null +++ b/etc/scripts/update_skeleton.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# +# Copyright (c) nexB Inc. AboutCode, and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from pathlib import Path +import os +import subprocess + +import click + + +ABOUTCODE_PUBLIC_REPO_NAMES = [ + "aboutcode-toolkit", + "ahocode", + "bitcode", + "clearcode-toolkit", + "commoncode", + "container-inspector", + "debian-inspector", + "deltacode", + "elf-inspector", + "extractcode", + "fetchcode", + "gemfileparser2", + "gh-issue-sandbox", + "go-inspector", + "heritedcode", + "license-expression", + "license_copyright_pipeline", + "nuget-inspector", + "pip-requirements-parser", + "plugincode", + "purldb", + "pygmars", + "python-inspector", + "sanexml", + "saneyaml", + "scancode-analyzer", + "scancode-toolkit-contrib", + "scancode-toolkit-reference-scans", + "thirdparty-toolkit", + "tracecode-toolkit", + "tracecode-toolkit-strace", + "turbo-spdx", + "typecode", + "univers", +] + + +@click.command() +@click.help_option("-h", "--help") +def update_skeleton_files(repo_names=ABOUTCODE_PUBLIC_REPO_NAMES): + """ + Update project files of AboutCode projects that use the skeleton + + This script will: + - Clone the repo + - Add the skeleton repo as a new origin + - Create a new branch named "update-skeleton-files" + - Merge in the new skeleton files into the "update-skeleton-files" branch + + The user will need to save merge commit messages that pop up when running + this script in addition to resolving the merge conflicts on repos that have + them. 
+ """ + + # Create working directory + work_dir_path = Path("/tmp/update_skeleton/") + if not os.path.exists(work_dir_path): + os.makedirs(work_dir_path, exist_ok=True) + + for repo_name in repo_names: + # Move to work directory + os.chdir(work_dir_path) + + # Clone repo + repo_git = f"git@github.com:aboutcode-org/{repo_name}.git" + subprocess.run(["git", "clone", repo_git]) + + # Go into cloned repo + os.chdir(work_dir_path / repo_name) + + # Add skeleton as an origin + subprocess.run( + ["git", "remote", "add", "skeleton", "git@github.com:aboutcode-org/skeleton.git"] + ) + + # Fetch skeleton files + subprocess.run(["git", "fetch", "skeleton"]) + + # Create and checkout new branch + subprocess.run(["git", "checkout", "-b", "update-skeleton-files"]) + + # Merge skeleton files into the repo + subprocess.run(["git", "merge", "skeleton/main", "--allow-unrelated-histories"]) + + +if __name__ == "__main__": + update_skeleton_files() diff --git a/etc/scripts/utils_dejacode.py b/etc/scripts/utils_dejacode.py index c42e6c93..b6bff518 100644 --- a/etc/scripts/utils_dejacode.py +++ b/etc/scripts/utils_dejacode.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import io @@ -14,7 +13,6 @@ import requests import saneyaml - from packvers import version as packaging_version """ @@ -26,7 +24,7 @@ DEJACODE_API_URL_PACKAGES = f"{DEJACODE_API_URL}packages/" DEJACODE_API_HEADERS = { - "Authorization": "Token {}".format(DEJACODE_API_KEY), + "Authorization": f"Token {DEJACODE_API_KEY}", "Accept": "application/json; indent=4", } @@ -51,6 +49,7 @@ def fetch_dejacode_packages(params): DEJACODE_API_URL_PACKAGES, params=params, headers=DEJACODE_API_HEADERS, + timeout=10, ) return response.json()["results"] @@ -94,7 +93,7 @@ def update_with_dejacode_about_data(distribution): if package_data: package_api_url = package_data["api_url"] about_url = f"{package_api_url}about" - response = requests.get(about_url, headers=DEJACODE_API_HEADERS) + response = requests.get(about_url, headers=DEJACODE_API_HEADERS, timeout=10) # note that this is YAML-formatted about_text = response.json()["about_data"] about_data = saneyaml.load(about_text) @@ -114,7 +113,7 @@ def fetch_and_save_about_files(distribution, dest_dir="thirdparty"): if package_data: package_api_url = package_data["api_url"] about_url = f"{package_api_url}about_files" - response = requests.get(about_url, headers=DEJACODE_API_HEADERS) + response = requests.get(about_url, headers=DEJACODE_API_HEADERS, timeout=10) about_zip = response.content with io.BytesIO(about_zip) as zf: with zipfile.ZipFile(zf) as zi: @@ -153,7 +152,7 @@ def find_latest_dejacode_package(distribution): with_versions = sorted(with_versions) latest_version, latest_package_version = sorted(with_versions)[-1] print( - f"Found DejaCode latest version: {latest_version} " f"for dist: {distribution.package_url}", + f"Found DejaCode latest version: {latest_version} for dist: {distribution.package_url}", ) return latest_package_version @@ -179,7 +178,7 @@ def create_dejacode_package(distribution): } fields_to_carry_over = [ - "download_url" "type", + "download_urltype", "namespace", "name", "version", @@ -202,10 +201,11 @@ def 
create_dejacode_package(distribution): DEJACODE_API_URL_PACKAGES, data=new_package_payload, headers=DEJACODE_API_HEADERS, + timeout=10, ) new_package_data = response.json() if response.status_code != 201: raise Exception(f"Error, cannot create package for: {distribution}") - print(f'New Package created at: {new_package_data["absolute_url"]}') + print(f"New Package created at: {new_package_data['absolute_url']}") return new_package_data diff --git a/etc/scripts/utils_pip_compatibility_tags.py b/etc/scripts/utils_pip_compatibility_tags.py index af42a0cd..dd954bca 100644 --- a/etc/scripts/utils_pip_compatibility_tags.py +++ b/etc/scripts/utils_pip_compatibility_tags.py @@ -1,4 +1,5 @@ -"""Generate and work with PEP 425 Compatibility Tags. +""" +Generate and work with PEP 425 Compatibility Tags. copied from pip-20.3.1 pip/_internal/utils/compatibility_tags.py download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/utils/compatibility_tags.py @@ -27,14 +28,12 @@ import re -from packvers.tags import ( - compatible_tags, - cpython_tags, - generic_tags, - interpreter_name, - interpreter_version, - mac_platforms, -) +from packvers.tags import compatible_tags +from packvers.tags import cpython_tags +from packvers.tags import generic_tags +from packvers.tags import interpreter_name +from packvers.tags import interpreter_version +from packvers.tags import mac_platforms _osx_arch_pat = re.compile(r"(.+)_(\d+)_(\d+)_(.+)") @@ -132,7 +131,7 @@ def _get_custom_interpreter(implementation=None, version=None): implementation = interpreter_name() if version is None: version = interpreter_version() - return "{}{}".format(implementation, version) + return f"{implementation}{version}" def get_supported( @@ -142,7 +141,8 @@ def get_supported( abis=None, # type: Optional[List[str]] ): # type: (...) -> List[Tag] - """Return a list of supported tags for each version specified in + """ + Return a list of supported tags for each version specified in `versions`. 
:param version: a string version, of the form "33" or "32", diff --git a/etc/scripts/utils_requirements.py b/etc/scripts/utils_requirements.py index 0fc25a35..424bed2e 100644 --- a/etc/scripts/utils_requirements.py +++ b/etc/scripts/utils_requirements.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. # ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -40,7 +39,7 @@ def get_required_name_versions(requirement_lines, with_unpinned=False): req_line = req_line.strip() if not req_line or req_line.startswith("#"): continue - if req_line.startswith("-") or (not with_unpinned and not "==" in req_line): + if req_line.startswith("-") or (not with_unpinned and "==" not in req_line): print(f"Requirement line is not supported: ignored: {req_line}") continue yield get_required_name_version(requirement=req_line, with_unpinned=with_unpinned) @@ -57,21 +56,25 @@ def get_required_name_version(requirement, with_unpinned=False): >>> assert get_required_name_version("fooA==1.2.3.DEV1") == ("fooa", "1.2.3.dev1") >>> assert get_required_name_version("foo==1.2.3", with_unpinned=False) == ("foo", "1.2.3") >>> assert get_required_name_version("foo", with_unpinned=True) == ("foo", "") - >>> assert get_required_name_version("foo>=1.2", with_unpinned=True) == ("foo", ""), get_required_name_version("foo>=1.2") + >>> expected = ("foo", ""), get_required_name_version("foo>=1.2") + >>> assert get_required_name_version("foo>=1.2", with_unpinned=True) == expected >>> try: ... assert not get_required_name_version("foo", with_unpinned=False) ... except Exception as e: ... 
assert "Requirement version must be pinned" in str(e) """ requirement = requirement and "".join(requirement.lower().split()) - assert requirement, f"specifier is required is empty:{requirement!r}" + if not requirement: + raise ValueError(f"specifier is required is empty:{requirement!r}") name, operator, version = split_req(requirement) - assert name, f"Name is required: {requirement}" + if not name: + raise ValueError(f"Name is required: {requirement}") is_pinned = operator == "==" if with_unpinned: version = "" else: - assert is_pinned and version, f"Requirement version must be pinned: {requirement}" + if not is_pinned and version: + raise ValueError(f"Requirement version must be pinned: {requirement}") return name, version @@ -117,7 +120,7 @@ def get_installed_reqs(site_packages_dir): # Also include these packages in the output with --all: wheel, distribute, # setuptools, pip args = ["pip", "freeze", "--exclude-editable", "--all", "--path", site_packages_dir] - return subprocess.check_output(args, encoding="utf-8") + return subprocess.check_output(args, encoding="utf-8") # noqa: S603 comparators = ( @@ -147,9 +150,11 @@ def split_req(req): >>> assert split_req("foo >= 1.2.3 ") == ("foo", ">=", "1.2.3"), split_req("foo >= 1.2.3 ") >>> assert split_req("foo>=1.2") == ("foo", ">=", "1.2"), split_req("foo>=1.2") """ - assert req + if not req: + raise ValueError("req is required") # do not allow multiple constraints and tags - assert not any(c in req for c in ",;") + if any(c in req for c in ",;"): + raise Exception(f"complex requirements with : or ; not supported: {req}") req = "".join(req.split()) if not any(c in req for c in comparators): return req, "", "" diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py index addf8e5e..aafc1d69 100644 --- a/etc/scripts/utils_thirdparty.py +++ b/etc/scripts/utils_thirdparty.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (c) nexB Inc. and others. All rights reserved. 
# ScanCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. +# See https://github.com/aboutcode-org/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import email @@ -25,14 +24,13 @@ import packageurl import requests import saneyaml +import utils_pip_compatibility_tags from commoncode import fileutils from commoncode.hash import multi_checksums from commoncode.text import python_safe_name from packvers import tags as packaging_tags from packvers import version as packaging_version -import utils_pip_compatibility_tags - """ Utilities to manage Python thirparty libraries source, binaries and metadata in local directories and remote repositories. @@ -93,7 +91,8 @@ - parse requirement file - create a TODO queue of requirements to process -- done: create an empty map of processed binary requirements as {package name: (list of versions/tags} +- done: create an empty map of processed binary requirements as + {package name: (list of versions/tags} - while we have package reqs in TODO queue, process one requirement: @@ -355,7 +354,6 @@ def sorted(cls, namevers): @attr.attributes class Distribution(NameVer): - # field names that can be updated from another Distribution or mapping updatable_fields = [ "license_expression", @@ -555,7 +553,8 @@ def download(self, dest_dir=THIRDPARTY_DIR): Download this distribution into `dest_dir` directory. Return the fetched filename.
""" - assert self.filename + if not self.filename: + raise ValueError(f"self.filename has no value but is required: {self.filename!r}") if TRACE_DEEP: print( f"Fetching distribution of {self.name}=={self.version}:", @@ -823,9 +822,9 @@ def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False): """ urls = LinksRepository.from_url(use_cached_index=use_cached_index).links errors = [] - extra_lic_names = [l.get("file") for l in self.extra_data.get("licenses", {})] + extra_lic_names = [lic.get("file") for lic in self.extra_data.get("licenses", {})] extra_lic_names += [self.extra_data.get("license_file")] - extra_lic_names = [ln for ln in extra_lic_names if ln] + extra_lic_names = [eln for eln in extra_lic_names if eln] lic_names = [f"{key}.LICENSE" for key in self.get_license_keys()] for filename in lic_names + extra_lic_names: floc = os.path.join(dest_dir, filename) @@ -845,7 +844,7 @@ def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False): if TRACE: print(f"Fetched license from remote: {lic_url}") - except: + except Exception: try: # try licensedb second lic_url = f"{LICENSEDB_API_URL}/{filename}" @@ -858,8 +857,9 @@ def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False): if TRACE: print(f"Fetched license from licensedb: {lic_url}") - except: - msg = f'No text for license {filename} in expression "{self.license_expression}" from {self}' + except Exception: + msg = f"No text for license {filename} in expression " + msg += f"{self.license_expression!r} from {self}" print(msg) errors.append(msg) @@ -999,7 +999,7 @@ def get_license_link_for_filename(filename, urls): exception if no link is found or if there are more than one link for that file name.
""" - path_or_url = [l for l in urls if l.endswith(f"/{filename}")] + path_or_url = [url for url in urls if url.endswith(f"/{filename}")] if not path_or_url: raise Exception(f"Missing link to file: {filename}") if not len(path_or_url) == 1: @@ -1091,7 +1091,6 @@ def get_sdist_name_ver_ext(filename): @attr.attributes class Sdist(Distribution): - extension = attr.ib( repr=False, type=str, @@ -1129,7 +1128,6 @@ def to_filename(self): @attr.attributes class Wheel(Distribution): - """ Represents a wheel file. @@ -1290,7 +1288,7 @@ def is_pure(self): def is_pure_wheel(filename): try: return Wheel.from_filename(filename).is_pure() - except: + except Exception: return False @@ -1486,8 +1484,7 @@ def get_distributions(self): """ if self.sdist: yield self.sdist - for wheel in self.wheels: - yield wheel + yield from self.wheels def get_url_for_filename(self, filename): """ @@ -1616,7 +1613,8 @@ class PypiSimpleRepository: type=dict, default=attr.Factory(lambda: defaultdict(dict)), metadata=dict( - help="Mapping of {name: {version: PypiPackage, version: PypiPackage, etc} available in this repo" + help="Mapping of {name: {version: PypiPackage, version: PypiPackage, etc} " + "available in this repo" ), ) @@ -1630,7 +1628,8 @@ class PypiSimpleRepository: type=bool, default=False, metadata=dict( - help="If True, use any existing on-disk cached PyPI index files. Otherwise, fetch and cache." + help="If True, use any existing on-disk cached PyPI index files. " + "Otherwise, fetch and cache." ), ) @@ -1639,7 +1638,8 @@ def _get_package_versions_map(self, name): Return a mapping of all available PypiPackage version for this package name. The mapping may be empty. 
It is ordered by version from oldest to newest """ - assert name + if not name: + raise ValueError(f"name is required: {name!r}") normalized_name = NameVer.normalize_name(name) versions = self.packages[normalized_name] if not versions and normalized_name not in self.fetched_package_normalized_names: @@ -1694,7 +1694,7 @@ def fetch_links(self, normalized_name): ) links = collect_urls(text) # TODO: keep sha256 - links = [l.partition("#sha256=") for l in links] + links = [link.partition("#sha256=") for link in links] links = [url for url, _, _sha256 in links] return links @@ -1915,7 +1915,7 @@ def get_remote_file_content( # several redirects and that we can ignore content there. A HEAD request may # not get us this last header print(f" DOWNLOADING: {url}") - with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: + with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: # noqa: S113 status = response.status_code if status != requests.codes.ok: # NOQA if status == 429 and _delay < 20: @@ -2134,10 +2134,9 @@ def call(args, verbose=TRACE): """ if TRACE_DEEP: print("Calling:", " ".join(args)) - with subprocess.Popen( + with subprocess.Popen( # noqa: S603 args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" ) as process: - stdouts = [] while True: line = process.stdout.readline() @@ -2200,7 +2199,7 @@ def download_wheels_with_pip( cli_args.extend(["--requirement", req_file]) if TRACE: - print(f"Downloading wheels using command:", " ".join(cli_args)) + print("Downloading wheels using command:", " ".join(cli_args)) existing = set(os.listdir(dest_dir)) error = False @@ -2233,7 +2232,7 @@ def download_wheels_with_pip( def check_about(dest_dir=THIRDPARTY_DIR): try: - subprocess.check_output(f"venv/bin/about check {dest_dir}".split()) + subprocess.check_output(f"venv/bin/about check {dest_dir}".split()) # noqa: S603 except subprocess.CalledProcessError as cpe: print() print("Invalid ABOUT files:") 
@@ -2284,5 +2283,5 @@ def get_license_expression(declared_licenses): return get_only_expression_from_extracted_license(declared_licenses) except ImportError: # Scancode is not installed, clean and join all the licenses - lics = [python_safe_name(l).lower() for l in declared_licenses] + lics = [python_safe_name(lic).lower() for lic in declared_licenses] return " AND ".join(lics).lower() diff --git a/pyproject.toml b/pyproject.toml index 17a30535..b491e410 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ norecursedirs = [ "dist", "build", "_build", - "dist", "etc", "local", "ci", @@ -34,7 +33,9 @@ norecursedirs = [ "thirdparty", "tmp", "venv", + ".venv", "tests/data", + "*/tests/test_data", ".eggs", "src/*/data", "tests/*/data" @@ -50,3 +51,79 @@ addopts = [ "--strict-markers", "--doctest-modules" ] + +[tool.ruff] +line-length = 100 +extend-exclude = [] +target-version = "py310" +include = [ + "pyproject.toml", + "src/**/*.py", + "etc/**/*.py", + "test/**/*.py", + "tests/**/*.py", + "doc/**/*.py", + "docs/**/*.py", + "*.py", + "." 
+ +] +# ignore test data and testfiles: they should never be linted nor formatted +exclude = [ +# main style + "**/tests/data/**/*", +# scancode-toolkit + "**/tests/*/data/**/*", +# dejacode, purldb + "**/tests/testfiles/**/*", +# vulnerablecode, fetchcode + "**/tests/*/test_data/**/*", + "**/tests/test_data/**/*", +# django migrations + "**/migrations/**/*" +] + +[tool.ruff.lint] +# Rules: https://docs.astral.sh/ruff/rules/ +select = [ +# "E", # pycodestyle +# "W", # pycodestyle warnings + "D", # pydocstyle +# "F", # Pyflakes +# "UP", # pyupgrade +# "S", # flake8-bandit + "I", # isort +# "C9", # McCabe complexity +] +ignore = ["D1", "D200", "D202", "D203", "D205", "D212", "D400", "D415", "I001"] + + +[tool.ruff.lint.isort] +force-single-line = true +lines-after-imports = 1 +default-section = "first-party" +known-first-party = ["src", "tests", "etc/scripts/**/*.py"] +known-third-party = ["click", "pytest"] + +sections = { django = ["django"] } +section-order = [ + "future", + "standard-library", + "django", + "third-party", + "first-party", + "local-folder", +] + +[tool.ruff.lint.mccabe] +max-complexity = 10 + +[tool.ruff.lint.per-file-ignores] +# Place paths of files to be ignored by ruff here +"tests/*" = ["S101"] +"test_*.py" = ["S101"] + + +[tool.doc8] +ignore-path = ["docs/build", "doc/build", "docs/_build", "doc/_build"] +max-line-length=100 diff --git a/requirements-dev.txt b/requirements-dev.txt index 46455179..02135b30 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,8 @@ -aboutcode-toolkit==7.2.0 +aboutcode-toolkit==11.1.1 black==23.1.0 bleach==6.0.0 boolean.py==4.0 -cffi==1.15.1 +cffi==2.0.0 cryptography==39.0.1 docutils==0.19 et-xmlfile==1.1.0 @@ -10,6 +10,7 @@ exceptiongroup==1.1.0 execnet==1.9.0 importlib-metadata==6.0.0 iniconfig==2.0.0 +isort==6.0.1 jaraco.classes==3.2.3 jeepney==0.8.0 jinja2==3.1.2 @@ -26,10 +27,11 @@ packaging==23.0 pathspec==0.11.0 pkginfo==1.9.6 platformdirs==3.0.0 -pluggy==1.0.0 +pluggy==1.6.0 
+pycodestyle==2.13.0 pycparser==2.21 pygments==2.14.0 -pytest==7.2.1 +pytest==8.4.2 pytest-xdist==3.2.0 readme-renderer==37.3 requests-toolbelt==0.10.1 diff --git a/requirements.txt b/requirements.txt index 2be5ed22..665ff891 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,17 @@ -attrs==22.2.0 -beautifulsoup4==4.11.2 -certifi==2022.12.7 -charset-normalizer==3.0.1 -click==8.1.3 -idna==3.4 -pip==23.0 -PyYAML==6.0 -requests==2.28.2 -saneyaml==0.6.0 -setuptools==67.1.0 -soupsieve==2.4 +attrs==25.4.0 +beautifulsoup4==4.14.2 +certifi==2025.10.5 +chardet==5.2.0 +charset-normalizer==3.4.4 +click==8.3.0 +idna==3.10 +pip==25.1.1 +PyYAML==6.0.2 +requests==2.32.2 +saneyaml==0.6.1 +setuptools==80.9.0 +soupsieve==2.8 text-unidecode==1.3 -urllib3==1.26.14 -wheel==0.38.4 +typing_extensions==4.15.0 +urllib3==2.5.0 +wheel==0.38.4 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 984a1f1f..812f1686 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,4 @@ [metadata] -license_files = - apache-2.0.LICENSE - NOTICE - AUTHORS.rst - CHANGELOG.rst - commoncode.ABOUT - README.rst name = commoncode author = nexB. Inc. 
and others author_email = info@aboutcode.org @@ -15,7 +8,7 @@ license = Apache-2.0 description = Set of common utilities, originally split from ScanCode long_description = file:README.rst long_description_content_type = text/x-rst -url = https://github.com/nexB/commoncode +url = https://github.com/aboutcode-org/commoncode classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers @@ -28,22 +21,31 @@ keywords = utilities scancode-toolkit +license_files = + apache-2.0.LICENSE + NOTICE + AUTHORS.rst + CHANGELOG.rst + CODE_OF_CONDUCT.rst + README.rst + commoncode.ABOUT [options] -package_dir = =src +python_requires = >=3.10 +package_dir = + =src packages = find: include_package_data = true zip_safe = false setup_requires = setuptools_scm[toml] >= 4 -python_requires = >=3.7 - install_requires = - attrs >= 18.1, !=20.1.0 - Beautifulsoup4 >= 4.0.0 - click >= 6.7, !=7.0 - requests >= 2.7.0 + attrs >= 18.1,!=20.1.0;python_version<'3.11' + attrs >= 22.1.0;python_version>='3.11' + Beautifulsoup4[chardet] >= 4.13.0 + click >= 8.3.0 + requests[use_chardet_on_py3] >= 2.7.0 saneyaml >= 0.5.2 text_unidecode >= 1.0 @@ -53,18 +55,17 @@ where = src [options.extras_require] -testing = - pytest >= 6, != 7.0.0 +dev = + pytest >= 7.0.1 pytest-xdist >= 2 - aboutcode-toolkit >= 7.0.2 + aboutcode-toolkit >= 11.1.1 pycodestyle >= 2.8.0 twine - black - isort - -docs = + ruff Sphinx>=5.0.2 sphinx-rtd-theme>=1.0.0 sphinx-reredirects >= 0.1.2 doc8>=0.11.2 - + sphinx-autobuild + sphinx-rtd-dark-mode>=1.3.0 + sphinx-copybutton diff --git a/src/commoncode/__init__.py b/src/commoncode/__init__.py index b3c06862..79584fca 100644 --- a/src/commoncode/__init__.py +++ b/src/commoncode/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. 
+# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -14,16 +14,16 @@ def set_re_max_cache(max_cache=1000000): libraries use a lot of regexes: therefore 100 is not enough to benefit from caching. """ - import re import fnmatch + import re - remax = getattr(re, '_MAXCACHE', 0) + remax = getattr(re, "_MAXCACHE", 0) if remax < max_cache: - setattr(re, '_MAXCACHE', max_cache) + setattr(re, "_MAXCACHE", max_cache) - fnmatchmax = getattr(fnmatch, '_MAXCACHE', 0) + fnmatchmax = getattr(fnmatch, "_MAXCACHE", 0) if fnmatchmax < max_cache: - setattr(fnmatch, '_MAXCACHE', max_cache) + setattr(fnmatch, "_MAXCACHE", max_cache) set_re_max_cache() diff --git a/src/commoncode/archive.py b/src/commoncode/archive.py index 01b7e0ba..1d3e42cc 100644 --- a/src/commoncode/archive.py +++ b/src/commoncode/archive.py @@ -2,18 +2,19 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -from functools import partial -import os -from os import path import gzip +import os import tarfile import zipfile +from functools import partial +from os import path from commoncode.system import on_windows +from commoncode.system import py314 """ Mimimal tar and zip file handling, primarily for testing. @@ -39,7 +40,7 @@ def _extract_tar_raw(test_path, target_dir, to_bytes, *args, **kwargs): extract_tar_uni = partial(_extract_tar_raw, to_bytes=False) -def extract_tar(location, target_dir, verbatim=False, *args, **kwargs): +def extract_tar(location, target_dir, verbatim=False, filter=None, *args, **kwargs): """ Extract a tar archive at location in the target_dir directory. 
If `verbatim` is True preserve the permissions. @@ -47,7 +48,7 @@ def extract_tar(location, target_dir, verbatim=False, *args, **kwargs): # always for using bytes for paths on all OSses... tar seems to use bytes internally # and get confused otherwise location = os.fsencode(location) - with open(location, 'rb') as input_tar: + with open(location, "rb") as input_tar: tar = None try: tar = tarfile.open(fileobj=input_tar) @@ -58,7 +59,10 @@ def extract_tar(location, target_dir, verbatim=False, *args, **kwargs): if not verbatim: tarinfo.mode = 0o755 to_extract.append(tarinfo) - tar.extractall(target_dir, members=to_extract) + if py314 and filter: + tar.extractall(target_dir, members=to_extract, filter=filter) + else: + tar.extractall(target_dir, members=to_extract) finally: if tar: tar.close() @@ -69,7 +73,7 @@ def extract_zip(location, target_dir, *args, **kwargs): Extract a zip archive file at location in the target_dir directory. """ if not path.isfile(location) and zipfile.is_zipfile(location): - raise Exception('Incorrect zip file %(location)r' % locals()) + raise Exception("Incorrect zip file %(location)r" % locals()) with zipfile.ZipFile(location) as zipf: for info in zipf.infolist(): @@ -82,7 +86,7 @@ def extract_zip(location, target_dir, *args, **kwargs): if not path.exists(target): os.makedirs(target) if not path.exists(target): - with open(target, 'wb') as f: + with open(target, "wb") as f: f.write(content) @@ -92,7 +96,7 @@ def extract_zip_raw(location, target_dir, *args, **kwargs): Use the builtin extractall function """ if not path.isfile(location) and zipfile.is_zipfile(location): - raise Exception('Incorrect zip file %(location)r' % locals()) + raise Exception("Incorrect zip file %(location)r" % locals()) with zipfile.ZipFile(location) as zipf: zipf.extractall(path=target_dir) @@ -124,6 +128,6 @@ def get_gz_compressed_file_content(location): Uncompress a compressed file at `location` and return its content as a byte string. Raise Exceptions on errors. 
""" - with gzip.GzipFile(location, 'rb') as compressed: + with gzip.GzipFile(location, "rb") as compressed: content = compressed.read() return content diff --git a/src/commoncode/cliutils.py b/src/commoncode/cliutils.py index 05357bfb..5b0a01e2 100644 --- a/src/commoncode/cliutils.py +++ b/src/commoncode/cliutils.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -10,12 +10,10 @@ import sys import click - -from click.utils import echo +from click._termui_impl import ProgressBar # FIXME: this is NOT API from click.termui import style from click.types import BoolParamType -# FIXME: this is NOT API -from click._termui_impl import ProgressBar +from click.utils import echo from commoncode.fileutils import file_name from commoncode.fileutils import splitext @@ -24,6 +22,19 @@ # Tracing flags TRACE = False +try: + # Introduced in click 8.3.0 to have a sentinel value + # (https://peps.python.org/pep-0661/) for flag values + # and default values instead of None to differentiate + # between explicitly setting a `None` value and + # not setting and value. 
+ # See https://github.com/pallets/click/pull/3030 and + # https://github.com/pallets/click/releases/tag/8.3.0 + from click.core import UNSET +except ImportError: + # to maintain compatibility with click < 8.3.0 + UNSET = None + def logger_debug(*args): pass @@ -31,13 +42,14 @@ def logger_debug(*args): if TRACE: import logging + logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout) logger.setLevel(logging.DEBUG) def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) - and a or repr(a) for a in args)) + return logger.debug(" ".join(isinstance(a, str) and a or repr(a) for a in args)) + """ Command line UI utilities for improved options, help and progress reporting. @@ -51,7 +63,7 @@ class BaseCommand(click.Command): # override this in sub-classes with a command-specific message such as # "Try 'scancode --help' for help on options and arguments." - short_usage_help = '' + short_usage_help = "" def get_usage(self, ctx): """ @@ -61,8 +73,12 @@ def get_usage(self, ctx): return super(BaseCommand, self).get_usage(ctx) + self.short_usage_help def main( - self, args=None, prog_name=None, complete_var=None, - standalone_mode=True, **extra, + self, + args=None, + prog_name=None, + complete_var=None, + standalone_mode=True, + **extra, ): """ Workaround click 4.0 bug https://github.com/mitsuhiko/click/issues/365 @@ -83,14 +99,20 @@ class GroupedHelpCommand(BaseCommand): help where each option is grouped by group in the help. 
""" - short_usage_help = ''' -Try the '--help' option for help on options and arguments.''' + short_usage_help = """ +Try the '--help' option for help on options and arguments.""" def __init__( - self, name, context_settings=None, callback=None, params=None, + self, + name, + context_settings=None, + callback=None, + params=None, help=None, # NOQA - epilog=None, short_help=None, - options_metavar='[OPTIONS]', add_help_option=True, + epilog=None, + short_help=None, + options_metavar="[OPTIONS]", + add_help_option=True, plugin_options=(), ): """ @@ -120,19 +142,21 @@ def format_options(self, ctx, formatter): to MISC_GROUP group. """ # this mapping defines the CLI help presentation order - help_groups = dict([ - (SCAN_GROUP, []), - (OTHER_SCAN_GROUP, []), - (SCAN_OPTIONS_GROUP, []), - (OUTPUT_GROUP, []), - (OUTPUT_FILTER_GROUP, []), - (OUTPUT_CONTROL_GROUP, []), - (PRE_SCAN_GROUP, []), - (POST_SCAN_GROUP, []), - (CORE_GROUP, []), - (MISC_GROUP, []), - (DOC_GROUP, []), - ]) + help_groups = dict( + [ + (SCAN_GROUP, []), + (OTHER_SCAN_GROUP, []), + (SCAN_OPTIONS_GROUP, []), + (OUTPUT_GROUP, []), + (OUTPUT_FILTER_GROUP, []), + (OUTPUT_CONTROL_GROUP, []), + (PRE_SCAN_GROUP, []), + (POST_SCAN_GROUP, []), + (CORE_GROUP, []), + (MISC_GROUP, []), + (DOC_GROUP, []), + ] + ) for param in self.get_params(ctx): # Get the list of option's name and help text @@ -140,11 +164,11 @@ def format_options(self, ctx, formatter): if not help_record: continue # organize options by group - help_group = getattr(param, 'help_group', MISC_GROUP) - sort_order = getattr(param, 'sort_order', 100) + help_group = getattr(param, "help_group", MISC_GROUP) + sort_order = getattr(param, "sort_order", 100) help_groups[help_group].append((sort_order, help_record)) - with formatter.section('Options'): + with formatter.section("Options"): for group, help_records in help_groups.items(): if not help_records: continue @@ -153,21 +177,31 @@ def format_options(self, ctx, formatter): 
formatter.write_dl(sorted_records) -# overriden and copied from Click to work around Click woes for -# https://github.com/nexB/scancode-toolkit/issues/2583 -class DebuggedProgressBar(ProgressBar): +class CompatProgressBar(ProgressBar): + # TODO Remove when dropping support for Click 8.1. + @property + def is_hidden(self) -> bool: + return self.hidden + + @is_hidden.setter + def is_hidden(self, value: bool) -> None: + self.hidden = value + +# overriden and copied from Click to work around Click woes for +# https://github.com/aboutcode-org/scancode-toolkit/issues/2583 +class DebuggedProgressBar(CompatProgressBar): # overriden and copied from Click to work around Click woes for - # https://github.com/nexB/scancode-toolkit/issues/2583 + # https://github.com/aboutcode-org/scancode-toolkit/issues/2583 def make_step(self, n_steps): # always increment self.pos += n_steps or 1 super(DebuggedProgressBar, self).make_step(n_steps) # overriden and copied from Click to work around Click woes for - # https://github.com/nexB/scancode-toolkit/issues/2583 + # https://github.com/aboutcode-org/scancode-toolkit/issues/2583 def generator(self): - if self.is_hidden: + if self.hidden: yield from self.iter else: for rv in self.iter: @@ -186,11 +220,11 @@ class EnhancedProgressBar(DebuggedProgressBar): """ def render_progress(self): - if not self.is_hidden: + if not self.hidden: return super(EnhancedProgressBar, self).render_progress() -class ProgressLogger(ProgressBar): +class ProgressLogger(CompatProgressBar): """ A subclass of Click ProgressBar providing a verbose line-by-line progress reporting. 
@@ -207,7 +241,7 @@ class ProgressLogger(ProgressBar): def __init__(self, *args, **kwargs): super(ProgressLogger, self).__init__(*args, **kwargs) - self.is_hidden = False + self.hidden = False def render_progress(self): line = self.format_progress_line() @@ -221,7 +255,7 @@ def format_progress_line(self): if self.item_show_func: item_info = self.item_show_func(self.current_item) else: - item_info = '.' + item_info = "." if item_info: return item_info @@ -230,16 +264,16 @@ def render_finish(self): BAR_WIDTH = 20 -BAR_SEP_LEN = len(' ') +BAR_SEP_LEN = len(" ") def progressmanager( iterable=None, length=None, - fill_char='#', - empty_char='-', + fill_char="#", + empty_char="-", bar_template=None, - info_sep=' ', + info_sep=" ", show_eta=False, show_percent=False, show_pos=True, @@ -264,10 +298,7 @@ def progressmanager( progress_class = ProgressLogger else: progress_class = EnhancedProgressBar - bar_template = ( - '[%(bar)s]' + ' ' + '%(info)s' - if bar_template is None else bar_template - ) + bar_template = "[%(bar)s]" + " " + "%(info)s" if bar_template is None else bar_template kwargs = dict( iterable=iterable, @@ -290,11 +321,11 @@ def progressmanager( # Click 8. See https://github.com/pallets/click/pull/1698 # Note that we use this argument on Click 8 in order to fix a regression # that this same PR introduced by Click and tracked originally at - # https://github.com/nexB/scancode-toolkit/issues/2583 + # https://github.com/aboutcode-org/scancode-toolkit/issues/2583 # Here we create a dummy progress_class and then for the attribute presence. pb = progress_class([]) - if hasattr(pb, 'update_min_steps'): - kwargs['update_min_steps'] = update_min_steps + if hasattr(pb, "update_min_steps"): + kwargs["update_min_steps"] = update_min_steps return progress_class(**kwargs) @@ -317,7 +348,7 @@ def fixed_width_file_name(path, max_length=25): >>> assert fwfn == '' """ if not path: - return '' + return "" # get the path as unicode for display! 
filename = file_name(path) @@ -328,14 +359,14 @@ def fixed_width_file_name(path, max_length=25): len_ext = len(ext) remaining_length = max_length - len_ext - dots - if remaining_length < 5 or remaining_length < (len_ext + dots): - return '' + if remaining_length < 5 or remaining_length < (len_ext + dots): + return "" prefix_and_suffix_length = abs(remaining_length // 2) prefix = base_name[:prefix_and_suffix_length] - ellipsis = dots * '.' + ellipsis = dots * "." suffix = base_name[-prefix_and_suffix_length:] - return '{prefix}{ellipsis}{suffix}{ext}'.format(**locals()) + return "{prefix}{ellipsis}{suffix}{ext}".format(**locals()) def file_name_max_len(used_width=BAR_WIDTH + 1 + 7 + 1 + 8 + 1): @@ -359,13 +390,13 @@ def file_name_max_len(used_width=BAR_WIDTH + 1 + 7 + 1 + 8 + 1): return max_filename_length -def path_progress_message(item, verbose=False, prefix='Scanned: '): +def path_progress_message(item, verbose=False, prefix="Scanned: "): """ Return a styled message suitable for progress display when processing a path for an `item` tuple of (location, rid, scan_errors, *other items) """ if not item: - return '' + return "" location = item[0] errors = item[2] location = toascii(location) @@ -374,25 +405,25 @@ def path_progress_message(item, verbose=False, prefix='Scanned: '): max_file_name_len = file_name_max_len() # do not display a file name in progress bar if there is no space available if max_file_name_len <= 10: - return '' + return "" progress_line = fixed_width_file_name(location, max_file_name_len) - color = 'red' if errors else 'green' + color = "red" if errors else "green" return style(prefix) + style(progress_line, fg=color) # CLI help groups -SCAN_GROUP = 'primary scans' -SCAN_OPTIONS_GROUP = 'scan options' -OTHER_SCAN_GROUP = 'other scans' -OUTPUT_GROUP = 'output formats' -OUTPUT_CONTROL_GROUP = 'output control' -OUTPUT_FILTER_GROUP = 'output filters' -PRE_SCAN_GROUP = 'pre-scan' -POST_SCAN_GROUP = 'post-scan' -MISC_GROUP = 'miscellaneous' -DOC_GROUP 
= 'documentation' -CORE_GROUP = 'core' +SCAN_GROUP = "primary scans" +SCAN_OPTIONS_GROUP = "scan options" +OTHER_SCAN_GROUP = "other scans" +OUTPUT_GROUP = "output formats" +OUTPUT_CONTROL_GROUP = "output control" +OUTPUT_FILTER_GROUP = "output filters" +PRE_SCAN_GROUP = "pre-scan" +POST_SCAN_GROUP = "post-scan" +MISC_GROUP = "miscellaneous" +DOC_GROUP = "documentation" +CORE_GROUP = "core" class PluggableCommandLineOption(click.Option): @@ -411,7 +442,7 @@ def __init__( confirmation_prompt=False, hide_input=False, is_flag=None, - flag_value=None, + flag_value=UNSET, multiple=False, count=False, allow_from_autoenv=True, @@ -431,7 +462,7 @@ def __init__( # a sequence of other option name strings that this option # conflicts with if they are set conflicting_options=(), - **kwargs + **kwargs, ): super(PluggableCommandLineOption, self).__init__( param_decls=param_decls, @@ -446,7 +477,7 @@ def __init__( allow_from_autoenv=allow_from_autoenv, type=type, help=help, - **kwargs + **kwargs, ) self.help_group = help_group @@ -463,9 +494,9 @@ def __repr__(self, *args, **kwargs): conflicting_options = self.conflicting_options return ( - 'PluggableCommandLineOption' % locals() + "PluggableCommandLineOption" % locals() ) def validate_dependencies(self, ctx, value): @@ -488,20 +519,20 @@ def validate_option_dependencies(ctx): """ values = ctx.params if TRACE: - logger_debug('validate_option_dependencies: values:') + logger_debug("validate_option_dependencies: values:") for va in sorted(values.items()): - logger_debug(' ', va) + logger_debug(" ", va) for param in ctx.command.params: if param.is_eager: continue if not isinstance(param, PluggableCommandLineOption): if TRACE: - logger_debug(' validate_option_dependencies: skip param:', param) + logger_debug(" validate_option_dependencies: skip param:", param) continue value = values.get(param.name) if TRACE: - logger_debug(' validate_option_dependencies: param:', param, 'value:', value) + logger_debug(" validate_option_dependencies: 
param:", "value:", value) param.validate_dependencies(ctx, value) @@ -533,8 +564,8 @@ def _is_set(_value, _param): if TRACE: logger_debug() - logger_debug('Checking param:', param) - logger_debug(' value:', value, 'is_set:' , is_set) + logger_debug("Checking param:", param) + logger_debug(" value:", value, "is_set:", is_set) if not is_set: return @@ -552,27 +583,29 @@ def _is_set(_value, _param): if TRACE: logger_debug() - logger_debug(' Available other params:') + logger_debug(" Available other params:") for oparam in oparams: - logger_debug(' other param:', oparam) - logger_debug(' value:', ctx.params.get(oparam.name)) + logger_debug(" other param:", oparam) + logger_debug(" value:", ctx.params.get(oparam.name)) if required: - logger_debug(' missing names:', missing_onames) + logger_debug(" missing names:", missing_onames) if required and missing_onames: opt = param.opts[-1] oopts = [oparam.opts[-1] for oparam in oparams] - omopts = ['--' + oname.replace('_', '-') for oname in missing_onames] + omopts = ["--" + oname.replace("_", "-") for oname in missing_onames] oopts.extend(omopts) - oopts = ', '.join(oopts) - msg = ('The option %(opt)s requires the option(s) %(all_opts)s.' - 'and is missing %(omopts)s. ' - 'You must set all of these options if you use this option.' % locals()) + oopts = ", ".join(oopts) + msg = ( + "The option %(opt)s requires the option(s) %(oopts)s " + "and is missing %(omopts)s. " + "You must set all of these options if you use this option."
% locals() + ) raise click.UsageError(msg) if TRACE: logger_debug() - logger_debug(' Checking other params:') + logger_debug(" Checking other params:") opt = param.opts[-1] @@ -581,21 +614,25 @@ def _is_set(_value, _param): ois_set = _is_set(ovalue, oparam) if TRACE: - logger_debug(' Checking oparam:', oparam) - logger_debug(' value:', ovalue, 'ois_set:' , ois_set) + logger_debug(" Checking oparam:", oparam) + logger_debug(" value:", ovalue, "ois_set:", ois_set) # by convention the last opt is the long form oopt = oparam.opts[-1] - oopts = ', '.join(oparam.opts[-1] for oparam in oparams) - all_opts = '%(opt)s and %(oopts)s' % locals() + oopts = ", ".join(oparam.opts[-1] for oparam in oparams) + all_opts = "%(opt)s and %(oopts)s" % locals() if required and not ois_set: - msg = ('The option %(opt)s requires the option(s) %(oopts)s ' - 'and is missing %(oopt)s. ' - 'You must set all of these options if you use this option.' % locals()) + msg = ( + "The option %(opt)s requires the option(s) %(oopts)s " + "and is missing %(oopt)s. " + "You must set all of these options if you use this option." % locals() + ) raise click.UsageError(msg) - if not required and ois_set: - msg = ('The option %(opt)s cannot be used together with the %(oopts)s option(s) ' - 'and %(oopt)s is used. ' - 'You can set only one of these options at a time.' % locals()) + if not required and ois_set: + msg = ( + "The option %(opt)s cannot be used together with the %(oopts)s option(s) " + "and %(oopt)s is used. " + "You can set only one of these options at a time." % locals() + ) raise click.UsageError(msg) diff --git a/src/commoncode/codec.py b/src/commoncode/codec.py index 6a0f11af..fe84d630 100644 --- a/src/commoncode/codec.py +++ b/src/commoncode/codec.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
-# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -13,8 +13,13 @@ Numbers to bytes or strings and URLs coder/decoders. """ -c2i = lambda c: c -i2c = lambda i: bytes([i]) + +def c2i(c): + return c + + +def i2c(i): + return bytes([i]) def num_to_bin(num): @@ -25,16 +30,16 @@ def num_to_bin(num): """ # Zero is not encoded but returned as an empty value if num == 0: - return b'\x00' + return b"\x00" - return num.to_bytes((num.bit_length() + 7) // 8, 'big') + return num.to_bytes((num.bit_length() + 7) // 8, "big") def bin_to_num(binstr): """ Convert a big endian byte-ordered binary string to an integer or long. """ - return int.from_bytes(binstr, byteorder='big', signed=False) + return int.from_bytes(binstr, byteorder="big", signed=False) def urlsafe_b64encode(s): @@ -49,7 +54,7 @@ def urlsafe_b64decode(b64): Decode a url safe base64-encoded string. Note that we use stddecode to work around a bug in the standard library. """ - b = b64.replace(b'-', b'+').replace(b'_', b'/') + b = b64.replace(b"-", b"+").replace(b"_", b"/") return stddecode(b) diff --git a/src/commoncode/command.py b/src/commoncode/command.py index 65818ea3..59da39c3 100644 --- a/src/commoncode/command.py +++ b/src/commoncode/command.py @@ -2,24 +2,23 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# -import ctypes import contextlib +import ctypes import io -import os -from os import path - import logging +import os import signal import subprocess +from os import path +from commoncode import text from commoncode.fileutils import get_temp_dir from commoncode.system import on_posix from commoncode.system import on_windows -from commoncode import text """ Wrapper for executing external commands in sub-processes which works @@ -33,15 +32,16 @@ if TRACE: import sys + logging.basicConfig(stream=sys.stdout) logger.setLevel(logging.DEBUG) # current directory is the root dir of this library curr_dir = path.dirname(path.dirname(path.abspath(__file__))) -PATH_ENV_VAR = 'PATH' -LD_LIBRARY_PATH = 'LD_LIBRARY_PATH' -DYLD_LIBRARY_PATH = 'DYLD_LIBRARY_PATH' +PATH_ENV_VAR = "PATH" +LD_LIBRARY_PATH = "LD_LIBRARY_PATH" +DYLD_LIBRARY_PATH = "DYLD_LIBRARY_PATH" def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE): @@ -68,10 +68,10 @@ def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE): cwd = cwd or curr_dir # temp files for stderr and stdout - tmp_dir = get_temp_dir(prefix='cmd-') + tmp_dir = get_temp_dir(prefix="cmd-") - sop = path.join(tmp_dir, 'stdout') - sep = path.join(tmp_dir, 'stderr') + sop = path.join(tmp_dir, "stdout") + sep = path.join(tmp_dir, "stderr") # shell==True is DANGEROUS but we are not running arbitrary commands # though we can execute commands that just happen to be in the path @@ -81,15 +81,15 @@ def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE): if log: printer = logger.debug if TRACE else lambda x: print(x) printer( - 'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n' - 'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r' - % locals()) + "Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n" + "shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r" % locals() + ) proc = None rc = 100 try: - with io.open(sop, 'wb') as stdout, io.open(sep, 
'wb') as stderr, pushd(cmd_dir): + with io.open(sop, "wb") as stdout, io.open(sep, "wb") as stderr, pushd(cmd_dir): proc = subprocess.Popen( full_cmd, cwd=cwd, @@ -108,11 +108,11 @@ def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE): if not to_files: # return output as ASCII string loaded from the output files - with open(sop, 'rb') as so: + with open(sop, "rb") as so: sor = so.read() sop = text.toascii(sor).strip() - with open(sep, 'rb') as se: + with open(sep, "rb") as se: ser = se.read() sep = text.toascii(ser).strip() @@ -129,9 +129,9 @@ def execute2( log=TRACE, ): """ - DEPRECATED: DO NOT USE. Use execute() instead Run a `cmd_loc` command with the `args` arguments list and return the return code, the stdout and stderr. + DEPRECATED: DO NOT USE. Use execute() instead To avoid RAM exhaustion, always write stdout and stderr streams to files. @@ -167,7 +167,7 @@ def get_env(base_vars=None, lib_dir=None): # Create and add LD environment variables if lib_dir and on_posix: - new_path = f'{lib_dir}' + new_path = f"{lib_dir}" # on Linux/posix ld_lib_path = os.environ.get(LD_LIBRARY_PATH) env_vars.update({LD_LIBRARY_PATH: update_path_var(ld_lib_path, new_path)}) @@ -195,15 +195,15 @@ def close_pipe(p): except IOError: pass - close_pipe(getattr(proc, 'stdin', None)) - close_pipe(getattr(proc, 'stdout', None)) - close_pipe(getattr(proc, 'stderr', None)) + close_pipe(getattr(proc, "stdin", None)) + close_pipe(getattr(proc, "stdout", None)) + close_pipe(getattr(proc, "stderr", None)) try: # Ensure process death otherwise proc.wait may hang in some cases # NB: this will run only on POSIX OSes supporting signals os.kill(proc.pid, signal.SIGKILL) # NOQA - except: + except Exception: pass # This may slow things down a tad on non-POSIX Oses but is safe: @@ -216,7 +216,7 @@ def load_shared_library(dll_loc, *args): Return the loaded shared library object from the ``dll_loc`` location. 
""" if not dll_loc or not path.exists(dll_loc): - raise ImportError(f'Shared library does not exists: dll_loc: {dll_loc}') + raise ImportError(f"Shared library does not exists: dll_loc: {dll_loc}") if not isinstance(dll_loc, str): dll_loc = os.fsdecode(dll_loc) @@ -228,19 +228,22 @@ def load_shared_library(dll_loc, *args): with pushd(dll_dir): lib = ctypes.CDLL(dll_loc) except OSError as e: - from pprint import pformat import traceback - msgs = tuple([ - f'ctypes.CDLL("{dll_loc}")', - 'os.environ:\n{}'.format(pformat(dict(os.environ))), - traceback.format_exc(), - ]) + from pprint import pformat + + msgs = tuple( + [ + f'ctypes.CDLL("{dll_loc}")', + "os.environ:\n{}".format(pformat(dict(os.environ))), + traceback.format_exc(), + ] + ) raise Exception(msgs) from e if lib and lib._name: return lib - raise Exception(f'Failed to load shared library with ctypes: {dll_loc}') + raise Exception(f"Failed to load shared library with ctypes: {dll_loc}") @contextlib.contextmanager @@ -267,7 +270,7 @@ def update_path_var(existing_path_var, new_path): if not new_path: return existing_path_var - existing_path_var = existing_path_var or '' + existing_path_var = existing_path_var or "" existing_path_var = os.fsdecode(existing_path_var) new_path = os.fsdecode(new_path) @@ -292,7 +295,11 @@ def update_path_var(existing_path_var, new_path): return updated_path_var -PATH_VARS = DYLD_LIBRARY_PATH, LD_LIBRARY_PATH, 'PATH', +PATH_VARS = ( + DYLD_LIBRARY_PATH, + LD_LIBRARY_PATH, + "PATH", +) def searchable_paths(env_vars=PATH_VARS): @@ -302,7 +309,7 @@ def searchable_paths(env_vars=PATH_VARS): """ dirs = [] for env_var in env_vars: - value = os.environ.get(env_var, '') or '' + value = os.environ.get(env_var, "") or "" dirs.extend(value.split(os.pathsep)) dirs = [os.path.realpath(d.strip()) for d in dirs if d.strip()] return tuple(d for d in dirs if os.path.isdir(d)) diff --git a/src/commoncode/compat.py b/src/commoncode/compat.py index 7c059799..bad5d38c 100644 --- 
a/src/commoncode/compat.py +++ b/src/commoncode/compat.py @@ -2,10 +2,13 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # unicode = str # NOQA long = int # NOQA -integer_types = int, long, +integer_types = ( + int, + long, +) diff --git a/src/commoncode/datautils.py b/src/commoncode/datautils.py index 1b848d54..7570fb57 100644 --- a/src/commoncode/datautils.py +++ b/src/commoncode/datautils.py @@ -2,28 +2,38 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # +import typing + import attr from attr.validators import in_ as choices # NOQA -import typing """ Utilities and helpers for data classes. 
""" -HELP_METADATA = '__field_help' -LABEL_METADATA = '__field_label' +HELP_METADATA = "__field_help" +LABEL_METADATA = "__field_label" -def attribute(default=attr.NOTHING, validator=None, - repr=False, eq=True, order=True, # NOQA - init=True, type=None, converter=None, # NOQA - help=None, label=None, metadata=None,): # NOQA +def attribute( + default=attr.NOTHING, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + init=True, + type=None, + converter=None, # NOQA + help=None, + label=None, + metadata=None, +): # NOQA """ - A generic attribute with help metadata and that is not included in the + Return a generic attribute with help metadata and that is not included in the representation by default. """ metadata = metadata or dict() @@ -42,14 +52,22 @@ def attribute(default=attr.NOTHING, validator=None, init=init, metadata=metadata, type=type, - converter=converter + converter=converter, ) -def Boolean(default=False, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def Boolean( + default=False, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A boolean attribute. + Return a boolean attribute. """ return attribute( default=default, @@ -65,10 +83,18 @@ def Boolean(default=False, validator=None, repr=False, eq=True, order=True, # N ) -def TriBoolean(default=None, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def TriBoolean( + default=None, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A tri-boolean attribute with possible values of None, True and False. + Return a tri-boolean attribute with possible values of None, True and False. 
""" return attribute( default=default, @@ -84,10 +110,18 @@ def TriBoolean(default=None, validator=None, repr=False, eq=True, order=True, # ) -def String(default=None, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def String( + default=None, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A string attribute. + Return a string attribute. """ return attribute( default=default, @@ -103,10 +137,18 @@ def String(default=None, validator=None, repr=False, eq=True, order=True, # NOQ ) -def Integer(default=0, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def Integer( + default=0, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - An integer attribute. + Return an integer attribute. """ converter = converter or attr.converters.optional(int) return attribute( @@ -123,10 +165,18 @@ def Integer(default=0, validator=None, repr=False, eq=True, order=True, # NOQA ) -def Float(default=0.0, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def Float( + default=0.0, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A float attribute. + Return a float attribute. 
""" return attribute( default=default, @@ -142,11 +192,19 @@ def Float(default=0.0, validator=None, repr=False, eq=True, order=True, # NOQA ) -def List(item_type=typing.Any, default=attr.NOTHING, validator=None, - repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def List( + item_type=typing.Any, + default=attr.NOTHING, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A list attribute: the optional item_type defines the type of items it stores. + Return a list attribute: the optional item_type defines the type of items it stores. """ if default is attr.NOTHING: default = attr.Factory(list) @@ -165,11 +223,19 @@ def List(item_type=typing.Any, default=attr.NOTHING, validator=None, ) -def Mapping(value_type=typing.Any, default=attr.NOTHING, validator=None, - repr=False, eq=True, order=True, # NOQA - converter=None, help=None, label=None): # NOQA +def Mapping( + value_type=typing.Any, + default=attr.NOTHING, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + help=None, + label=None, +): # NOQA """ - A mapping attribute: the optional value_type defines the type of values it + Return a mapping attribute: the optional value_type defines the type of values it stores. The key is always a string. Notes: in Python 2 the type is Dict as there is no typing available for @@ -191,15 +257,24 @@ def Mapping(value_type=typing.Any, default=attr.NOTHING, validator=None, label=label, ) + ################################################## # FIXME: add proper support for dates!!! ################################################## -def Date(default=None, validator=None, repr=False, eq=True, order=True, # NOQA - converter=None, label=None, help=None,): # NOQA +def Date( + default=None, + validator=None, + repr=False, + eq=True, + order=True, # NOQA + converter=None, + label=None, + help=None, +): # NOQA """ - A date attribute. 
It always serializes to an ISO date string. + Return a date attribute. It always serializes to an ISO date string. Behavior is TBD and for now this is exactly a string. """ return String( diff --git a/src/commoncode/date.py b/src/commoncode/date.py index 62dd6937..2493265e 100644 --- a/src/commoncode/date.py +++ b/src/commoncode/date.py @@ -2,17 +2,17 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import calendar -from datetime import datetime import os +from datetime import datetime def isoformat(utc_date): - return datetime.isoformat(utc_date).replace('T', ' ') + return datetime.isoformat(utc_date).replace("T", " ") def get_file_mtime(location, iso=True): @@ -20,7 +20,7 @@ def get_file_mtime(location, iso=True): Return a string containing the last modified date of a file formatted as an ISO time stamp if ISO is True or as a raw number since epoch. """ - date = '' + date = "" # FIXME: use file types if not os.path.isdir(location): mtime = os.stat(location).st_mtime @@ -37,5 +37,4 @@ def secs_from_epoch(d): Return a number of seconds since epoch for a date time stamp """ # FIXME: what does this do? 
- return calendar.timegm(datetime.strptime(d.split('.')[0], - '%Y-%m-%d %H:%M:%S').timetuple()) + return calendar.timegm(datetime.strptime(d.split(".")[0], "%Y-%m-%d %H:%M:%S").timetuple()) diff --git a/src/commoncode/dict_utils.py b/src/commoncode/dict_utils.py index ca017d56..c35c3d8b 100644 --- a/src/commoncode/dict_utils.py +++ b/src/commoncode/dict_utils.py @@ -1,4 +1,3 @@ - # Copyright (c) 2003-2012 Raymond Hettinger # SPDX-License-Identifier: Python-2.0 @@ -19,6 +18,7 @@ def sparsify(d): Example: >>> sparsify({1: 3, 4: 5}) {1: 3, 4: 5} + """ e = d.copy() d.update(e) diff --git a/src/commoncode/distro.py b/src/commoncode/distro.py index 207bfba3..2e1a29a0 100644 --- a/src/commoncode/distro.py +++ b/src/commoncode/distro.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -32,11 +32,5 @@ def parse_os_release(location): """ with open(location) as osrl: lines = (line.strip() for line in osrl) - lines = ( - line.partition('=') for line in lines - if line and not line.startswith('#') - ) - return { - key.strip(): ''.join(shlex.split(value)) - for key, _, value in lines - } + lines = (line.partition("=") for line in lines if line and not line.startswith("#")) + return {key.strip(): "".join(shlex.split(value)) for key, _, value in lines} diff --git a/src/commoncode/fetch.py b/src/commoncode/fetch.py index 43e47fed..cf3d6dad 100644 --- a/src/commoncode/fetch.py +++ b/src/commoncode/fetch.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
-# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -35,18 +35,18 @@ def download_url(url, file_name=None, verify=True, timeout=10): try: response = requests.get(url, **requests_args) except (ConnectionError, InvalidSchema) as e: - logger.error('download_url: Download failed for %(url)r' % locals()) + logger.error("download_url: Download failed for %(url)r" % locals()) raise status = response.status_code if status != 200: - msg = 'download_url: Download failed for %(url)r with %(status)r' % locals() + msg = "download_url: Download failed for %(url)r with %(status)r" % locals() logger.error(msg) raise Exception(msg) - tmp_dir = fileutils.get_temp_dir(prefix='fetch-') + tmp_dir = fileutils.get_temp_dir(prefix="fetch-") output_file = os.path.join(tmp_dir, file_name) - with open(output_file, 'wb') as out: + with open(output_file, "wb") as out: out.write(response.content) return output_file @@ -54,7 +54,7 @@ def download_url(url, file_name=None, verify=True, timeout=10): def ping_url(url): """ - Returns True is `url` is reachable. + Return True is `url` is reachable. """ try: from urlib.request import urlopen diff --git a/src/commoncode/fileset.py b/src/commoncode/fileset.py index dbd4c1c8..f0298ea6 100644 --- a/src/commoncode/fileset.py +++ b/src/commoncode/fileset.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -83,14 +83,14 @@ def is_included(path, includes=None, excludes=None): if includes: included = get_matches(path, includes, all_matches=False) if TRACE: - logger.debug('in_fileset: path: %(path)r included:%(included)r' % locals()) + logger.debug("in_fileset: path: %(path)r included:%(included)r" % locals()) if not included: return False if excludes: excluded = get_matches(path, excludes, all_matches=False) if TRACE: - logger.debug('in_fileset: path: %(path)r excluded:%(excluded)r .' % locals()) + logger.debug("in_fileset: path: %(path)r excluded:%(excluded)r ." % locals()) if excluded: return False @@ -108,39 +108,39 @@ def get_matches(path, patterns, all_matches=False): return False path = fileutils.as_posixpath(path).lower() - pathstripped = path.lstrip('/0') + pathstripped = path.lstrip("/0") if not pathstripped: return False segments = paths.split(pathstripped) if TRACE: - logger.debug('_match: path: %(path)r patterns:%(patterns)r.' % locals()) + logger.debug("_match: path: %(path)r patterns:%(patterns)r." 
% locals()) matches = [] if not isinstance(patterns, dict): - assert isinstance(patterns, (list, tuple)), 'Invalid patterns: {}'.format(patterns) + assert isinstance(patterns, (list, tuple)), "Invalid patterns: {}".format(patterns) patterns = {p: p for p in patterns} for pat, value in patterns.items(): if not pat or not pat.strip(): continue - value = value or '' - pat = pat.lstrip('/').lower() - is_plain = '/' not in pat + value = value or "" + pat = pat.lstrip("/").lower() + is_plain = "/" not in pat if is_plain: if any(fnmatch.fnmatchcase(s, pat) for s in segments): matches.append(value) if not all_matches: break - elif (fnmatch.fnmatchcase(path, pat) or fnmatch.fnmatchcase(pathstripped, pat)): + elif fnmatch.fnmatchcase(path, pat) or fnmatch.fnmatchcase(pathstripped, pat): matches.append(value) if not all_matches: break if TRACE: - logger.debug('_match: matches: %(matches)r' % locals()) + logger.debug("_match: matches: %(matches)r" % locals()) if not all_matches: if matches: @@ -157,11 +157,11 @@ def load(location): if not location: return tuple() fn = os.path.abspath(os.path.normpath(os.path.expanduser(location))) - msg = ('File %(location)s does not exist or not a file.') % locals() - assert (os.path.exists(fn) and os.path.isfile(fn)), msg - mode = 'r' + msg = ("File %(location)s does not exist or not a file.") % locals() + assert os.path.exists(fn) and os.path.isfile(fn), msg + mode = "r" with open(fn, mode) as f: - return [l.strip() for l in f if l and l.strip()] + return [line.strip() for line in f if line and line.strip()] def includes_excludes(patterns, message): @@ -171,9 +171,9 @@ def includes_excludes(patterns, message): value in the returned mappings. Ignore pattern as comments if prefixed with #. Use an empty string is message is None. """ - message = message or '' - BANG = '!' - POUND = '#' + message = message or "" + BANG = "!" 
+ POUND = "#" included = {} excluded = {} if not patterns: diff --git a/src/commoncode/filetype.py b/src/commoncode/filetype.py index 3f2201b8..ad1d5561 100644 --- a/src/commoncode/filetype.py +++ b/src/commoncode/filetype.py @@ -2,15 +2,16 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import os from datetime import datetime +from datetime import timezone -from commoncode.system import on_posix from commoncode.functional import memoize +from commoncode.system import on_posix """ Low level file type utilities, essentially a wrapper around os.path and stat. @@ -77,7 +78,7 @@ def get_link_target(location): Return the link target for `location` if this is a Link or an empty string. """ - target = '' + target = "" # always false on windows, until Python supports junctions/links if on_posix and is_link(location): try: @@ -91,12 +92,38 @@ def get_link_target(location): # Map of type checker function -> short type code # The order of types check matters: link -> file -> directory -> special -TYPES = dict([ - (is_link, ('l', 'link',)), - (is_file, ('f', 'file',)), - (is_dir, ('d', 'directory',)), - (is_special, ('s', 'special',)) -]) +TYPES = dict( + [ + ( + is_link, + ( + "l", + "link", + ), + ), + ( + is_file, + ( + "f", + "file", + ), + ), + ( + is_dir, + ( + "d", + "directory", + ), + ), + ( + is_special, + ( + "s", + "special", + ), + ), + ] +) def get_type(location, short=True): @@ -161,18 +188,21 @@ def get_last_modified_date(location): Return the last modified date stamp of a file as YYYYMMDD format. The date of non-files (dir, links, special) is always an empty string. 
""" - yyyymmdd = '' + yyyymmdd = "" if is_file(location): utc_date = datetime.isoformat( - datetime.utcfromtimestamp(os.path.getmtime(location)) + datetime.fromtimestamp( + os.path.getmtime(location), + tz=timezone.utc, + ) ) yyyymmdd = utc_date[:10] return yyyymmdd counting_functions = { - 'file_count': lambda _: 1, - 'file_size': os.path.getsize, + "file_count": lambda _: 1, + "file_size": os.path.getsize, } @@ -202,8 +232,9 @@ def counter(location, counting_function): count_fun = counting_functions[counting_function] return count_fun(location) elif is_dir(location): - count += sum(counter(os.path.join(location, p), counting_function) - for p in os.listdir(location)) + count += sum( + counter(os.path.join(location, p.name), counting_function) for p in os.scandir(location) + ) return count @@ -213,7 +244,7 @@ def get_file_count(location): or 1 if `location` is a file. Only regular files are counted. Everything else has a zero size. """ - return counter(location, 'file_count') + return counter(location, "file_count") def get_size(location): @@ -222,4 +253,4 @@ def get_size(location): directory, the cumulative size of all files in this directory tree. Only regular files have a size. Everything else has a zero size. """ - return counter(location, 'file_size') + return counter(location, "file_size") diff --git a/src/commoncode/fileutils.py b/src/commoncode/fileutils.py index 584b7ccf..b8d9001b 100644 --- a/src/commoncode/fileutils.py +++ b/src/commoncode/fileutils.py @@ -2,19 +2,18 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 AND Python-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import errno -import os import ntpath +import os import posixpath import shutil import stat import sys import tempfile - from os import fsdecode try: @@ -33,7 +32,9 @@ class WindowsError(Exception): pass + import logging + logger = logging.getLogger(__name__) TRACE = False @@ -48,7 +49,8 @@ def logger_debug(*args): logger.setLevel(logging.DEBUG) def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + return logger.debug(" ".join(isinstance(a, str) and a or repr(a) for a in args)) + """ File, paths and directory utility functions. @@ -68,8 +70,7 @@ def create_dir(location): if os.path.exists(location): if not os.path.isdir(location): - err = ('Cannot create directory: existing file ' - 'in the way ''%(location)s.') + err = "Cannot create directory: existing file in the way %(location)s." raise OSError(err % locals()) else: # may fail on win if the path is too long @@ -96,7 +97,7 @@ def create_dir(location): raise -def get_temp_dir(base_dir=_base_temp_dir, prefix=''): +def get_temp_dir(base_dir=_base_temp_dir, prefix=""): """ Return the path to a new existing unique temporary directory, created under the `base_dir` base directory using the `prefix` prefix. @@ -108,7 +109,7 @@ def get_temp_dir(base_dir=_base_temp_dir, prefix=''): has_base = bool(base_dir) if not has_base: - base_dir = os.getenv('SCANCODE_TMP') + base_dir = os.getenv("SCANCODE_TMP") if not base_dir: base_dir = tempfile.gettempdir() @@ -116,10 +117,11 @@ def get_temp_dir(base_dir=_base_temp_dir, prefix=''): create_dir(base_dir) if not has_base: - prefix = 'scancode-tk-' + prefix = "scancode-tk-" return tempfile.mkdtemp(prefix=prefix, dir=base_dir) + # # PATHS AND NAMES MANIPULATIONS # @@ -138,15 +140,15 @@ def prepare_path(pth): def is_posixpath(location): - """ + r""" Return True if the `location` path is likely a POSIX-like path using POSIX path separators (slash or "/")or has no path separator. 
Return False if the `location` path is likely a Windows-like path using backslash as path separators (e.g. "\"). """ - has_slashes = '/' in location - has_backslashes = '\\' in location + has_slashes = "/" in location + has_backslashes = "\\" in location # windows paths with drive if location: drive, _ = ntpath.splitdrive(location) @@ -162,22 +164,22 @@ def is_posixpath(location): def as_posixpath(location): - """ + r""" Return a POSIX-like path using POSIX path separators (slash or "/") for a `location` path. This converts Windows paths to look like POSIX paths: Python accepts gracefully POSIX paths on Windows. """ location = prepare_path(location) - return location.replace('\\', '/') + return location.replace("\\", "/") def as_winpath(location): - """ + r""" Return a Windows-like path using Windows path separators (backslash or "\") for a `location` path. """ location = prepare_path(location) - return location.replace('/', '\\') + return location.replace("/", "\\") def split_parent_resource(path, force_posix=False): @@ -186,7 +188,7 @@ def split_parent_resource(path, force_posix=False): """ use_posix = force_posix or is_posixpath(path) splitter = use_posix and posixpath or ntpath - path_no_trailing_speps = path.rstrip('\\/') + path_no_trailing_speps = path.rstrip("\\/") return splitter.split(path_no_trailing_speps) @@ -196,7 +198,7 @@ def resource_name(path, force_posix=False): is the last path segment. 
""" _left, right = split_parent_resource(path, force_posix) - return right or '' + return right or "" def file_name(path, force_posix=False): @@ -213,8 +215,8 @@ def parent_directory(path, force_posix=False, with_trail=True): """ left, _right = split_parent_resource(path, force_posix) use_posix = force_posix or is_posixpath(path) - sep = '/' if use_posix else '\\' - trail = sep if with_trail and left != sep else '' + sep = "/" if use_posix else "\\" + trail = sep if with_trail and left != sep else "" return left + trail @@ -241,19 +243,19 @@ def splitext_name(file_name, is_file=True): """ if not file_name: - return '', '' + return "", "" file_name = fsdecode(file_name) if not is_file: - return file_name, '' + return file_name, "" - if file_name.startswith('.') and '.' not in file_name[1:]: + if file_name.startswith(".") and "." not in file_name[1:]: # .dot files base name is the full name and they do not have an extension - return file_name, '' + return file_name, "" base_name, extension = posixpath.splitext(file_name) # handle composed extensions of tar.gz, bz, zx,etc - if base_name.endswith('.tar'): + if base_name.endswith(".tar"): base_name, extension2 = posixpath.splitext(base_name) extension = extension2 + extension return base_name, extension @@ -266,30 +268,36 @@ def splitext(path, force_posix=False): the file name minus its extension. Return an empty extension string for a directory. """ - base_name = '' - extension = '' + base_name = "" + extension = "" if not path: return base_name, extension - is_dir = path.endswith(('\\', '/',)) - path = as_posixpath(path).strip('/') + is_dir = path.endswith( + ( + "\\", + "/", + ) + ) + path = as_posixpath(path).strip("/") name = resource_name(path, force_posix) if is_dir: # directories never have an extension base_name = name - extension = '' - elif name.startswith('.') and '.' not in name[1:]: + extension = "" + elif name.startswith(".") and "." 
not in name[1:]: # .dot files base name is the full name and they do not have an extension base_name = name - extension = '' + extension = "" else: base_name, extension = posixpath.splitext(name) # handle composed extensions of tar.gz, tar.bz2, zx,etc - if base_name.endswith('.tar'): + if base_name.endswith(".tar"): base_name, extension2 = posixpath.splitext(base_name) extension = extension2 + extension return base_name, extension + # # DIRECTORY AND FILES WALKING/ITERATION # @@ -313,42 +321,43 @@ def walk(location, ignored=None, follow_symlinks=False): If `follow_symlinks` is True, then symlinks will not be ignored and be collected like regular files and directories """ - # TODO: consider using the new "scandir" module for some speed-up. - is_ignored = ignored(location) if ignored else False if is_ignored: if TRACE: - logger_debug('walk: ignored:', location, is_ignored) + logger_debug("walk: ignored:", location, is_ignored) return - if filetype.is_file(location, follow_symlinks=follow_symlinks) : + if filetype.is_file(location, follow_symlinks=follow_symlinks): yield parent_directory(location), [], [file_name(location)] elif filetype.is_dir(location, follow_symlinks=follow_symlinks): dirs = [] files = [] - # TODO: consider using scandir - for name in os.listdir(location): - loc = os.path.join(location, name) + for resource in os.scandir(location): + loc = os.path.join(location, resource.name) if filetype.is_special(loc) or (ignored and ignored(loc)): - if (follow_symlinks - and filetype.is_link(loc) - and not filetype.is_broken_link(location)): + if ( + follow_symlinks + and resource.is_symlink() + and not filetype.is_broken_link(location) + ): pass else: if TRACE: ign = ignored and ignored(loc) - logger_debug('walk: ignored:', loc, ign) + logger_debug("walk: ignored:", loc, ign) continue # special files and symlinks are always ignored - if filetype.is_dir(loc, follow_symlinks=follow_symlinks): - dirs.append(name) - elif filetype.is_file(loc, 
follow_symlinks=follow_symlinks): - files.append(name) + if resource.is_dir(follow_symlinks=follow_symlinks): + dirs.append(resource.name) + elif resource.is_file(follow_symlinks=follow_symlinks): + files.append(resource.name) yield location, dirs, files for dr in dirs: - for tripple in walk(os.path.join(location, dr), ignored, follow_symlinks=follow_symlinks): + for tripple in walk( + os.path.join(location, dr), ignored, follow_symlinks=follow_symlinks + ): yield tripple @@ -367,6 +376,8 @@ def resource_iter(location, ignored=ignore_nothing, with_dirs=True, follow_symli yield os.path.join(top, d) for f in files: yield os.path.join(top, f) + + # # COPY # @@ -389,7 +400,7 @@ def copytree(src, dst): if not filetype.is_readable(src): chmod(src, R, recurse=False) - names = os.listdir(src) + names = [resource.name for resource in os.scandir(src)] if not os.path.exists(dst): os.makedirs(dst) @@ -450,7 +461,7 @@ def copytime(src, dst): """ errors = [] st = os.stat(src) - if hasattr(os, 'utime'): + if hasattr(os, "utime"): try: os.utime(dst, (st.st_atime, st.st_mtime)) except OSError as why: @@ -461,6 +472,7 @@ def copytime(src, dst): errors.append((src, dst, str(why))) return errors + # # PERMISSIONS # @@ -516,6 +528,7 @@ def chmod_tree(location, flags): for f in files: chmod(os.path.join(top, f), flags, recurse=False) + # # DELETION # @@ -527,8 +540,8 @@ def _rm_handler(function, path, excinfo): # NOQA This retries deleting once before giving up. 
""" if TRACE: - logger_debug('_rm_handler:', 'path:', path, 'excinfo:', excinfo) - if function in (os.rmdir, os.listdir): + logger_debug("_rm_handler:", "path:", path, "excinfo:", excinfo) + if function in (os.rmdir, os.listdir, os.scandir): try: chmod(path, RW, recurse=True) shutil.rmtree(path, True) @@ -536,16 +549,16 @@ def _rm_handler(function, path, excinfo): # NOQA pass if os.path.exists(path): - logger.warning('Failed to delete directory %s', path) + logger.warning("Failed to delete directory %s", path) elif function == os.remove: try: delete(path, _err_handler=None) - except: + except Exception: pass if os.path.exists(path): - logger.warning('Failed to delete file %s', path) + logger.warning("Failed to delete file %s", path) def delete(location, _err_handler=_rm_handler): diff --git a/src/commoncode/functional.py b/src/commoncode/functional.py index 40e275d7..eaa7376d 100644 --- a/src/commoncode/functional.py +++ b/src/commoncode/functional.py @@ -2,14 +2,13 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import functools - -from types import GeneratorType from array import array +from types import GeneratorType def flatten(seq): @@ -87,8 +86,7 @@ def memoized(*args, **kwargs): if kwargs: return fun(*args, **kwargs) # convert any list args to a tuple - args = tuple(tuple(arg) if isinstance(arg, (list, tuple, array)) else arg - for arg in args) + args = tuple(tuple(arg) if isinstance(arg, (list, tuple, array)) else arg for arg in args) try: return memos[args] except KeyError: diff --git a/src/commoncode/hash.py b/src/commoncode/hash.py index 231ebe1e..b97dd898 100644 --- a/src/commoncode/hash.py +++ b/src/commoncode/hash.py @@ -2,18 +2,19 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import binascii import hashlib +import os import sys from functools import partial +from commoncode import filetype from commoncode.codec import bin_to_num from commoncode.codec import urlsafe_b64encode -from commoncode import filetype """ Hashes and checksums. @@ -25,41 +26,83 @@ Checksums are operating on files. """ +# This is ~16 MB +FILE_CHUNK_SIZE = 2**24 + def _hash_mod(bitsize, hmodule): """ - Return a hashing class returning hashes with a `bitsize` bit length. The - interface of this class is similar to the hash module API. + Return a hasher class that returns hashes with a ``bitsize`` bit length. The interface of this + class is similar to the hash module API. """ - class hasher(object): + class hasher(Hashable): + """A hasher class that behaves like a hashlib module.""" - def __init__(self, msg=None): + def __init__(self, msg=None, **kwargs): + """ + Return a hasher, populated with an initial ``msg`` bytes string. 
+ Close on the bitsize and hmodule + """ + # length of binary digest for this hash self.digest_size = bitsize // 8 - self.h = msg and hmodule(msg).digest()[:self.digest_size] or None - def digest(self): - return bytes(self.h) + # binh = binary hasher module + self.binh = hmodule() - def hexdigest(self): - return self.h and binascii.hexlify(self.h).decode('utf-8') + # msg_len = length in bytes of the message hashed + self.msg_len = 0 - def b64digest(self): - return self.h and urlsafe_b64encode(self.h).decode('utf-8') + if msg: + self.update(msg) - def intdigest(self): - return self.h and int(bin_to_num(self.h)) + def update(self, msg=None): + """ + Update this hash with a ``msg`` bytes string. + """ + if msg: + self.binh.update(msg) + self.msg_len += len(msg) return hasher -# for FIPS support +class Hashable: + """ + A mixin for hashers that provides the base methods. + """ + + def digest(self): + """ + Return a bytes string digest for this hash. + """ + if not self.msg_len: + return + return self.binh.digest()[: self.digest_size] + + def hexdigest(self): + """ + Return a string hex digest for this hash. + """ + return self.msg_len and binascii.hexlify(self.digest()).decode("utf-8") + + def b64digest(self): + """ + Return a string base64 digest for this hash. + """ + return self.msg_len and urlsafe_b64encode(self.digest()).decode("utf-8") + + def intdigest(self): + """ + Return a int digest for this hash. 
+ """ + return self.msg_len and int(bin_to_num(self.digest())) + + +# for FIPS support, we declare that "usedforsecurity" is False sys_v0 = sys.version_info[0] sys_v1 = sys.version_info[1] -if sys_v0 == 3 and sys_v1 >= 9: - md5_hasher = partial(hashlib.md5, usedforsecurity=False) -else: - md5_hasher = hashlib.md5 +md5_hasher = partial(hashlib.md5, usedforsecurity=False) # Base hashers for each bit size @@ -72,7 +115,7 @@ def intdigest(self): 160: _hash_mod(160, hashlib.sha1), 256: _hash_mod(256, hashlib.sha256), 384: _hash_mod(384, hashlib.sha384), - 512: _hash_mod(512, hashlib.sha512) + 512: _hash_mod(512, hashlib.sha512), } @@ -83,103 +126,182 @@ def get_hasher(bitsize): return _hashmodules_by_bitsize[bitsize] -class sha1_git_hasher(object): +class sha1_git_hasher(Hashable): """ Hash content using the git blob SHA1 convention. + See https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_object_storage """ - def __init__(self, msg=None): + def __init__(self, msg=None, total_length=0, **kwargs): + """ + Initialize a sha1_git_hasher with an optional ``msg`` byte string. The ``total_length`` of + all content that will be hashed, combining the ``msg`` length plus any later call to + update() with additional messages. + + Here ``total_length`` is total length in bytes of all the messages (chunks) hashed + in contrast to ``msg_len`` which is the length in bytes for the optional message. 
+ """ self.digest_size = 160 // 8 - self.h = msg and self._compute(msg) or None + self.msg_len = 0 - def _compute(self, msg): - # note: bytes interpolation is new in Python 3.5 - git_blob_msg = b'blob %d\0%s' % (len(msg), msg) - return hashlib.sha1(git_blob_msg).digest() + if msg: + self.msg_len = msg_len = len(msg) - def digest(self): - return bytes(self.h) + if not total_length: + total_length = msg_len + else: + if total_length < msg_len: + raise ValueError( + f"Initial msg length: {msg_len} " + f"cannot be larger than the the total_length: {self.total_length}" + ) - def hexdigest(self): - return self.h and binascii.hexlify(self.h).decode('utf-8') + if not total_length: + raise ValueError("total_length cannot be zero") - def b64digest(self): - return self.h and urlsafe_b64encode(self.h).decode('utf-8') + self.total_length = total_length + self.binh = get_hasher(bitsize=160)(total_length=total_length) - def intdigest(self): - return self.h and int(bin_to_num(self.h)) + self._hash_header() + if msg: + self.update(msg) + + def _hash_header(self): + # note: bytes interpolation is new in Python 3.5 + git_blob_header = b"blob %d\0" % (self.total_length) + self.binh.update(msg=git_blob_header) + + def update(self, msg=None): + """ + Update this hash with a ``msg`` bytes string. 
+ """ + if msg: + msg_len = len(msg) + if (msg_len + self.msg_len) > self.total_length: + raise ValueError( + f"Actual combined msg lengths: initial: {self.msg_len} plus added: {msg_len} " + f"cannot be larger than the the total_length: {self.total_length}" + ) + + self.binh.update(msg) + self.msg_len += msg_len _hashmodules_by_name = { - 'md5': get_hasher(128), - 'sha1': get_hasher(160), - 'sha1_git': sha1_git_hasher, - 'sha256': get_hasher(256), - 'sha384': get_hasher(384), - 'sha512': get_hasher(512) + "md5": get_hasher(128), + "sha1": get_hasher(160), + "sha1_git": sha1_git_hasher, + "sha256": get_hasher(256), + "sha384": get_hasher(384), + "sha512": get_hasher(512), } +def get_hasher_instance_by_name(name, total_length=0): + """ + Return a hasher instance for a checksum algorithm ``name`` with a planned ``total_length`` of + bytes to hash. + """ + try: + hm = _hashmodules_by_name[name] + return hm(total_length=total_length) + except KeyError: + raise ValueError(f"Unknown checksum algorithm: {name!r}") + + +def get_file_size(location): + return os.path.getsize(location) + + def checksum(location, name, base64=False): """ - Return a checksum of `bitsize` length from the content of the file at - `location`. The checksum is a hexdigest or base64-encoded is `base64` is - True. + Return a checksum from the content of the file at ``location`` using the ``name`` checksum + algorithm. The checksum is a string as a hexdigest or is base64-encoded is ``base64`` is True. + + Return None if ``location`` is not a file or an empty file. """ if not filetype.is_file(location): return - hasher = _hashmodules_by_name[name] - # fixme: we should read in chunks? 
- with open(location, 'rb') as f: - hashable = f.read() + total_length = get_file_size(location) + chunks = binary_chunks(location) + return checksum_from_chunks(chunks=chunks, total_length=total_length, name=name, base64=base64) + - hashed = hasher(hashable) +def checksum_from_chunks(chunks, name, total_length=0, base64=False): + """ + Return a checksum from the content of the iterator of byte strings ``chunks`` with a + ``total_length`` combined length using the ``name`` checksum algorithm. The returned checksum is + a string as a hexdigest or is base64-encoded is ``base64`` is True. + """ + hasher = get_hasher_instance_by_name(name=name, total_length=total_length) + for chunk in chunks: + hasher.update(chunk) if base64: - return hashed.b64digest() + return hasher.b64digest() + return hasher.hexdigest() - return hashed.hexdigest() + +def binary_chunks(location, size=FILE_CHUNK_SIZE): + """ + Read file at ``location`` as binary and yield bytes of up to ``size`` length in bytes, + defaulting to 2**24 bytes, e.g., about 16 MB. 
+ """ + with open(location, "rb") as f: + while True: + chunk = f.read(size) + if not chunk: + break + yield chunk def md5(location): - return checksum(location, name='md5', base64=False) + return checksum(location, name="md5", base64=False) def sha1(location): - return checksum(location, name='sha1', base64=False) + return checksum(location, name="sha1", base64=False) def b64sha1(location): - return checksum(location, name='sha1', base64=True) + return checksum(location, name="sha1", base64=True) def sha256(location): - return checksum(location, name='sha256', base64=False) + return checksum(location, name="sha256", base64=False) def sha512(location): - return checksum(location, name='sha512', base64=False) + return checksum(location, name="sha512", base64=False) def sha1_git(location): - return checksum(location, name='sha1_git', base64=False) + return checksum(location, name="sha1_git", base64=False) -def multi_checksums(location, checksum_names=('md5', 'sha1', 'sha256', 'sha512', 'sha1_git')): +def multi_checksums(location, checksum_names=("md5", "sha1", "sha256", "sha512", "sha1_git")): """ - Return a mapping of hexdigest checksums keyed by checksum name from the content - of the file at `location`. Use the `checksum_names` list of checksum names. - The mapping is guaranted to contains all the requested names as keys. - If the location is not a file, the values are None. + Return a mapping of hexdigest checksum strings keyed by checksum algorithm name from hashing the + content of the file at ``location``. Use the ``checksum_names`` list of checksum names. The + mapping is guaranted to contains all the requested names as keys. If the location is not a file, + or if the file is empty, the values are None. + + The purpose of this function is to avoid read the same file multiple times + to compute different checksums. 
""" - results = dict([(name, None) for name in checksum_names]) if not filetype.is_file(location): - return results + return {name: None for name in checksum_names} + file_size = get_file_size(location) + if file_size == 0: + return {name: None for name in checksum_names} + + hashers = { + name: get_hasher_instance_by_name(name=name, total_length=file_size) + for name in checksum_names + } - # fixme: we should read in chunks? - with open(location, 'rb') as f: - hashable = f.read() + for chunk in binary_chunks(location): + for hasher in hashers.values(): + hasher.update(msg=chunk) - for name in checksum_names: - results[name] = _hashmodules_by_name[name](hashable).hexdigest() - return results + return {name: hasher.hexdigest() for name, hasher in hashers.items()} diff --git a/src/commoncode/ignore.py b/src/commoncode/ignore.py index 881b93f5..c19ccdc3 100644 --- a/src/commoncode/ignore.py +++ b/src/commoncode/ignore.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -38,10 +38,7 @@ def is_ignore_file(location): """ Return True if the location is an ignore file. 
""" - return ( - filetype.is_file(location) - and fileutils.file_name(location) == '.scancodeignore' - ) + return filetype.is_file(location) and fileutils.file_name(location) == ".scancodeignore" def get_ignores(location, include_defaults=True): @@ -59,244 +56,236 @@ def get_ignores(location, include_defaults=True): unignores.update(uni) return ignores, unignores + # # Default ignores # ignores_MacOSX = { - '.DS_Store': 'Default ignore: MacOSX artifact', - '._.DS_Store': 'Default ignore: MacOSX artifact', - '__MACOSX': 'Default ignore: MacOSX artifact', - '.AppleDouble': 'Default ignore: MacOSX artifact', - '.LSOverride': 'Default ignore: MacOSX artifact', - '.DocumentRevisions-V100': 'Default ignore: MacOSX artifact', - '.fseventsd': 'Default ignore: MacOSX artifact', - '.Spotlight-V100': 'Default ignore: MacOSX artifact', - '.VolumeIcon.icns': 'Default ignore: MacOSX artifact', - - '.journal': 'Default ignore: MacOSX DMG/HFS+ artifact', - '.journal_info_block': 'Default ignore: MacOSX DMG/HFS+ artifact', - '.Trashes': 'Default ignore: MacOSX DMG/HFS+ artifact', - r'\[HFS+ Private Data\]': 'Default ignore: MacOSX DMG/HFS+ artifact private data', + ".DS_Store": "Default ignore: MacOSX artifact", + "._.DS_Store": "Default ignore: MacOSX artifact", + "__MACOSX": "Default ignore: MacOSX artifact", + ".AppleDouble": "Default ignore: MacOSX artifact", + ".LSOverride": "Default ignore: MacOSX artifact", + ".DocumentRevisions-V100": "Default ignore: MacOSX artifact", + ".fseventsd": "Default ignore: MacOSX artifact", + ".Spotlight-V100": "Default ignore: MacOSX artifact", + ".VolumeIcon.icns": "Default ignore: MacOSX artifact", + ".journal": "Default ignore: MacOSX DMG/HFS+ artifact", + ".journal_info_block": "Default ignore: MacOSX DMG/HFS+ artifact", + ".Trashes": "Default ignore: MacOSX DMG/HFS+ artifact", + r"\[HFS+ Private Data\]": "Default ignore: MacOSX DMG/HFS+ artifact private data", } ignores_Windows = { - 'Thumbs.db': 'Default ignore: Windows artifact', - 
'ehthumbs.db': 'Default ignore: Windows artifact', - 'Desktop.ini': 'Default ignore: Windows artifact', - '$RECYCLE.BIN': 'Default ignore: Windows artifact', - '*.lnk': 'Default ignore: Windows artifact', - 'System Volume Information': 'Default ignore: Windows FS artifact', - 'NTUSER.DAT*': 'Default ignore: Windows FS artifact', + "Thumbs.db": "Default ignore: Windows artifact", + "ehthumbs.db": "Default ignore: Windows artifact", + "Desktop.ini": "Default ignore: Windows artifact", + "$RECYCLE.BIN": "Default ignore: Windows artifact", + "*.lnk": "Default ignore: Windows artifact", + "System Volume Information": "Default ignore: Windows FS artifact", + "NTUSER.DAT*": "Default ignore: Windows FS artifact", } ignores_Linux = { - '.directory': 'Default ignore: KDE artifact', - '.Trash-*': 'Default ignore: Linux/Gome/KDE artifact', + ".directory": "Default ignore: KDE artifact", + ".Trash-*": "Default ignore: Linux/Gome/KDE artifact", } ignores_IDEs = { - '*.el': 'Default ignore: EMACS Elisp artifact', - '*.swp': 'Default ignore: VIM artifact', - '.project': 'Default ignore: Eclipse IDE artifact', - '.pydevproject': 'Default ignore: Eclipse IDE artifact', - '.settings': 'Default ignore: Eclipse IDE artifact', - '.eclipse': 'Default ignore: Eclipse IDE artifact', - '.loadpath': 'Default ignore: Eclipse IDE artifact', - '*.launch': 'Default ignore: Eclipse IDE artifact', - '.cproject': 'Default ignore: Eclipse IDE artifact', - '.cdtproject': 'Default ignore: Eclipse IDE artifact', - '.classpath': 'Default ignore: Eclipse IDE artifact', - '.buildpath': 'Default ignore: Eclipse IDE artifact', - '.texlipse': 'Default ignore: Eclipse IDE artifact', - - '*.iml': 'Default ignore: JetBrains IDE artifact', - '*.ipr': 'Default ignore: JetBrains IDE artifact', - '*.iws': 'Default ignore: JetBrains IDE artifact', - '.idea/': 'Default ignore: JetBrains IDE artifact', - '.idea_modules/': 'Default ignore: JetBrains IDE artifact', - - '*.kdev4': 'Default ignore: Kdevelop artifact', - 
'.kdev4/': 'Default ignore: Kdevelop artifact', - - '*.nib': 'Default ignore: Apple Xcode artifact', - '*.plst': 'Default ignore: Apple Xcode plist artifact', - '*.pbxuser': 'Default ignore: Apple Xcode artifact', - '*.pbxproj': 'Default ignore: Apple Xcode artifact', - 'xcuserdata': 'Default ignore: Apple Xcode artifact', - '*.xcuserstate': 'Default ignore: Apple Xcode artifact', - - '*.csproj': 'Default ignore: Microsoft VS project artifact', - '*.unityproj': 'Default ignore: Microsoft VS project artifact', - '*.sln': 'Default ignore: Microsoft VS project artifact', - '*.sluo': 'Default ignore: Microsoft VS project artifact', - '*.suo': 'Default ignore: Microsoft VS project artifact', - '*.user': 'Default ignore: Microsoft VS project artifact', - '*.sln.docstates': 'Default ignore: Microsoft VS project artifact', - '*.dsw': 'Default ignore: Microsoft VS project artifact', - - '.editorconfig': 'Default ignore: Editor config artifact', - - ' Leiningen.gitignore': 'Default ignore: Leiningen artifact', - '.architect': 'Default ignore: ExtJS artifact', - '*.tmproj': 'Default ignore: Textmate artifact', - '*.tmproject': 'Default ignore: Textmate artifact', + "*.el": "Default ignore: EMACS Elisp artifact", + "*.swp": "Default ignore: VIM artifact", + ".project": "Default ignore: Eclipse IDE artifact", + ".pydevproject": "Default ignore: Eclipse IDE artifact", + ".settings": "Default ignore: Eclipse IDE artifact", + ".eclipse": "Default ignore: Eclipse IDE artifact", + ".loadpath": "Default ignore: Eclipse IDE artifact", + "*.launch": "Default ignore: Eclipse IDE artifact", + ".cproject": "Default ignore: Eclipse IDE artifact", + ".cdtproject": "Default ignore: Eclipse IDE artifact", + ".classpath": "Default ignore: Eclipse IDE artifact", + ".buildpath": "Default ignore: Eclipse IDE artifact", + ".texlipse": "Default ignore: Eclipse IDE artifact", + "*.iml": "Default ignore: JetBrains IDE artifact", + "*.ipr": "Default ignore: JetBrains IDE artifact", + "*.iws": "Default 
ignore: JetBrains IDE artifact", + ".idea/": "Default ignore: JetBrains IDE artifact", + ".idea_modules/": "Default ignore: JetBrains IDE artifact", + "*.kdev4": "Default ignore: Kdevelop artifact", + ".kdev4/": "Default ignore: Kdevelop artifact", + "*.nib": "Default ignore: Apple Xcode artifact", + "*.plst": "Default ignore: Apple Xcode plist artifact", + "*.pbxuser": "Default ignore: Apple Xcode artifact", + "*.pbxproj": "Default ignore: Apple Xcode artifact", + "xcuserdata": "Default ignore: Apple Xcode artifact", + "*.xcuserstate": "Default ignore: Apple Xcode artifact", + "*.csproj": "Default ignore: Microsoft VS project artifact", + "*.unityproj": "Default ignore: Microsoft VS project artifact", + "*.sln": "Default ignore: Microsoft VS project artifact", + "*.sluo": "Default ignore: Microsoft VS project artifact", + "*.suo": "Default ignore: Microsoft VS project artifact", + "*.user": "Default ignore: Microsoft VS project artifact", + "*.sln.docstates": "Default ignore: Microsoft VS project artifact", + "*.dsw": "Default ignore: Microsoft VS project artifact", + ".editorconfig": "Default ignore: Editor config artifact", + " Leiningen.gitignore": "Default ignore: Leiningen artifact", + ".architect": "Default ignore: ExtJS artifact", + "*.tmproj": "Default ignore: Textmate artifact", + "*.tmproject": "Default ignore: Textmate artifact", } ignores_web = { - '.htaccess': 'Default ignore: .htaccess file', - 'robots.txt': 'Default ignore: robots file', - 'humans.txt': 'Default ignore: robots file', - 'web.config': 'Default ignore: web config', - '.htaccess.sample': 'Default ignore: .htaccess file', + ".htaccess": "Default ignore: .htaccess file", + "robots.txt": "Default ignore: robots file", + "humans.txt": "Default ignore: robots file", + "web.config": "Default ignore: web config", + ".htaccess.sample": "Default ignore: .htaccess file", } ignores_Maven = { - 'pom.xml.tag': 'Default ignore: Maven artifact', - 'pom.xml.releaseBackup': 'Default ignore: Maven 
artifact', - 'pom.xml.versionsBackup': 'Default ignore: Maven artifact', - 'pom.xml.next': 'Default ignore: Maven artifact', - 'release.properties': 'Default ignore: Maven artifact', - 'dependency-reduced-pom.xml': 'Default ignore: Maven artifact', - 'buildNumber.properties': 'Default ignore: Maven artifact', + "pom.xml.tag": "Default ignore: Maven artifact", + "pom.xml.releaseBackup": "Default ignore: Maven artifact", + "pom.xml.versionsBackup": "Default ignore: Maven artifact", + "pom.xml.next": "Default ignore: Maven artifact", + "release.properties": "Default ignore: Maven artifact", + "dependency-reduced-pom.xml": "Default ignore: Maven artifact", + "buildNumber.properties": "Default ignore: Maven artifact", } ignores_VCS = { - '.bzr': 'Default ignore: Bazaar artifact', - '.bzrignore' : 'Default ignore: Bazaar config artifact', - - '.git': 'Default ignore: Git artifact', - '.gitignore' : 'Default ignore: Git config artifact', - '.gitattributes': 'Default ignore: Git config artifact', - - '.hg': 'Default ignore: Mercurial artifact', - '.hgignore' : 'Default ignore: Mercurial config artifact', - - '.repo': 'Default ignore: Multiple Git repository artifact', - - '.svn': 'Default ignore: SVN artifact', - '.svnignore': 'Default ignore: SVN config artifact', - - '.tfignore': 'Default ignore: Microsft TFS config artifact', - - 'vssver.scc': 'Default ignore: Visual Source Safe artifact', - - 'CVS': 'Default ignore: CVS artifact', - '.cvsignore': 'Default ignore: CVS config artifact', - - '*/_MTN': 'Default ignore: Monotone artifact', - '*/_darcs': 'Default ignore: Darcs artifact', - '*/{arch}': 'Default ignore: GNU Arch artifact', + ".bzr": "Default ignore: Bazaar artifact", + ".bzrignore": "Default ignore: Bazaar config artifact", + ".git": "Default ignore: Git artifact", + ".gitignore": "Default ignore: Git config artifact", + ".gitattributes": "Default ignore: Git config artifact", + ".hg": "Default ignore: Mercurial artifact", + ".hgignore": "Default ignore: 
Mercurial config artifact", + ".repo": "Default ignore: Multiple Git repository artifact", + ".svn": "Default ignore: SVN artifact", + ".svnignore": "Default ignore: SVN config artifact", + ".tfignore": "Default ignore: Microsft TFS config artifact", + "vssver.scc": "Default ignore: Visual Source Safe artifact", + "CVS": "Default ignore: CVS artifact", + ".cvsignore": "Default ignore: CVS config artifact", + "*/_MTN": "Default ignore: Monotone artifact", + "*/_darcs": "Default ignore: Darcs artifact", + "*/{arch}": "Default ignore: GNU Arch artifact", } ignores_Medias = { - 'pspbrwse.jbf': 'Default ignore: Paintshop browse file', - 'Thumbs.db': 'Default ignore: Image thumbnails DB', - 'Thumbs.db:encryptable': 'Default ignore: Image thumbnails DB', - 'thumbs/': 'Default ignore: Image thumbnails DB', - '_thumbs/': 'Default ignore: Image thumbnails DB', + "pspbrwse.jbf": "Default ignore: Paintshop browse file", + "Thumbs.db": "Default ignore: Image thumbnails DB", + "Thumbs.db:encryptable": "Default ignore: Image thumbnails DB", + "thumbs/": "Default ignore: Image thumbnails DB", + "_thumbs/": "Default ignore: Image thumbnails DB", } ignores_Build_scripts = { - 'Makefile.in': 'Default ignore: automake artifact', - 'Makefile.am': 'Default ignore: automake artifact', - 'autom4te.cache': 'Default ignore: autoconf artifact', - '*.m4': 'Default ignore: autotools artifact', - 'configure': 'Default ignore: Configure script', - 'configure.bat': 'Default ignore: Configure script', - 'configure.sh': 'Default ignore: Configure script', - 'configure.ac': 'Default ignore: Configure script', - 'config.guess': 'Default ignore: Configure script', - 'config.sub': 'Default ignore: Configure script', - 'compile': 'Default ignore: autoconf artifact', - 'depcomp': 'Default ignore: autoconf artifact', - 'ltmain.sh': 'Default ignore: libtool autoconf artifact', - 'install-sh': 'Default ignore: autoconf artifact', - 'missing': 'Default ignore: autoconf artifact', - 'mkinstalldirs': 'Default 
ignore: autoconf artifact', - 'stamp-h1': 'Default ignore: autoconf artifact', - 'm4/': 'Default ignore: autoconf artifact', - 'autogen.sh': 'Default ignore: autotools artifact', - 'autogen.sh': 'Default ignore: autotools artifact', - - 'CMakeCache.txt': 'Default ignore: CMake artifact', - 'cmake_install.cmake': 'Default ignore: CMake artifact', - 'install_manifest.txt': 'Default ignore: CMake artifact', + "Makefile.in": "Default ignore: automake artifact", + "Makefile.am": "Default ignore: automake artifact", + "autom4te.cache": "Default ignore: autoconf artifact", + "*.m4": "Default ignore: autotools artifact", + "configure": "Default ignore: Configure script", + "configure.bat": "Default ignore: Configure script", + "configure.sh": "Default ignore: Configure script", + "configure.ac": "Default ignore: Configure script", + "config.guess": "Default ignore: Configure script", + "config.sub": "Default ignore: Configure script", + "compile": "Default ignore: autoconf artifact", + "depcomp": "Default ignore: autoconf artifact", + "ltmain.sh": "Default ignore: libtool autoconf artifact", + "install-sh": "Default ignore: autoconf artifact", + "missing": "Default ignore: autoconf artifact", + "mkinstalldirs": "Default ignore: autoconf artifact", + "stamp-h1": "Default ignore: autoconf artifact", + "m4/": "Default ignore: autoconf artifact", + "autogen.sh": "Default ignore: autotools artifact", + "autogen.sh": "Default ignore: autotools artifact", + "CMakeCache.txt": "Default ignore: CMake artifact", + "cmake_install.cmake": "Default ignore: CMake artifact", + "install_manifest.txt": "Default ignore: CMake artifact", } ignores_CI = { - '.travis.yml' : 'Default ignore: Travis config', - '.coveragerc' : 'Default ignore: Coverall config', + ".travis.yml": "Default ignore: Travis config", + ".coveragerc": "Default ignore: Coverall config", } ignores_Python = { - 'pip-selfcheck.json': 'Default ignore: Pip workfile', - 'pytest.ini': 'Default ignore: Python pytest config', - 
'tox.ini': 'Default ignore: Python tox config', - '__pycache__/': 'Default ignore: Python bytecode cache', - '.installed.cfg': 'Default ignore: Python Buildout artifact', - 'pip-log.txt': 'Default ignore: Python pip artifact', - 'pip-delete-this-directory.txt': 'Default ignore: Python pip artifact', - 'pyvenv.cfg': 'Default ignore: Python virtualenv artifact', + "pip-selfcheck.json": "Default ignore: Pip workfile", + "pytest.ini": "Default ignore: Python pytest config", + "tox.ini": "Default ignore: Python tox config", + "__pycache__/": "Default ignore: Python bytecode cache", + ".installed.cfg": "Default ignore: Python Buildout artifact", + "pip-log.txt": "Default ignore: Python pip artifact", + "pip-delete-this-directory.txt": "Default ignore: Python pip artifact", + "pyvenv.cfg": "Default ignore: Python virtualenv artifact", } ignores_I18N = { - '*.mo': 'Default ignore: Translation file', - '*.pot': 'Default ignore: Translation file', - '.localized': 'Default ignore: localized file', + "*.mo": "Default ignore: Translation file", + "*.pot": "Default ignore: Translation file", + ".localized": "Default ignore: localized file", } ignores_coverage_and_tests = { - '*.gcno': 'Default ignore: GCC coverage', - '*.gcda': 'Default ignore: GCC coverage', - '*.gcov': 'Default ignore: GCC coverage', - '.last_cover_stats': 'Default ignore: Perl coverage', - 'htmlcov/': 'Default ignore: Python coverage', - '.tox/': 'Default ignore: Tox tem dir', - '.coverage': 'Default ignore: Python coverage', - '.coverage.*': 'Default ignore: Python coverage', - 'nosetests.xml': 'Default ignore: Python nose tests', - 'coverage.xml': 'Default ignore: Python coverage', - '/spec/reports/': 'Default ignore: Ruby Rails test report', - '/rdoc/': 'Default ignore: Ruby doc', - '.rvmrc': 'Default ignore: Ruby RVM', - '.sass-cache': 'Default ignore: Saas cache', - '*.css.map': 'Default ignore: Saas map', - 'phpunit.xml': 'Default ignore: phpunit', - '*.VisualState.xml': 'Default ignore: Nunit', - 
'TestResult.xml': 'Default ignore: Nunit', + "*.gcno": "Default ignore: GCC coverage", + "*.gcda": "Default ignore: GCC coverage", + "*.gcov": "Default ignore: GCC coverage", + ".last_cover_stats": "Default ignore: Perl coverage", + "htmlcov/": "Default ignore: Python coverage", + ".tox/": "Default ignore: Tox tem dir", + ".coverage": "Default ignore: Python coverage", + ".coverage.*": "Default ignore: Python coverage", + "nosetests.xml": "Default ignore: Python nose tests", + "coverage.xml": "Default ignore: Python coverage", + "/spec/reports/": "Default ignore: Ruby Rails test report", + "/rdoc/": "Default ignore: Ruby doc", + ".rvmrc": "Default ignore: Ruby RVM", + ".sass-cache": "Default ignore: Saas cache", + "*.css.map": "Default ignore: Saas map", + "phpunit.xml": "Default ignore: phpunit", + "*.VisualState.xml": "Default ignore: Nunit", + "TestResult.xml": "Default ignore: Nunit", } ignores_Misc = { - 'pax_global_header': 'Default ignore: Pax header file', - 'C++.gitignore': 'Default ignore: C++.gitignore', - '.gwt/': 'Default ignore: GWT compilation logs', - '.gwt-tmp/': 'Default ignore: GWT temp files', - 'gradle-app.setting': 'Default ignore: Graddle app settings', - 'hs_err_pid*': 'Default ignore: Java VM crash logs', - '.grunt': 'Default ignore: Grunt intermediate storage', - '.history': 'Default ignore: History file', - '.~lock.*#': 'Default ignore: LibreOffice locks', - '/.ssh': 'Default ignore: SSH configuration', + "pax_global_header": "Default ignore: Pax header file", + "C++.gitignore": "Default ignore: C++.gitignore", + ".gwt/": "Default ignore: GWT compilation logs", + ".gwt-tmp/": "Default ignore: GWT temp files", + "gradle-app.setting": "Default ignore: Graddle app settings", + "hs_err_pid*": "Default ignore: Java VM crash logs", + ".grunt": "Default ignore: Grunt intermediate storage", + ".history": "Default ignore: History file", + ".~lock.*#": "Default ignore: LibreOffice locks", + "/.ssh": "Default ignore: SSH configuration", } 
default_ignores = {} -default_ignores.update(chain(*[d.items() for d in [ - ignores_MacOSX, - ignores_Windows, - ignores_Linux, - ignores_IDEs, - ignores_web, - ignores_Maven, - ignores_VCS, - ignores_Medias, - ignores_Build_scripts, - ignores_CI, - ignores_Python, - ignores_I18N, - ignores_coverage_and_tests, - ignores_Misc, - ignores_Build_scripts, -]])) +default_ignores.update( + chain( + *[ + d.items() + for d in [ + ignores_MacOSX, + ignores_Windows, + ignores_Linux, + ignores_IDEs, + ignores_web, + ignores_Maven, + ignores_VCS, + ignores_Medias, + ignores_Build_scripts, + ignores_CI, + ignores_Python, + ignores_I18N, + ignores_coverage_and_tests, + ignores_Misc, + ignores_Build_scripts, + ] + ] + ) +) diff --git a/src/commoncode/paths.py b/src/commoncode/paths.py index e9e71410..f9d11d66 100644 --- a/src/commoncode/paths.py +++ b/src/commoncode/paths.py @@ -2,21 +2,20 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import ntpath import posixpath import re - from os.path import commonprefix -from commoncode.text import as_unicode -from commoncode.text import toascii from commoncode.fileutils import as_posixpath from commoncode.fileutils import as_winpath from commoncode.fileutils import is_posixpath +from commoncode.text import as_unicode +from commoncode.text import toascii """ Various path utilities such as common prefix and suffix functions, conversion @@ -53,18 +52,15 @@ def safe_path(path, posix=False, preserve_spaces=False, posix_only=False): segments = [s.strip() for s in path.split(path_sep) if s.strip()] segments = [ - portable_filename( - s, - preserve_spaces=preserve_spaces, - posix_only=posix_only - ) for s in segments + portable_filename(s, preserve_spaces=preserve_spaces, posix_only=posix_only) + for s in segments ] if not segments: - return '_' + return "_" # always return posix - path = '/'.join(segments) + path = "/".join(segments) return as_posixpath(path) @@ -78,7 +74,7 @@ def path_handlers(path, posix=True): is_posix = is_posixpath(path) use_posix = posix or is_posix pathmod = use_posix and posixpath or ntpath - path_sep = '/' if use_posix else '\\' + path_sep = "/" if use_posix else "\\" return pathmod, path_sep @@ -94,11 +90,11 @@ def resolve(path, posix=True): Windows path with blackslash separators otherwise. """ if not path: - return '.' + return "." path = path.strip() if not path: - return '.' + return "." if not is_posixpath(path): path = as_winpath(path) @@ -110,7 +106,7 @@ def resolve(path, posix=True): segments = [s.strip() for s in path.split(path_sep) if s.strip()] # remove empty (// or ///) or blank (space only) or single dot segments - segments = [s for s in segments if s and s != '.'] + segments = [s for s in segments if s and s != "."] path = path_sep.join(segments) @@ -123,30 +119,30 @@ def resolve(path, posix=True): segments = [s.strip() for s in segments if s and s.strip()] # is this a windows absolute path? 
if yes strip the colon to make this relative - if segments and len(segments[0]) == 2 and segments[0].endswith(':'): + if segments and len(segments[0]) == 2 and segments[0].endswith(":"): segments[0] = segments[0][:-1] # replace any remaining (usually leading) .. segment with a literal "dotdot" - dotdot = 'dotdot' - dd = '..' + dotdot = "dotdot" + dd = ".." segments = [dotdot if s == dd else s for s in segments if s] if segments: path = path_sep.join(segments) else: - path = '.' + path = "." path = as_posixpath(path) return path -legal_punctuation = r'!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~' -legal_spaces = r' ' -legal_alphanumeric = r'A-Za-z0-9' +legal_punctuation = r"!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~" +legal_spaces = r" " +legal_alphanumeric = r"A-Za-z0-9" legal_chars = legal_alphanumeric + legal_punctuation legal_chars_inc_spaces = legal_chars + legal_spaces -illegal_chars_re = r'[^' + legal_chars + r']' -illegal_chars_exc_spaces_re = r'[^' + legal_chars_inc_spaces + r']' +illegal_chars_re = r"[^" + legal_chars + r"]" +illegal_chars_exc_spaces_re = r"[^" + legal_chars_inc_spaces + r"]" replace_illegal_chars = re.compile(illegal_chars_re).sub replace_illegal_chars_exc_spaces = re.compile(illegal_chars_exc_spaces_re).sub @@ -154,17 +150,38 @@ def resolve(path, posix=True): posix_legal_punctuation = r'<:"/>\|\*\^\\\'`\?' 
+ legal_punctuation posix_legal_chars = legal_alphanumeric + posix_legal_punctuation posix_legal_chars_inc_spaces = posix_legal_chars + legal_spaces -posix_illegal_chars_re = r'[^' + posix_legal_chars + r']' -posix_illegal_chars_exc_spaces_re = r'[^' + posix_legal_chars_inc_spaces + r']' +posix_illegal_chars_re = r"[^" + posix_legal_chars + r"]" +posix_illegal_chars_exc_spaces_re = r"[^" + posix_legal_chars_inc_spaces + r"]" replace_illegal_posix_chars = re.compile(posix_illegal_chars_re).sub replace_illegal_posix_chars_exc_spaces = re.compile(posix_illegal_chars_exc_spaces_re).sub -ILLEGAL_WINDOWS_NAMES = set([ - 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', - 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', - 'aux', 'con', 'nul', 'prn' -]) +ILLEGAL_WINDOWS_NAMES = set( + [ + "com1", + "com2", + "com3", + "com4", + "com5", + "com6", + "com7", + "com8", + "com9", + "lpt1", + "lpt2", + "lpt3", + "lpt4", + "lpt5", + "lpt6", + "lpt7", + "lpt8", + "lpt9", + "aux", + "con", + "nul", + "prn", + ] +) def portable_filename(filename, preserve_spaces=False, posix_only=False): @@ -191,32 +208,32 @@ def portable_filename(filename, preserve_spaces=False, posix_only=False): filename = toascii(filename, translit=True) if not filename: - return '_' + return "_" if posix_only: if preserve_spaces: - filename = replace_illegal_posix_chars_exc_spaces('_', filename) + filename = replace_illegal_posix_chars_exc_spaces("_", filename) else: - filename = replace_illegal_posix_chars('_', filename) + filename = replace_illegal_posix_chars("_", filename) else: if preserve_spaces: - filename = replace_illegal_chars_exc_spaces('_', filename) + filename = replace_illegal_chars_exc_spaces("_", filename) else: - filename = replace_illegal_chars('_', filename) + filename = replace_illegal_chars("_", filename) if not posix_only: - basename, dot, extension = filename.partition('.') + basename, dot, extension = filename.partition(".") if 
basename.lower() in ILLEGAL_WINDOWS_NAMES: - filename = ''.join([basename, '_', dot, extension]) + filename = "".join([basename, "_", dot, extension]) # no name made only of dots. - if set(filename) == set(['.']): - filename = 'dot' * len(filename) + if set(filename) == set(["."]): + filename = "dot" * len(filename) # replaced any leading dotdot - if filename != '..' and filename.startswith('..'): - while filename.startswith('..'): - filename = filename.replace('..', '__', 1) + if filename != ".." and filename.startswith(".."): + while filename.startswith(".."): + filename = filename.replace("..", "__", 1) return filename @@ -232,7 +249,12 @@ def common_prefix(s1, s2): """ if not s1 or not s2: return None, 0 - common = commonprefix((s1, s2,)) + common = commonprefix( + ( + s1, + s2, + ) + ) if common: return common, len(common) else: @@ -275,8 +297,8 @@ def split(p): """ if not p: return [] - p = p.strip('/').split('/') - return [] if p == [''] else p + p = p.strip("/").split("/") + return [] if p == [""] else p def _common_path(p1, p2, common_func): @@ -286,5 +308,5 @@ def _common_path(p1, p2, common_func): function. """ common, lgth = common_func(split(p1), split(p2)) - common = '/'.join(common) if common else None + common = "/".join(common) if common else None return common, lgth diff --git a/src/commoncode/resource.py b/src/commoncode/resource.py index 7a9d1550..d19e2da8 100644 --- a/src/commoncode/resource.py +++ b/src/commoncode/resource.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -21,9 +21,9 @@ from os.path import isfile from os.path import join from os.path import normpath +from posixpath import dirname as posixpath_parent from posixpath import join as posixpath_join from posixpath import normpath as posixpath_normpath -from posixpath import dirname as posixpath_parent import attr @@ -33,7 +33,7 @@ # alway have something there. import tempfile - temp_dir = tempfile.mkdtemp(prefix='scancode-resource-cache') + temp_dir = tempfile.mkdtemp(prefix="scancode-resource-cache") from commoncode import ignore from commoncode.datautils import List @@ -77,7 +77,7 @@ def logger_debug(*args): logger.setLevel(logging.DEBUG) def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + return logger.debug(" ".join(isinstance(a, str) and a or repr(a) for a in args)) class ResourceNotInCache(Exception): @@ -98,11 +98,11 @@ def skip_ignored(location): if TRACE_DEEP: logger_debug() logger_debug( - 'Codebase.populate: walk: ignored loc:', + "Codebase.populate: walk: ignored loc:", location, - 'ignored:', + "ignored:", ignored(location), - 'is_special:', + "is_special:", is_special(location), ) @@ -120,8 +120,6 @@ def depth_walk( directory recursively up to ``max_depth`` path segments extending from the ``root_location``. The behaviour is similar of ``os.walk``. - Arguments: - - root_location: Absolute, normalized path for the directory to be walked - max_depth: positive integer for fixed depth limit. 0 for no limit. - skip_ignored: Callback function that takes a location as argument and @@ -157,18 +155,18 @@ class Header(object): should create a Header and append it to the Codebase.headers list. 
""" - tool_name = String(help='Name of the tool used such as scancode-toolkit.') - tool_version = String(default='', help='Tool version used such as v1.2.3.') - options = Mapping(help='Mapping of key/values describing the options used with this tool.') - notice = String(default='', help='Notice text for this tool.') - start_timestamp = String(help='Start timestamp for this header.') - end_timestamp = String(help='End timestamp for this header.') - output_format_version = String(help='Version for the output data format, such as v1.1 .') - duration = String(help='Scan duration in seconds.') - message = String(help='Message text.') - errors = List(help='List of error messages.') - warnings = List(help='List of warning messages.') - extra_data = Mapping(help='Mapping of extra key/values for this tool.') + tool_name = String(help="Name of the tool used such as scancode-toolkit.") + tool_version = String(default="", help="Tool version used such as v1.2.3.") + options = Mapping(help="Mapping of key/values describing the options used with this tool.") + notice = String(default="", help="Notice text for this tool.") + start_timestamp = String(help="Start timestamp for this header.") + end_timestamp = String(help="End timestamp for this header.") + output_format_version = String(help="Version for the output data format, such as v1.1 .") + duration = String(help="Scan duration in seconds.") + message = String(help="Message text.") + errors = List(help="List of error messages.") + warnings = List(help="List of warning messages.") + extra_data = Mapping(help="Mapping of extra key/values for this tool.") def to_dict(self): return attr.asdict(self, dict_factory=dict) @@ -202,28 +200,28 @@ class Codebase: # we do not really need slots but this is a way to ensure we have tight # control on object attributes __slots__ = ( - 'max_depth', - 'location', - 'has_single_resource', - 'resource_attributes', - 'resource_class', - 'root', - 'is_file', - 'temp_dir', - 'resources_by_path', - 
'resources_count', - 'paths', - 'max_in_memory', - 'all_in_memory', - 'all_on_disk', - 'cache_dir', - 'headers', - 'current_header', - 'codebase_attributes', - 'attributes', - 'counters', - 'timings', - 'errors', + "max_depth", + "location", + "has_single_resource", + "resource_attributes", + "resource_class", + "root", + "is_file", + "temp_dir", + "resources_by_path", + "resources_count", + "paths", + "max_in_memory", + "all_in_memory", + "all_on_disk", + "cache_dir", + "headers", + "current_header", + "codebase_attributes", + "attributes", + "counters", + "timings", + "errors", ) # the value returned if the resource is cached @@ -284,7 +282,7 @@ def __init__( ######################################################################## location = os.fsdecode(location) location = abspath(normpath(expanduser(location))) - location = location.rstrip('/\\') + location = location.rstrip("/\\") # TODO: what if is_special(location)??? assert exists(location) self.location = location @@ -292,7 +290,7 @@ def __init__( self.is_file = filetype_is_file(location) # True if this codebase root is a file or an empty directory. - self.has_single_resource = bool(self.is_file or not os.listdir(location)) + self.has_single_resource = bool(self.is_file or not os.scandir(location)) ######################################################################## # Set up caching, summary, timing, and error info @@ -308,8 +306,11 @@ def _prepare_clean_paths(self, paths=tuple()): We convert to POSIX and ensure we have no slash at both ends. """ paths = (clean_path(p) for p in (paths or []) if p) + # we sort by path segments (e.g. 
essentially a topo sort) - _sorter = lambda p: p.split('/') + def _sorter(p): + return p.split("/") + return sorted(paths, key=_sorter) def _setup_essentials(self, temp_dir=temp_dir, max_in_memory=10000): @@ -388,7 +389,7 @@ def _get_resource_cache_location(self, path, create_dirs=False): path = clean_path(path) # for the cached file name, we use an md5 of the path to avoid things being too long - resid = str(md5(path.encode('utf-8')).hexdigest()) + resid = str(md5(path.encode("utf-8", "surrogateescape")).hexdigest()) cache_sub_dir, cache_file_name = resid[-2:], resid parent = join(self.cache_dir, cache_sub_dir) @@ -409,7 +410,7 @@ def _build_resource_class(self, *args, **kwargs): """ # Resource sub-class to use. Configured with plugin attributes if present return attr.make_class( - name='ScannedResource', + name="ScannedResource", attrs=self.resource_attributes or {}, slots=True, bases=(Resource,), @@ -452,7 +453,7 @@ def _populate(self): ########################################################## root = self._create_root_resource() if TRACE: - logger_debug('Codebase.populate: root:', root) + logger_debug("Codebase.populate: root:", root) if self.has_single_resource: # there is nothing else to do for a single file or a single @@ -479,7 +480,7 @@ def _create_resources_from_paths(self, root, paths): for path in paths: res_loc = join(base_location, path) if not exists(res_loc): - msg = f'ERROR: cannot populate codebase: path: {path!r} not found in {res_loc!r}' + msg = f"ERROR: cannot populate codebase: path: {path!r} not found in {res_loc!r}" self.errors.append(msg) raise Exception(path, join(base_location, path)) continue @@ -487,17 +488,17 @@ def _create_resources_from_paths(self, root, paths): # create all parents. 
The last parent is the one we want to use parent = root if TRACE: - logger_debug('Codebase._create_resources_from_paths: parent', parent) + logger_debug("Codebase._create_resources_from_paths: parent", parent) for parent_path in get_ancestor_paths(path, include_self=False): if TRACE: logger_debug( - f' Codebase._create_resources_from_paths: parent_path: {parent_path!r}' + f" Codebase._create_resources_from_paths: parent_path: {parent_path!r}" ) if not parent_path: continue newpar = parents_by_path.get(parent_path) if TRACE: - logger_debug(' Codebase._create_resources_from_paths: newpar', repr(newpar)) + logger_debug(" Codebase._create_resources_from_paths: newpar", repr(newpar)) if not newpar: newpar = self._get_or_create_resource( @@ -508,7 +509,8 @@ def _create_resources_from_paths(self, root, paths): ) if not newpar: raise Exception( - f'ERROR: Codebase._create_resources_from_paths: cannot create parent for: {parent_path!r}' + "ERROR: Codebase._create_resources_from_paths:" + f" cannot create parent for: {parent_path!r}" ) parent = newpar @@ -516,8 +518,8 @@ def _create_resources_from_paths(self, root, paths): if TRACE: logger_debug( - f' Codebase._create_resources_from_paths:', - f'created newpar: {newpar!r}', + f" Codebase._create_resources_from_paths:", + f"created newpar: {newpar!r}", ) res = self._get_or_create_resource( @@ -527,7 +529,7 @@ def _create_resources_from_paths(self, root, paths): is_file=isfile(res_loc), ) if TRACE: - logger_debug('Codebase._create_resources_from_paths: resource', res) + logger_debug("Codebase._create_resources_from_paths: resource", res) def _create_resources_from_root(self, root): # without paths we walks the root location top-down @@ -540,11 +542,11 @@ def _create_resources_from_root(self, root): def err(_error): """os.walk error handler""" self.errors.append( - f'ERROR: cannot populate codebase: {_error}\n{traceback.format_exc()}' + f"ERROR: cannot populate codebase: {_error}\n{traceback.format_exc()}" ) # Walk over the 
directory and build the resource tree - for (top, dirs, files) in depth_walk( + for top, dirs, files in depth_walk( root_location=root.location, max_depth=self.max_depth, error_handler=err, @@ -579,7 +581,7 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored) is_file=is_file, ) if TRACE: - logger_debug('Codebase.create_resources:', res) + logger_debug("Codebase.create_resources:", res) yield res def _create_root_resource(self): @@ -588,7 +590,7 @@ def _create_root_resource(self): """ # we cannot recreate a root if it exists!! if self.root: - raise TypeError('Root resource already exists and cannot be recreated') + raise TypeError("Root resource already exists and cannot be recreated") location = self.location name = file_name(location) @@ -597,7 +599,7 @@ def _create_root_resource(self): path = Resource.build_path(root_location=location, location=location) if TRACE: - logger_debug(f' Codebase._create_root_resource: {path} is_file: {self.is_file}') + logger_debug(f" Codebase._create_root_resource: {path} is_file: {self.is_file}") logger_debug() root = self.resource_class( @@ -627,7 +629,7 @@ def _get_or_create_resource( """ if not parent: raise TypeError( - f'Cannot create resource without parent: name: {name!r}, path: {path!r}' + f"Cannot create resource without parent: name: {name!r}, path: {path!r}" ) # If the codebase is virtual, we provide the path @@ -638,7 +640,7 @@ def _get_or_create_resource( existing = self.get_resource(path) if existing: if TRACE: - logger_debug(' Codebase._get_or_create_resource: path already exists:', path) + logger_debug(" Codebase._get_or_create_resource: path already exists:", path) return existing if self._use_disk_cache_for_resource(): @@ -655,8 +657,8 @@ def _get_or_create_resource( if TRACE: logger_debug( - f' Codebase._get_or_create_resource: with path: {path}\n' - f' name={name}, is_file={is_file}' + f" Codebase._get_or_create_resource: with path: {path}\n" + f" name={name}, is_file={is_file}" ) 
child = self.resource_class( @@ -687,7 +689,7 @@ def get_files_count(self): """ Return the final files counts for the codebase. """ - return self.counters.get('final:files_count', 0) + return self.counters.get("final:files_count", 0) def add_files_count_to_current_header(self): """ @@ -696,7 +698,7 @@ def add_files_count_to_current_header(self): """ files_count = self.get_files_count() current_header = self.get_or_create_current_header() - current_header.extra_data['files_count'] = files_count + current_header.extra_data["files_count"] = files_count return files_count def get_headers(self): @@ -732,10 +734,10 @@ def _use_disk_cache_for_resource(self): if TRACE: logger_debug( - f' Codebase._use_disk_cache_for_resource mode: {use_disk_cache} ' - f'on_disk: {self.all_on_disk} ' - f'in_mem: {self.all_in_memory} ' - f'max_in_mem: {self.max_in_memory}' + f" Codebase._use_disk_cache_for_resource mode: {use_disk_cache} " + f"on_disk: {self.all_on_disk} " + f"in_mem: {self.all_in_memory} " + f"max_in_mem: {self.max_in_memory}" ) return use_disk_cache @@ -756,22 +758,22 @@ def _exists_on_disk(self, path): if cache_location: return exists(cache_location) - ########### FIXME: the PATH SHOULD NOT INCLUDE THE ROOT NAME + # FIXME: the PATH SHOULD NOT INCLUDE THE ROOT NAME def get_resource(self, path): """ Return the Resource with `path` or None if it does not exists. The ``path`` must be relative to the root (and including the root name as its first segment). """ - assert isinstance(path, str), f'Invalid path: {path!r} is not a string.' + assert isinstance(path, str), f"Invalid path: {path!r} is not a string." 
path = clean_path(path) if TRACE: - msg = [' Codebase.get_resource:', 'path:', path] + msg = [" Codebase.get_resource:", "path:", path] if not path or path not in self.resources_by_path: - msg.append('not in resources!') + msg.append("not in resources!") else: - msg.extend(['exists on disk:', self._exists_on_disk(path)]) - msg.extend(['exists in memo:', self._exists_in_memory(path)]) + msg.extend(["exists on disk:", self._exists_on_disk(path)]) + msg.extend(["exists in memo:", self._exists_in_memory(path)]) logger_debug(*msg) # we use Codebase.CACHED_RESOURCE as a semaphore for existing but only @@ -788,10 +790,10 @@ def get_resource(self, path): pass else: # this should never happen - raise Exception(f'get_resource: Internal error when getting {path!r}') + raise Exception(f"get_resource: Internal error when getting {path!r}") if TRACE: - logger_debug(' Resource:', res) + logger_debug(" Resource:", res) return res def save_resource(self, resource): @@ -804,7 +806,7 @@ def save_resource(self, resource): path = clean_path(resource.path) if TRACE: - logger_debug(' Codebase.save_resource:', resource) + logger_debug(" Codebase.save_resource:", resource) if resource.is_root: self.root = resource @@ -825,11 +827,11 @@ def _dump_resource(self, resource): if not cache_location: raise TypeError( - 'Resource cannot be dumped to disk and is used only' f'in memory: {resource}' + f"Resource cannot be dumped to disk and is used onlyin memory: {resource}" ) # TODO: consider messagepack or protobuf for compact/faster processing? - with open(cache_location, 'w') as cached: + with open(cache_location, "w") as cached: cached.write(json.dumps(resource.serialize(), check_circular=False)) # TODO: consider adding a small LRU cache in front of this for perf? 
@@ -842,28 +844,28 @@ def _load_resource(self, path): if TRACE: logger_debug( - ' Codebase._load_resource: exists:', + " Codebase._load_resource: exists:", exists(cache_location), - 'cache_location:', + "cache_location:", cache_location, ) if not exists(cache_location): - raise ResourceNotInCache(f'Failed to load Resource: {path} from {cache_location!r}') + raise ResourceNotInCache(f"Failed to load Resource: {path} from {cache_location!r}") # TODO: consider messagepack or protobuf for compact/faster processing try: - with open(cache_location, 'rb') as cached: + with open(cache_location, "rb") as cached: # TODO: Use custom json encoder to encode JSON list as a tuple # TODO: Consider using simplejson data = json.load(cached) return self.resource_class(**data) except Exception as e: - with open(cache_location, 'rb') as cached: + with open(cache_location, "rb") as cached: cached_data = cached.read() msg = ( - f'ERROR: failed to load resource from cached location: {cache_location} ' - 'with content:\n\n' + repr(cached_data) + '\n\n' + traceback.format_exc() + f"ERROR: failed to load resource from cached location: {cache_location} " + "with content:\n\n" + repr(cached_data) + "\n\n" + traceback.format_exc() ) raise Exception(msg) from e @@ -873,12 +875,12 @@ def _remove_resource(self, resource): Does not remove children. """ if resource.is_root: - raise TypeError(f'Cannot remove the root resource from codebase: {resource!r}') + raise TypeError(f"Cannot remove the root resource from codebase: {resource!r}") # remove from in-memory cache. The disk cache is cleared on exit. self.resources_by_path.pop(resource.path, None) if TRACE: - logger_debug('Codebase._remove_resource:', resource) + logger_debug("Codebase._remove_resource:", resource) def remove_resource(self, resource): """ @@ -886,11 +888,11 @@ def remove_resource(self, resource): codebase. Return a set of removed Resource paths. 
""" if TRACE: - logger_debug('Codebase.remove_resource') - logger_debug(' resource', resource) + logger_debug("Codebase.remove_resource") + logger_debug(" resource", resource) if resource.is_root: - raise TypeError(f'Cannot remove the root resource from codebase: {resource!r}') + raise TypeError(f"Cannot remove the root resource from codebase: {resource!r}") removed_paths = set() @@ -903,7 +905,7 @@ def remove_resource(self, resource): # remove resource from parent parent = resource.parent(self) if TRACE: - logger_debug(' parent', parent) + logger_debug(" parent", parent) parent.children_names.remove(resource.name) parent.save(self) @@ -1006,7 +1008,7 @@ def update_counts(self, skip_filtered=False): try: resource._compute_children_counts(self, skip_filtered) except Exception as e: - msg = f'ERROR: cannot compute children counts for: {resource.path}' + msg = f"ERROR: cannot compute children counts for: {resource.path}" raise Exception(msg) from e def clear(self): @@ -1158,18 +1160,18 @@ class Resource(object): @property def type(self): - return 'file' if self.is_file else 'directory' + return "file" if self.is_file else "directory" @type.setter def type(self, value): - if value == 'file': + if value == "file": self.is_file = True else: self.is_file = False @classmethod def build_path(cls, root_location, location): - """ + r""" Return a POSIX path string (using "/" separators) of ``location`` relative to ``root_location`. Both locations are absolute native locations. The returned path has no leading and trailing slashes. 
The first segment @@ -1187,10 +1189,10 @@ def build_path(cls, root_location, location): assert loc.startswith(root_loc) # keep the root directory name by default - root_loc = posixpath_parent(root_loc).strip('/') - path = loc.replace(root_loc, '', 1).strip('/') + root_loc = posixpath_parent(root_loc).strip("/") + path = loc.replace(root_loc, "", 1).strip("/") if TRACE: - logger_debug('build_path:', root_loc, loc, path) + logger_debug("build_path:", root_loc, loc, path) return path def get_path(self, full_root=False, strip_root=False): @@ -1262,7 +1264,7 @@ def extracted_to(self, codebase): """ Return the path this Resource archive was extracted to or None. """ - extract_path = f'{self.path}-extract' + extract_path = f"{self.path}-extract" return codebase.get_resource(extract_path) def extracted_from(self, codebase): @@ -1270,8 +1272,8 @@ def extracted_from(self, codebase): Return the path to an archive this Resource was extracted from or None. """ path = self.path - if '-extract' in path: - archive_path, _, _ = self.path.rpartition('-extract') + if "-extract" in path: + archive_path, _, _ = self.path.rpartition("-extract") return codebase.get_resource(archive_path) @classmethod @@ -1389,7 +1391,9 @@ def children(self, codebase, names=()): get_child = codebase.get_resource children = [get_child(path=child_path(name)) for name in children_names] - _sorter = lambda r: (r.has_children(), r.name.lower(), r.name) + def _sorter(r): + return (r.has_children(), r.name.lower(), r.name) + return sorted((c for c in children if c), key=_sorter) def has_parent(self): @@ -1494,10 +1498,10 @@ def to_dict( return res if with_info: - res['name'] = self.name - res['base_name'] = self.base_name - res['extension'] = self.extension - res['size'] = self.size + res["name"] = self.name + res["base_name"] = self.base_name + res["extension"] = self.extension + res["size"] = self.size # exclude by default all of the "standard", default Resource fields self_fields_filter = 
attr.filters.exclude(*attr.fields(Resource)) @@ -1507,21 +1511,21 @@ def to_dict( other_data = attr.asdict(self, filter=self_fields_filter, dict_factory=dict) # FIXME: make a deep copy of the data first!!!! - # see https://github.com/nexB/scancode-toolkit/issues/1199 + # see https://github.com/aboutcode-org/scancode-toolkit/issues/1199 res.update(other_data) if with_timing: - res['scan_time'] = self.scan_time or 0 - res['scan_timings'] = self.scan_timings or dict() + res["scan_time"] = self.scan_time or 0 + res["scan_timings"] = self.scan_timings or dict() if with_info: - res['files_count'] = self.files_count - res['dirs_count'] = self.dirs_count - res['size_count'] = self.size_count + res["files_count"] = self.files_count + res["dirs_count"] = self.dirs_count + res["size_count"] = self.size_count - res['scan_errors'] = self.scan_errors + res["scan_errors"] = self.scan_errors if TRACE: - logger_debug('Resource.to_dict:', res) + logger_debug("Resource.to_dict:", res) return res def serialize(self): @@ -1532,11 +1536,11 @@ def serialize(self): """ # we save all fields, not just the one in .to_dict() serializable = attr.asdict(self) - serializable['name'] = self.name + serializable["name"] = self.name if self.location: - serializable['location'] = self.location + serializable["location"] = self.location if self.cache_location: - serializable['cache_location'] = self.cache_location + serializable["cache_location"] = self.cache_location return serializable @@ -1544,11 +1548,11 @@ def clean_path(path): """ Return a cleaned and normalized POSIX ``path``. 
""" - path = path or '' + path = path or "" # convert to posix and ensure we have no slash at both ends - path = posixpath_normpath(path.replace('\\', '/').strip('/')) - if path == '.': - path = '' + path = posixpath_normpath(path.replace("\\", "/").strip("/")) + if path == ".": + path = "" return path @@ -1571,11 +1575,11 @@ def strip_first_path_segment(path): '' """ path = clean_path(path) - if '/' in path: - _root, _, path = path.partition('/') + if "/" in path: + _root, _, path = path.partition("/") return path else: - return '' + return "" def get_codebase_cache_dir(temp_dir): @@ -1587,13 +1591,12 @@ def get_codebase_cache_dir(temp_dir): from commoncode.fileutils import get_temp_dir from commoncode.timeutils import time2tstamp - prefix = 'scancode-codebase-' + time2tstamp() + '-' + prefix = "scancode-codebase-" + time2tstamp() + "-" return get_temp_dir(base_dir=temp_dir, prefix=prefix) @attr.s(slots=True) class _CodebaseAttributes(object): - def to_dict(self): return attr.asdict(self, dict_factory=dict) @@ -1604,7 +1607,7 @@ def from_attributes(cls, attributes): ``attributes`` mapping of "attr" attributes. """ return attr.make_class( - name='CodebaseAttributes', + name="CodebaseAttributes", attrs=attributes or {}, slots=True, bases=(_CodebaseAttributes,), @@ -1638,11 +1641,10 @@ def build_attributes_defs(mapping, ignored_keys=()): class VirtualCodebase(Codebase): - __slots__ = ( # TRUE iff the loaded virtual codebase has file information - 'with_info', - 'has_single_resource', + "with_info", + "has_single_resource", ) def __init__( @@ -1663,7 +1665,7 @@ def __init__( `max_depth`, if passed, will be ignored as VirtualCodebase will be using the depth of the original scan. 
""" - logger_debug(f'VirtualCodebase: new from: {location!r}') + logger_debug(f"VirtualCodebase: new from: {location!r}") self._setup_essentials(temp_dir, max_in_memory) @@ -1683,8 +1685,7 @@ def _get_scan_data_helper(self, location): """ try: return json.loads(location) - except: - + except Exception: location = abspath(normpath(expanduser(location))) with open(location) as f: scan_data = json.load(f) @@ -1714,22 +1715,22 @@ def _get_scan_data(self, location): combined_scan_data = dict(headers=[], files=[]) for idx, loc in enumerate(location, 1): scan_data = self._get_scan_data_helper(loc) - headers = scan_data.get('headers') + headers = scan_data.get("headers") if headers: - combined_scan_data['headers'].extend(headers) - files = scan_data.get('files') + combined_scan_data["headers"].extend(headers) + files = scan_data.get("files") if files: for f in files: - f['path'] = posixpath_join(f'codebase-{idx}', clean_path(f['path'])) - combined_scan_data['files'].extend(files) + f["path"] = posixpath_join(f"codebase-{idx}", clean_path(f["path"])) + combined_scan_data["files"].extend(files) else: raise Exception( f'Input file is missing a "files" (aka. 
resources) section to load: {loc}' ) - combined_scan_data['headers'] = sorted( - combined_scan_data['headers'], - key=lambda x: x['start_timestamp'], + combined_scan_data["headers"] = sorted( + combined_scan_data["headers"], + key=lambda x: x["start_timestamp"], ) return combined_scan_data @@ -1767,7 +1768,7 @@ def _collect_codebase_attributes(self, scan_data, *args, **kwargs): all_attributes = ( build_attributes_defs( mapping=scan_data, - ignored_keys=('headers', 'files'), + ignored_keys=("headers", "files"), ) or {} ) @@ -1788,7 +1789,7 @@ def _build_resource_class(self, sample_resource_data, *args, **kwargs): standard_res_attributes = set(f.name for f in attr.fields(Resource)) # add these properties since they are fields but are serialized - properties = set(['type', 'base_name', 'extension']) + properties = set(["type", "base_name", "extension"]) standard_res_attributes.update(properties) # We collect attributes that are not in standard_res_attributes already @@ -1806,7 +1807,7 @@ def _build_resource_class(self, sample_resource_data, *args, **kwargs): # Create the Resource class with the desired attributes return attr.make_class( - name='ScannedResource', + name="ScannedResource", attrs=all_res_attributes or dict(), slots=True, bases=(Resource,), @@ -1824,7 +1825,7 @@ def _populate(self, scan_data): """ # Collect headers ########################################################## - headers = scan_data.get('headers') or [] + headers = scan_data.get("headers") or [] headers = [Header.from_dict(**hle) for hle in headers] self.headers = headers @@ -1840,19 +1841,19 @@ def _populate(self, scan_data): ########################################################## for attr_name in self.codebase_attributes: value = scan_data.get(attr_name) - if value == None: + if not value: continue setattr(self.attributes, attr_name, value) ########################################################## - files_data = scan_data.get('files') + files_data = scan_data.get("files") if not 
files_data: raise Exception('Input has no "files" top-level scan results.') if len(files_data) == 1: # we will shortcut to populate the codebase with a single root resource self.has_single_resource = True - root_is_file = files_data[0].get('type') == 'file' + root_is_file = files_data[0].get("type") == "file" else: root_is_file = False @@ -1884,9 +1885,9 @@ def _populate(self, scan_data): for fdata in files_data: sample_resource_data_update(fdata) - segments = fdata['path'].split('/') + segments = fdata["path"].split("/") root_names_add(segments[0]) - fdata['path_segments'] = segments + fdata["path_segments"] = segments # Resource sub-class to use. Configured with all known scanned file # attributes and plugin attributes if present @@ -1897,13 +1898,13 @@ def _populate(self, scan_data): self.with_info = any( a in sample_resource_data for a in ( - 'name', - 'base_name', - 'extension', - 'size', - 'files_count', - 'dirs_count', - 'size_count', + "name", + "base_name", + "extension", + "size", + "files_count", + "dirs_count", + "size_count", ) ) @@ -1911,28 +1912,28 @@ def _populate(self, scan_data): # Create root resource first ########################################################## if not root_names: - raise Exception('Unable to find root for codebase.') + raise Exception("Unable to find root for codebase.") len_root_names = len(root_names) if len_root_names == 1: root_path = root_names.pop() needs_new_virtual_root = False elif len_root_names > 1 or multiple_inputs: - root_path = 'virtual_root' + root_path = "virtual_root" needs_new_virtual_root = True if needs_new_virtual_root: for fdata in files_data: - rpath = fdata['path'] - fdata['path'] = posixpath_join(root_path, rpath) - fdata['path_segments'].insert(0, root_path) + rpath = fdata["path"] + fdata["path"] = posixpath_join(root_path, rpath) + fdata["path_segments"].insert(0, root_path) root_data = self._create_empty_resource_data() if self.has_single_resource: # single resource with one or more segments 
rdata = files_data[0] - root_path = rdata['path'] + root_path = rdata["path"] rdata = remove_properties_and_basics(rdata) root_data.update(rdata) @@ -1949,14 +1950,14 @@ def _populate(self, scan_data): setattr(root, name, value) if TRACE: - logger_debug('VirtualCodebase.populate: root:', root) + logger_debug("VirtualCodebase.populate: root:", root) # TODO: report error if filtering the root with a paths? self.save_resource(root) if self.has_single_resource: if TRACE: - logger_debug('VirtualCodebase.populate: with single resource.') + logger_debug("VirtualCodebase.populate: with single resource.") return all_paths = None @@ -1970,21 +1971,21 @@ def _populate(self, scan_data): # Note that we do not know the ordering there. # Therefore we sort in place by path segments - files_data.sort(key=itemgetter('path_segments')) + files_data.sort(key=itemgetter("path_segments")) # We create directories that exist in the scan or create these that # exist only in paths duplicated_paths = set() last_path = None for fdata in files_data: - path = fdata.get('path') + path = fdata.get("path") # skip the ones we did not request if all_paths and path not in all_paths: continue # these are no longer needed - path_segments = fdata.pop('path_segments') + path_segments = fdata.pop("path_segments") if not last_path: last_path = path @@ -1993,11 +1994,11 @@ def _populate(self, scan_data): else: last_path = path - name = fdata.get('name', None) or None + name = fdata.get("name", None) or None if not name: name = file_name(path) - is_file = fdata.get('type', 'file') == 'file' + is_file = fdata.get("type", "file") == "file" parent = self._get_parent_directory(path_segments=path_segments) resource = self._get_or_create_resource( @@ -2016,8 +2017,8 @@ def _populate(self, scan_data): if duplicated_paths: raise Exception( - 'Illegal combination of VirtualCode multiple inputs: ' - f'duplicated paths: {list(duplicated_paths)}', + "Illegal combination of VirtualCode multiple inputs: " + f"duplicated 
paths: {list(duplicated_paths)}", ) def _get_parent_directory(self, path_segments): @@ -2053,12 +2054,12 @@ def _create_root_resource(self, name, path, is_file): """ # we cannot recreate a root if it exists!! if self.root: - raise TypeError('Root resource already exists and cannot be recreated') + raise TypeError("Root resource already exists and cannot be recreated") path = clean_path(path) if TRACE: - logger_debug(f' VirtualCodebase._create_root_resource: {path!r} is_file: {is_file}') + logger_debug(f" VirtualCodebase._create_root_resource: {path!r} is_file: {is_file}") root = self.resource_class( name=name, @@ -2076,7 +2077,7 @@ def _create_root_resource(self, name, path, is_file): return root -KNOW_PROPS = set(['type', 'base_name', 'extension', 'path', 'name', 'path_segments']) +KNOW_PROPS = set(["type", "base_name", "extension", "path", "name", "path_segments"]) def remove_properties_and_basics(resource_data): @@ -2101,10 +2102,10 @@ def get_ancestor_paths(path, include_self=False): >>> assert results == [], results """ assert path - segments = path.split('/') + segments = path.split("/") if not include_self: segments = segments[:-1] subpath = [] for segment in segments: subpath.append(segment) - yield '/'.join(subpath) + yield "/".join(subpath) diff --git a/src/commoncode/system.py b/src/commoncode/system.py index 07938f9b..3c5a6744 100644 --- a/src/commoncode/system.py +++ b/src/commoncode/system.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -16,22 +16,22 @@ def os_arch(): """ Return a tuple for the current the OS and architecture. 
""" - if sys.maxsize > 2 ** 32: - arch = '64' + if sys.maxsize > 2**32: + arch = "64" else: - arch = '32' + arch = "32" sys_platform = str(sys.platform).lower() - if sys_platform.startswith('linux'): - os = 'linux' - elif 'win32' in sys_platform: - os = 'win' - elif 'darwin' in sys_platform: - os = 'mac' - elif 'freebsd' in sys_platform: - os = 'freebsd' + if sys_platform.startswith("linux"): + os = "linux" + elif "win32" in sys_platform: + os = "win" + elif "darwin" in sys_platform: + os = "mac" + elif "freebsd" in sys_platform: + os = "freebsd" else: - raise Exception('Unsupported OS/platform %r' % sys_platform) + raise Exception("Unsupported OS/platform %r" % sys_platform) return os, arch @@ -39,17 +39,17 @@ def os_arch(): # OS/Arch # current_os, current_arch = os_arch() -on_windows = current_os == 'win' -on_windows_32 = on_windows and current_arch == '32' -on_windows_64 = on_windows and current_arch == '64' -on_mac = current_os == 'mac' -on_linux = current_os == 'linux' -on_freebsd = current_os == 'freebsd' +on_windows = current_os == "win" +on_windows_32 = on_windows and current_arch == "32" +on_windows_64 = on_windows and current_arch == "64" +on_mac = current_os == "mac" +on_linux = current_os == "linux" +on_freebsd = current_os == "freebsd" on_posix = not on_windows and (on_mac or on_linux or on_freebsd) -current_os_arch = '%(current_os)s-%(current_arch)s' % locals() -noarch = 'noarch' -current_os_noarch = '%(current_os)s-%(noarch)s' % locals() +current_os_arch = "%(current_os)s-%(current_arch)s" % locals() +noarch = "noarch" +current_os_noarch = "%(current_os)s-%(noarch)s" % locals() del os_arch @@ -61,10 +61,11 @@ def is_on_macos_14_or_higher(): filesystem encodings. 
""" import platform + macos_ver = platform.mac_ver() macos_ver = macos_ver[0] - macos_ver = macos_ver.split('.') - return macos_ver > ['10', '14'] + macos_ver = macos_ver.split(".") + return macos_ver > ["10", "14"] on_macos_14_or_higher = is_on_macos_14_or_higher() @@ -77,7 +78,8 @@ def is_on_macos_arm64(): Return True if the current OS is macOS running on Apple Silicon. """ import platform - return on_mac and platform.machine() == 'arm64' + + return on_mac and platform.machine() == "arm64" on_macos_arm64 = is_on_macos_arm64() @@ -85,16 +87,20 @@ def is_on_macos_arm64(): del is_on_macos_arm64 -def is_on_ubuntu_22(os_release_location='/etc/os-release'): +def is_on_ubuntu_22(os_release_location="/etc/os-release"): """ Return True if the current OS is Ubuntu 22.XX. """ if not on_linux: return False os_release_info = parse_os_release(os_release_location) - return os_release_info['ID'] == 'ubuntu' and '22' in os_release_info['VERSION_ID'] + return os_release_info["ID"] == "ubuntu" and "22" in os_release_info["VERSION_ID"] -on_ubuntu_22 = is_on_ubuntu_22() + +try: + on_ubuntu_22 = is_on_ubuntu_22() +except FileNotFoundError: + on_ubuntu_22 = False del is_on_ubuntu_22 @@ -107,12 +113,13 @@ def has_case_sensitive_fs(): case sensitive by default, newer macOS use APFS which is no longer case sensitive by default. - From https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/APFS_Guide/FAQ/FAQ.html - How does Apple File System handle filenames? - APFS accepts only valid UTF-8 encoded filenames for creation, and preserves - both case and normalization of the filename on disk in all variants. APFS, - like HFS+, is case-sensitive on iOS and is available in case-sensitive and - case-insensitive variants on macOS, with case-insensitive being the default. + From + https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/APFS_Guide/FAQ/FAQ.html + How does Apple File System handle filenames? 
+ APFS accepts only valid UTF-8 encoded filenames for creation, and preserves + both case and normalization of the filename on disk in all variants. APFS, + like HFS+, is case-sensitive on iOS and is available in case-sensitive and + case-insensitive variants on macOS, with case-insensitive being the default. """ return not os.path.exists(__file__.upper()) @@ -123,11 +130,11 @@ def has_case_sensitive_fs(): # Shared library file extensions # if on_windows: - lib_ext = '.dll' + lib_ext = ".dll" if on_mac: - lib_ext = '.dylib' + lib_ext = ".dylib" if on_linux or on_freebsd: - lib_ext = '.so' + lib_ext = ".so" # # Python versions @@ -137,11 +144,11 @@ def has_case_sensitive_fs(): py3 = _sys_v0 == 3 _sys_v1 = sys.version_info[1] -py36 = py3 and _sys_v1 == 6 -py37 = py3 and _sys_v1 == 7 -py38 = py3 and _sys_v1 == 8 -py39 = py3 and _sys_v1 == 9 py310 = py3 and _sys_v1 == 10 +py311 = py3 and _sys_v1 == 11 +py312 = py3 and _sys_v1 == 12 +py313 = py3 and _sys_v1 == 13 +py314 = py3 and _sys_v1 == 14 # Do not let Windows error pop up messages with default SetErrorMode # See http://msdn.microsoft.com/en-us/library/ms680621(VS100).aspx @@ -154,5 +161,6 @@ def has_case_sensitive_fs(): # The system does not display the Windows Error Reporting dialog. if on_windows: import ctypes + # 3 is SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX ctypes.windll.kernel32.SetErrorMode(3) # @UndefinedVariable diff --git a/src/commoncode/testcase.py b/src/commoncode/testcase.py index 2fd9fd31..2a7b37a9 100644 --- a/src/commoncode/testcase.py +++ b/src/commoncode/testcase.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -12,15 +12,15 @@ import shutil import stat import sys -from os import path from collections import defaultdict from itertools import chain +from os import path from unittest import TestCase as TestCaseClass import saneyaml -from commoncode import fileutils from commoncode import filetype +from commoncode import fileutils from commoncode.archive import extract_tar from commoncode.archive import extract_tar_raw from commoncode.archive import extract_tar_uni @@ -42,13 +42,9 @@ def to_os_native_path(path): """ Normalize a path to use the native OS path separator. """ - OS_PATH_SEP = '\\' if on_windows else '/' + OS_PATH_SEP = "\\" if on_windows else "/" - return ( - path.replace('/', OS_PATH_SEP) - .replace(u'\\', OS_PATH_SEP) - .rstrip(OS_PATH_SEP) - ) + return path.replace("/", OS_PATH_SEP).replace("\\", OS_PATH_SEP).rstrip(OS_PATH_SEP) def get_test_loc( @@ -64,6 +60,7 @@ def get_test_loc( """ if debug: import inspect + caller = inspect.stack()[1][3] print('\nget_test_loc,%(caller)s,"%(test_path)s","%(test_data_dir)s"' % locals()) @@ -71,15 +68,17 @@ def get_test_loc( assert test_data_dir if not path.exists(test_data_dir): - raise IOError("[Errno 2] No such directory: test_data_dir not found:" - " '%(test_data_dir)s'" % locals()) + raise IOError( + "[Errno 2] No such directory: test_data_dir not found: '%(test_data_dir)s'" % locals() + ) tpath = to_os_native_path(test_path) test_loc = path.abspath(path.join(test_data_dir, tpath)) if must_exist and not path.exists(test_loc): - raise IOError("[Errno 2] No such file or directory: " - "test_path not found: '%(test_loc)s'" % locals()) + raise IOError( + "[Errno 2] No such file or directory: test_path not found: '%(test_loc)s'" % locals() + ) return test_loc @@ -90,6 +89,7 @@ class FileDrivenTesting(object): temporary test resources and doing file-based assertions. This can be used as a standalone object if needed. 
""" + test_data_dir = None def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): @@ -104,6 +104,7 @@ def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): test_data_dir = self.test_data_dir if debug: import inspect + caller = inspect.stack()[1][3] print('\nself.get_test_loc,%(caller)s,"%(test_path)s"' % locals()) @@ -129,16 +130,16 @@ def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): test_loc = target_dir return test_loc - def get_temp_file(self, extension=None, dir_name='td', file_name='tf'): + def get_temp_file(self, extension=None, dir_name="td", file_name="tf"): """ Return a unique new temporary file location to a non-existing temporary file that can safely be created without a risk of name collision. """ if extension is None: - extension = '.txt' + extension = ".txt" - if extension and not extension.startswith('.'): - extension = '.' + extension + if extension and not extension.startswith("."): + extension = "." + extension file_name = file_name + extension temp_dir = self.get_temp_dir(dir_name) @@ -156,13 +157,14 @@ def get_temp_dir(self, sub_dir_path=None): global test_run_temp_dir if not test_run_temp_dir: import tempfile + test_tmp_root_dir = tempfile.gettempdir() # now we add a space in the path for testing path with spaces test_run_temp_dir = fileutils.get_temp_dir( - base_dir=test_tmp_root_dir, prefix='scancode-tk-tests -') + base_dir=test_tmp_root_dir, prefix="scancode-tk-tests -" + ) - test_run_temp_subdir = fileutils.get_temp_dir( - base_dir=test_run_temp_dir, prefix='') + test_run_temp_subdir = fileutils.get_temp_dir(base_dir=test_run_temp_dir, prefix="") if sub_dir_path: # create a sub directory hierarchy if requested @@ -175,7 +177,7 @@ def remove_vcs(self, test_dir): """ Remove some version control directories and some temp editor files. 
""" - vcses = ('CVS', '.svn', '.git', '.hg') + vcses = ("CVS", ".svn", ".git", ".hg") for root, dirs, files in os.walk(test_dir): for vcs_dir in vcses: if vcs_dir in dirs: @@ -186,24 +188,25 @@ def remove_vcs(self, test_dir): shutil.rmtree(path.join(root, vcs_dir), False) # editors temp file leftovers - tilde_files = [path.join(root, file_loc) - for file_loc in files if file_loc.endswith('~')] + tilde_files = [ + path.join(root, file_loc) for file_loc in files if file_loc.endswith("~") + ] for tf in tilde_files: os.remove(tf) - def __extract(self, test_path, extract_func=None, verbatim=False): + def __extract(self, test_path, extract_func=None, verbatim=False, filter=None): """ Given an archive file identified by test_path relative to a test files directory, return a new temp directory where the archive file has been extracted using extract_func. If `verbatim` is True preserve the permissions. """ - assert test_path and test_path != '' + assert test_path and test_path != "" test_path = to_os_native_path(test_path) target_path = path.basename(test_path) target_dir = self.get_temp_dir(target_path) original_archive = self.get_test_loc(test_path) - extract_func(original_archive, target_dir, verbatim=verbatim) + extract_func(original_archive, target_dir, verbatim=verbatim, filter=filter) return target_dir def extract_test_zip(self, test_path, *args, **kwargs): @@ -212,8 +215,8 @@ def extract_test_zip(self, test_path, *args, **kwargs): def extract_test_zip_raw(self, test_path, *args, **kwargs): return self.__extract(test_path, extract_zip_raw) - def extract_test_tar(self, test_path, verbatim=False): - return self.__extract(test_path, extract_tar, verbatim) + def extract_test_tar(self, test_path, verbatim=False, filter=None): + return self.__extract(test_path, extract_tar, verbatim, filter) def extract_test_tar_raw(self, test_path, *args, **kwargs): return self.__extract(test_path, extract_tar_raw) @@ -247,13 +250,11 @@ def is_same(dir1, dir2): Return False if they 
differ, True is they are the same. """ compared = dircmp(dir1, dir2) - if (compared.left_only or compared.right_only or compared.diff_files - or compared.funny_files): + if compared.left_only or compared.right_only or compared.diff_files or compared.funny_files: return False for subdir in compared.common_dirs: - if not is_same(path.join(dir1, subdir), - path.join(dir2, subdir)): + if not is_same(path.join(dir1, subdir), path.join(dir2, subdir)): return False return True @@ -263,14 +264,14 @@ def file_cmp(file1, file2, ignore_line_endings=False): Compare two files content. Return False if they differ, True is they are the same. """ - with open(file1, 'rb') as f1: + with open(file1, "rb") as f1: f1c = f1.read() if ignore_line_endings: - f1c = b'\n'.join(f1c.splitlines(False)) - with open(file2, 'rb') as f2: + f1c = b"\n".join(f1c.splitlines(False)) + with open(file2, "rb") as f2: f2c = f2.read() if ignore_line_endings: - f2c = b'\n'.join(f2c.splitlines(False)) + f2c = b"\n".join(f2c.splitlines(False)) assert f2c == f1c @@ -305,7 +306,7 @@ def make_non_executable(location): os.chmod(location, current_stat & ~stat.S_IEXEC) -def get_test_file_pairs(test_dir): +def get_test_file_pairs(test_dir, template_to_generate_missing_yaml=None): """ Yield tuples of (data_file, test_file) from a test data `test_dir` directory. Raise exception for orphaned/dangling files. 
@@ -323,16 +324,16 @@ def get_test_file_pairs(test_dir): for top, _, files in os.walk(test_dir): for tfile in files: - if tfile.endswith('~'): + if tfile.endswith("~"): continue file_path = path.abspath(path.join(top, tfile)) - if tfile.endswith('.yml'): + if tfile.endswith(".yml"): data_file_path = file_path - test_file_path = file_path.replace('.yml', '') + test_file_path = file_path.replace(".yml", "") else: test_file_path = file_path - data_file_path = test_file_path + '.yml' + data_file_path = test_file_path + ".yml" if not path.exists(test_file_path): dangling_test_files.add(test_file_path) @@ -347,9 +348,13 @@ def get_test_file_pairs(test_dir): # ensure that we haev no dangling files if dangling_test_files or dangling_data_files: - msg = ['Dangling missing test files without a YAML data file:'] + sorted(dangling_test_files) - msg += ['Dangling missing YAML data files without a test file'] + sorted(dangling_data_files) - msg = '\n'.join(msg) + msg = ["Dangling missing test files without a YAML data file:"] + sorted( + dangling_test_files + ) + msg += ["Dangling missing YAML data files without a test file"] + sorted( + dangling_data_files + ) + msg = "\n".join(msg) print(msg) raise Exception(msg) @@ -357,27 +362,29 @@ def get_test_file_pairs(test_dir): diff = set(data_files.keys()).symmetric_difference(set(test_files.keys())) if diff: msg = [ - 'Orphaned copyright test file(s) found: ' - 'test file without its YAML test data file ' - 'or YAML test data file without its test file.'] + sorted(diff) - msg = '\n'.join(msg) + "Orphaned copyright test file(s) found: " + "test file without its YAML test data file " + "or YAML test data file without its test file." 
+ ] + sorted(diff) + msg = "\n".join(msg) print(msg) raise Exception(msg) # ensure that test file paths are unique when you ignore case # we use the file names as test method names (and we have Windows that's # case insensitive - dupes = list(chain.from_iterable( - paths for paths in paths_ignoring_case.values() if len(paths) != 1)) + dupes = list( + chain.from_iterable(paths for paths in paths_ignoring_case.values() if len(paths) != 1) + ) if dupes: - msg = ['Non unique test/data file(s) found when ignoring case!'] + sorted(dupes) + msg = ["Non unique test/data file(s) found when ignoring case!"] + sorted(dupes) - msg = '\n'.join(msg) + msg = "\n".join(msg) print(msg) raise Exception(msg) for test_file in test_files: - yield test_file + '.yml', test_file + yield test_file + ".yml", test_file def check_against_expected_json_file(results, expected_file, regen=False): @@ -389,8 +396,8 @@ def check_against_expected_json_file(results, expected_file, regen=False): This is convenient for updating tests expectations. But use with caution. """ if regen: - with open(expected_file, 'w') as reg: - json.dump(results, reg, indent=2, separators=(',', ': ')) + with open(expected_file, "w") as reg: + json.dump(results, reg, indent=2, separators=(",", ": ")) expected = results else: with open(expected_file) as exp: diff --git a/src/commoncode/text.py b/src/commoncode/text.py index df2f3463..2c39c6b7 100644 --- a/src/commoncode/text.py +++ b/src/commoncode/text.py @@ -3,7 +3,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -11,8 +11,8 @@ import re import unicodedata -from text_unidecode import unidecode from bs4.dammit import UnicodeDammit +from text_unidecode import unidecode """ A text processing module providing functions to process and prepare text @@ -27,7 +27,7 @@ def lines(s): - """ + r""" Split a string in lines using the following conventions: - a line ending \r\n or \n is a separator and yields a new list element - empty lines or lines with only white spaces are not returned. @@ -38,7 +38,7 @@ def lines(s): splitlines. """ # FIXME: leverage new Pythin 3.8 scopeing rules - return [l.strip() for l in s.splitlines() if l.strip()] + return [line.strip() for line in s.splitlines() if line.strip()] def foldcase(text): @@ -49,24 +49,24 @@ def foldcase(text): def nopunc(): - return re.compile(r'[\W_]', re.MULTILINE | re.UNICODE) + return re.compile(r"[\W_]", re.MULTILINE | re.UNICODE) def nopunctuation(text): - u""" - Replaces any non alphanum symbol (i.e. punctuation) in text with space. + """ + Replace any non alphanum symbol (i.e. punctuation) in text with space. Preserve the characters offsets by replacing punctuation with spaces. Warning: this also drops line endings. """ if not isinstance(text, str): text = as_unicode(text) - return re.sub(nopunc(), ' ', text) + return re.sub(nopunc(), " ", text) -CR = '\r' -LF = '\n' +CR = "\r" +LF = "\n" CRLF = CR + LF -CRLF_NO_CR = ' ' + LF +CRLF_NO_CR = " " + LF def unixlinesep(text, preserve=False): @@ -81,11 +81,11 @@ def unixlinesep(text, preserve=False): def nolinesep(text): """ - Removes line separators, replacing them with spaces. + Remove line separators, replacing them with spaces. 
""" if not isinstance(text, str): text = as_unicode(text) - return text.replace(CR, ' ').replace(LF, ' ') + return text.replace(CR, " ").replace(LF, " ") def toascii(s, translit=False): @@ -110,11 +110,11 @@ def toascii(s, translit=False): if translit: converted = unidecode(s) else: - converted = unicodedata.normalize('NFKD', s) + converted = unicodedata.normalize("NFKD", s) - converted = converted.replace('[?]', '_') - converted = converted.encode('ascii', 'ignore') - return converted.decode('ascii') + converted = converted.replace("[?]", "_") + converted = converted.encode("ascii", "ignore") + return converted.decode("ascii") def python_safe_name(s): @@ -126,9 +126,9 @@ def python_safe_name(s): s = toascii(s) s = foldcase(s) s = nopunctuation(s) - s = s.replace(' ', '_') - s = '_'.join(s.split()) - s = s.strip('_') + s = s.replace(" ", "_") + s = "_".join(s.split()) + s = s.strip("_") return s @@ -138,9 +138,9 @@ def as_unicode(s): """ if isinstance(s, str): return s - if s == b'': - return u'' + if s == b"": + return "" if not s: return s - assert isinstance(s, bytes), 's must be bytes but is: {}'.format(s) - return UnicodeDammit(s).markup + assert isinstance(s, bytes), "s must be bytes but is: {}".format(s) + return UnicodeDammit(s).unicode_markup diff --git a/src/commoncode/timeutils.py b/src/commoncode/timeutils.py index 44b68543..45caf595 100644 --- a/src/commoncode/timeutils.py +++ b/src/commoncode/timeutils.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -25,7 +25,7 @@ def utcoffset(self, dt): # NOQA return None def tzname(self, dt): # NOQA - return 'UTC' + return "UTC" def dst(self, dt): # NOQA return None @@ -61,7 +61,7 @@ def time2tstamp(dt=None, path_safe=True): datim = dt or datetime.utcnow() iso = datim.isoformat() if path_safe: - iso = iso.replace(':', '').replace('/', '_') + iso = iso.replace(":", "").replace("/", "_") return iso @@ -70,11 +70,11 @@ def tstamp2time(stamp): Convert a UTC timestamp to a datetime object. """ # handle both basic and extended formats - tformat = '%Y-%m-%dT%H%M%S' if stamp[4] == '-' else '%Y%m%dT%H%M%S' + tformat = "%Y-%m-%dT%H%M%S" if stamp[4] == "-" else "%Y%m%dT%H%M%S" # normalize - dt_ms = stamp.strip().replace('Z', '').replace(':', '') + dt_ms = stamp.strip().replace("Z", "").replace(":", "") - dt_ms = dt_ms.split('.') + dt_ms = dt_ms.split(".") isodatim = dt_ms[0] datim = datetime.strptime(isodatim, tformat) # all stamps must be UTC @@ -83,7 +83,7 @@ def tstamp2time(stamp): # deal with optional microsec try: microsec = dt_ms[1] - except: + except Exception: microsec = None if microsec: microsec = int(microsec) diff --git a/src/commoncode/urn.py b/src/commoncode/urn.py index fcda69a6..04922024 100644 --- a/src/commoncode/urn.py +++ b/src/commoncode/urn.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -63,6 +63,7 @@ urn:dje:component:dropbear: The product object type syntax is the same as the component syntax. 
+ """ from urllib.parse import quote_plus @@ -75,21 +76,21 @@ class URNValidationError(Exception): # Describes the URN schema for each object type URN_SCHEMAS = { - 'license': { - 'object': 'license', - 'fields': ['key'], + "license": { + "object": "license", + "fields": ["key"], }, - 'owner': { - 'object': 'owner', - 'fields': ['name'], + "owner": { + "object": "owner", + "fields": ["name"], }, - 'component': { - 'object': 'component', - 'fields': ['name', 'version'], + "component": { + "object": "component", + "fields": ["name", "version"], }, - 'product': { - 'object': 'product', - 'fields': ['name', 'version'], + "product": { + "object": "product", + "fields": ["name", "version"], }, } @@ -107,13 +108,13 @@ def encode(object_type, **fields): # case is not significant for the object type object_type = object_type.strip().lower() - urn_prefix = 'urn:dje:{0}:'.format(quote_plus(object_type)) + urn_prefix = "urn:dje:{0}:".format(quote_plus(object_type)) - object_fields = URN_SCHEMAS[object_type]['fields'] + object_fields = URN_SCHEMAS[object_type]["fields"] # leading and trailing white spaces are not significant # each URN part is encoded individually BEFORE assembling the URN encoded_fields = [quote_plus(fields[f].strip()) for f in object_fields] - encoded_fields = ':'.join(encoded_fields) + encoded_fields = ":".join(encoded_fields) return urn_prefix + encoded_fields @@ -122,23 +123,23 @@ def decode(urn): Decode a URN and return the object_type and a mapping of field/values. Raise URNValidationError on errors. """ - segments = [unquote_plus(p) for p in urn.split(':')] + segments = [unquote_plus(p) for p in urn.split(":")] - if not segments[0] == ('urn'): + if not segments[0] == ("urn"): raise URNValidationError("Invalid URN prefix. Expected 'urn'.") - if not segments[1] == ('dje'): + if not segments[1] == ("dje"): raise URNValidationError("Invalid URN namespace. 
Expected 'dje'.") # object type is always lowercase object_type = segments[2].lower() if object_type not in URN_SCHEMAS: - raise URNValidationError('Unsupported URN object type.') + raise URNValidationError("Unsupported URN object type.") fields = segments[3:] - schema_fields = URN_SCHEMAS[object_type]['fields'] + schema_fields = URN_SCHEMAS[object_type]["fields"] if len(schema_fields) != len(fields): - raise URNValidationError('Invalid number of fields in URN.') + raise URNValidationError("Invalid number of fields in URN.") decoded_fields = dict(zip(schema_fields, fields)) return object_type, decoded_fields diff --git a/src/commoncode/version.py b/src/commoncode/version.py index e112994c..70746e80 100644 --- a/src/commoncode/version.py +++ b/src/commoncode/version.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -12,29 +12,31 @@ def VERSION_PATTERNS_REGEX(): - return [re.compile(x) for x in [ - # Eclipse features - r'v\d+\.feature\_(\d+\.){1,3}\d+', - - # Common version patterns - r'(M?(v\d+(\-|\_))?\d+\.){1,3}\d+[A-Za-z0-9]*((\.|\-|_|~)' - r'(b|B|rc|r|v|RC|alpha|beta|BETA|M|m|pre|vm|G)?\d+((\-|\.)\d+)?)?' - r'((\.|\-)(((alpha|dev|beta|rc|FINAL|final|pre)(\-|\_)\d+[A-Za-z]?' - r'(\-RELEASE)?)|alpha|dev(\.\d+\.\d+)?' 
- r'|beta|BETA|final|FINAL|release|fixed|(cr\d(\_\d*)?)))?', - # - r'[A-Za-z]?(\d+\_){1,3}\d+\_?[A-Za-z]{0,2}\d+', - # - r'(b|rc|r|v|RC|alpha|beta|BETA|M|m|pre|revision-)\d+(\-\d+)?', - # - r'current|previous|latest|alpha|beta', - # - r'\d{4}-\d{2}-\d{2}', - # - r'(\d(\-|\_)){1,2}\d', - # - r'\d{5,14}', - ]] + return [ + re.compile(x) + for x in [ + # Eclipse features + r"v\d+\.feature\_(\d+\.){1,3}\d+", + # Common version patterns + r"(M?(v\d+(\-|\_))?\d+\.){1,3}\d+[A-Za-z0-9]*((\.|\-|_|~)" + r"(b|B|rc|r|v|RC|alpha|beta|BETA|M|m|pre|vm|G)?\d+((\-|\.)\d+)?)?" + r"((\.|\-)(((alpha|dev|beta|rc|FINAL|final|pre)(\-|\_)\d+[A-Za-z]?" + r"(\-RELEASE)?)|alpha|dev(\.\d+\.\d+)?" + r"|beta|BETA|final|FINAL|release|fixed|(cr\d(\_\d*)?)))?", + # + r"[A-Za-z]?(\d+\_){1,3}\d+\_?[A-Za-z]{0,2}\d+", + # + r"(b|rc|r|v|RC|alpha|beta|BETA|M|m|pre|revision-)\d+(\-\d+)?", + # + r"current|previous|latest|alpha|beta", + # + r"\d{4}-\d{2}-\d{2}", + # + r"(\d(\-|\_)){1,2}\d", + # + r"\d{5,14}", + ] + ] def hint(path): @@ -43,7 +45,7 @@ def hint(path): the version does not start with v. """ for pattern in VERSION_PATTERNS_REGEX(): - segments = path.split('/') + segments = path.split("/") # skip the first path segment unless there's only one segment first_segment = 1 if len(segments) > 1 else 0 interesting_segments = segments[first_segment:] @@ -53,8 +55,8 @@ def hint(path): if version: v = version.group(0) # prefix with v space - if not v.lower().startswith('v'): - v = f'v {v}' + if not v.lower().startswith("v"): + v = f"v {v}" return v @@ -117,7 +119,7 @@ def is_moslty_num(s): return False -NameVersion = namedtuple('NameVersion', 'name, version') +NameVersion = namedtuple("NameVersion", "name, version") def get_jar_nv(filename): @@ -226,7 +228,8 @@ def get_nupkg_nv(filename): """ Return a NameVersion tuple parsed from the .nupkg NuGet archive `filename`. 
- For example (taken from https://stackoverflow.com/questions/51662737/regex-to-parse-package-name-and-version-number-from-nuget-package-filenames/51662926): + For example (taken from + https://stackoverflow.com/questions/51662737/regex-to-parse-package-name-and-version-number-from-nuget-package-filenames/51662926): >>> get_nupkg_nv('knockoutjs.3.4.2.nupkg') NameVersion(name='knockoutjs', version='3.4.2') >>> get_nupkg_nv('log4net.2.0.8.nupkg') diff --git a/tests/data/hash/empty b/tests/data/hash/empty new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_cliutils.py b/tests/test_cliutils.py index c0aebe76..725eac91 100644 --- a/tests/test_cliutils.py +++ b/tests/test_cliutils.py @@ -2,119 +2,133 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import os import click -click.disable_unicode_literals_warning = True from click.testing import CliRunner -from commoncode.testcase import FileDrivenTesting -from commoncode.cliutils import fixed_width_file_name from commoncode.cliutils import GroupedHelpCommand from commoncode.cliutils import PluggableCommandLineOption +from commoncode.cliutils import fixed_width_file_name +from commoncode.testcase import FileDrivenTesting +click.disable_unicode_literals_warning = True -class TestFixedWidthFilename(FileDrivenTesting): +class TestFixedWidthFilename(FileDrivenTesting): def test_fixed_width_file_name_with_file_name_larger_than_max_length_is_shortened(self): - test = fixed_width_file_name('0123456789012345678901234.c', 25) - expected = '0123456789...5678901234.c' + test = fixed_width_file_name("0123456789012345678901234.c", 25) + expected = "0123456789...5678901234.c" assert test == expected def test_fixed_width_file_name_with_file_name_smaller_than_max_length_is_not_shortened(self): - file_name = '0123456789012345678901234.c' + file_name = "0123456789012345678901234.c" test = fixed_width_file_name(file_name, max_length=50) assert test == file_name def test_fixed_width_file_name_with_file_name_at_max_length_is_not_shortened(self): - test = fixed_width_file_name('01234567890123456789012.c', 25) - expected = '01234567890123456789012.c' + test = fixed_width_file_name("01234567890123456789012.c", 25) + expected = "01234567890123456789012.c" assert test == expected def test_fixed_width_file_name_with_file_name_smaller_than_max_length_not_shortened(self): - test = fixed_width_file_name('0123456789012345678901.c', 25) - expected = '0123456789012345678901.c' + test = fixed_width_file_name("0123456789012345678901.c", 25) + expected = "0123456789012345678901.c" assert test == expected def test_fixed_width_file_name_with_none_filename_return_empty_string(self): test = fixed_width_file_name(None, 25) - expected = '' + expected = "" assert test == expected def 
test_fixed_width_file_name_without_extension(self): - test = fixed_width_file_name('012345678901234567890123456', 25) - expected = '01234567890...67890123456' + test = fixed_width_file_name("012345678901234567890123456", 25) + expected = "01234567890...67890123456" assert test == expected def test_fixed_width_file_name_with_posix_path_without_shortening(self): - test = fixed_width_file_name('C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/drupal.js', 25) - expected = 'drupal.js' + test = fixed_width_file_name( + "C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/drupal.js", 25 + ) + expected = "drupal.js" assert test == expected def test_fixed_width_file_name_with_posix_path_with_shortening(self): - test = fixed_width_file_name('C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/012345678901234567890123.c', 25) - expected = '0123456789...4567890123.c' + test = fixed_width_file_name( + "C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/012345678901234567890123.c", + 25, + ) + expected = "0123456789...4567890123.c" assert test == expected def test_fixed_width_file_name_with_win_path_without_shortening(self): - test = fixed_width_file_name('C\\:Documents_and_Settings\\Boki\\Desktop\\head\\patches\\drupal6\\drupal.js', 25) - expected = 'drupal.js' + test = fixed_width_file_name( + "C\\:Documents_and_Settings\\Boki\\Desktop\\head\\patches\\drupal6\\drupal.js", 25 + ) + expected = "drupal.js" assert test == expected def test_fixed_width_file_name_with_win_path_with_shortening(self): - test = fixed_width_file_name('C\\:Documents_and_Settings\\Boki\\Desktop\\head\\patches\\drupal6\\012345678901234567890123.c', 25) - expected = '0123456789...4567890123.c' + test = fixed_width_file_name( + "C\\:Documents_and_Settings\\Boki\\Desktop\\head\\patches\\drupal6\\012345678901234567890123.c", + 25, + ) + expected = "0123456789...4567890123.c" assert test == expected def 
test_fixed_width_file_name_with_very_small_file_name_and_long_extension(self): - test = fixed_width_file_name('abc.abcdef', 5) + test = fixed_width_file_name("abc.abcdef", 5) # FIXME: what is expected is TBD - expected = '' + expected = "" assert test == expected class TestGroupedHelpCommand(FileDrivenTesting): - - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_GroupedHelpCommand_help_group_and_sort_order_without_custom_class(self): - - @click.command(name='scan', cls=GroupedHelpCommand) - @click.option('--opt', is_flag=True, help='Help text for option') + @click.command(name="scan", cls=GroupedHelpCommand) + @click.option("--opt", is_flag=True, help="Help text for option") def scan(opt): pass runner = CliRunner() - result = runner.invoke(scan, ['--help']) + result = runner.invoke(scan, ["--help"]) from commoncode.cliutils import MISC_GROUP + assert MISC_GROUP in result.output - assert '--opt Help text for option' in result.output + assert "--opt Help text for option" in result.output def test_GroupedHelpCommand_with_help_group_and_sort_order_with_custom_class(self): - - @click.command(name='scan', cls=GroupedHelpCommand) - @click.option('--opt', is_flag=True, sort_order=10, - help='Help text for option', cls=PluggableCommandLineOption) + @click.command(name="scan", cls=GroupedHelpCommand) + @click.option( + "--opt", + is_flag=True, + sort_order=10, + help="Help text for option", + cls=PluggableCommandLineOption, + ) def scan(opt): pass runner = CliRunner() - result = runner.invoke(scan, ['--help']) + result = runner.invoke(scan, ["--help"]) from commoncode.cliutils import MISC_GROUP - assert MISC_GROUP + ':\n --opt Help text for option\n' in result.output + + assert MISC_GROUP + ":\n --opt Help text for option\n" in result.output def test_GroupedHelpCommand_help_with_group(self): from commoncode.cliutils import CORE_GROUP - @click.command(name='scan', 
cls=GroupedHelpCommand) + @click.command(name="scan", cls=GroupedHelpCommand) @click.option( - '--opt', + "--opt", is_flag=True, - help='Help text for option', + help="Help text for option", help_group=CORE_GROUP, cls=PluggableCommandLineOption, ) @@ -122,5 +136,5 @@ def scan(opt): pass runner = CliRunner() - result = runner.invoke(scan, ['--help']) - assert CORE_GROUP + ':\n --opt Help text for option\n' in result.output + result = runner.invoke(scan, ["--help"]) + assert CORE_GROUP + ":\n --opt Help text for option\n" in result.output diff --git a/tests/test_cliutils_progressbar.py b/tests/test_cliutils_progressbar.py index add6cd48..2683e9c4 100644 --- a/tests/test_cliutils_progressbar.py +++ b/tests/test_cliutils_progressbar.py @@ -2,18 +2,18 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import os -from commoncode.testcase import FileDrivenTesting from commoncode.cliutils import progressmanager +from commoncode.testcase import FileDrivenTesting -class TestProgressBar(FileDrivenTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') +class TestProgressBar(FileDrivenTesting): + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_progressmanager_works(self): iterable = range(10) diff --git a/tests/test_codec.py b/tests/test_codec.py index 7b2041f9..1109acfd 100644 --- a/tests/test_codec.py +++ b/tests/test_codec.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. 
+# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -14,20 +14,19 @@ class TestCodec(TestCase): - def test_bin_to_num_basic(self): expected = 123 - result = bin_to_num(b'{') + result = bin_to_num(b"{") assert result == expected def test_bin_to_num_zero(self): expected = 0 - result = bin_to_num(b'\x00') + result = bin_to_num(b"\x00") assert result == expected def test_bin_to_num_large_number(self): expected = 432346237462348763 - result = bin_to_num(b'\x06\x00\x00\x9c\xbf\xeb\x83\xdb') + result = bin_to_num(b"\x06\x00\x00\x9c\xbf\xeb\x83\xdb") assert result == expected def test_bin_to_num_and_num_to_bin_is_idempotent(self): @@ -36,43 +35,43 @@ def test_bin_to_num_and_num_to_bin_is_idempotent(self): assert result == expected def test_num_to_bin_basic(self): - expected = b'{' + expected = b"{" result = num_to_bin(123) assert result == expected def test_num_to_bin_zero(self): - expected = b'\x00' + expected = b"\x00" result = num_to_bin(0) assert result == expected def test_num_to_bin_large_number(self): - expected = b'\x06\x00\x00\x9c\xbf\xeb\x83\xdb' + expected = b"\x06\x00\x00\x9c\xbf\xeb\x83\xdb" result = num_to_bin(432346237462348763) assert result == expected def test_num_to_bin_bin_to_num_is_idempotent(self): - expected = b'\x06\x00\x00\x9c\xbf\xeb\x83\xdb' - result = num_to_bin(bin_to_num(b'\x06\x00\x00\x9c\xbf\xeb\x83\xdb')) + expected = b"\x06\x00\x00\x9c\xbf\xeb\x83\xdb" + result = num_to_bin(bin_to_num(b"\x06\x00\x00\x9c\xbf\xeb\x83\xdb")) assert result == expected def test_urlsafe_b64encode_int_zero(self): - assert urlsafe_b64encode_int(0) == b'AA==' + assert urlsafe_b64encode_int(0) == b"AA==" def test_urlsafe_b64encode_int_basic(self): - assert urlsafe_b64encode_int(123123123123) == b'HKq1w7M=' + assert urlsafe_b64encode_int(123123123123) == b"HKq1w7M=" def test_urlsafe_b64encode_int_limit_8bits_255(self): - assert urlsafe_b64encode_int(255) == b'_w==' 
+ assert urlsafe_b64encode_int(255) == b"_w==" def test_urlsafe_b64encode_int_limit_8bits_256(self): - assert urlsafe_b64encode_int(256) == b'AQA=' + assert urlsafe_b64encode_int(256) == b"AQA=" def test_urlsafe_b64encode_int_adds_no_padding_for_number_that_are_multiple_of_6_bits(self): - assert urlsafe_b64encode_int(0xFFFFFFFFFFFFFFFFFF) == b'____________' + assert urlsafe_b64encode_int(0xFFFFFFFFFFFFFFFFFF) == b"____________" assert len(urlsafe_b64encode_int(0xFFFFFFFFFFFF)) == 8 def test_urlsafe_b64encode_int_very_large_number(self): - b64 = (b'QAAAAAAgAAAAAQAACAAAAAAAAAAAAAAkAAIAAAAAAAAAAAAAAACAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAiAAAAAAAIAAAAAAAAAAAAAAEAACAAAAAAAA=') + b64 = b"QAAAAAAgAAAAAQAACAAAAAAAAAAAAAAkAAIAAAAAAAAAAAAAAACAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAiAAAAAAAIAAAAAAAAAAAAAAEAACAAAAAAAA=" expected = b64 num = 2678771517966886466622496485850735537232223496190189203248435106535830319026141316924949516664780383591425235756710588949364368366679435700855700642969357960349427980681242720502045830438444033569999428606714388704082526548154984676817460705606960919023941301616034362869262429593297635158449513824256 result = urlsafe_b64encode_int(num) diff --git a/tests/test_command.py b/tests/test_command.py index b24fc06d..d0f735f1 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -3,155 +3,178 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import os import sys - from unittest.case import skipIf from commoncode import command -from commoncode.testcase import FileBasedTesting from commoncode.system import on_linux from commoncode.system import on_mac from commoncode.system import on_windows -from commoncode.system import py36 +from commoncode.testcase import FileBasedTesting class TestCommand(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") - @skipIf(py36, 'This fails on Python 3.6 https://bugs.python.org/issue26919') def test_execute_can_handle_non_ascii_output(self): # Popen returns a *binary* string with non-ascii chars: skips these python = sys.executable rc, stdout, stderr = command.execute( - python, ['-c', 'print("non ascii: été just passed it !")'] + python, ["-c", 'print("non ascii: été just passed it !")'] ) - assert stderr == '' - assert stdout == 'non ascii: ete just passed it !' + assert stderr == "" + assert stdout == "non ascii: ete just passed it !" 
assert rc == 0 # do not throw exception - stdout.encode('ascii') + stdout.encode("ascii") def test_execute_(self): python = sys.executable - rc, stdout, stderr = command.execute( - python, ['-c', 'print("foobar")'] - ) - assert stderr == '' - assert stdout == 'foobar' + rc, stdout, stderr = command.execute(python, ["-c", 'print("foobar")']) + assert stderr == "" + assert stdout == "foobar" assert rc == 0 # do not throw exception - stdout.encode('ascii') + stdout.encode("ascii") def test_execute2(self): python = sys.executable - rc, stdout, stderr = command.execute2( - python, ['-c', 'print("foobar")'] - ) - assert stderr == '' - assert stdout == 'foobar' + rc, stdout, stderr = command.execute2(python, ["-c", 'print("foobar")']) + assert stderr == "" + assert stdout == "foobar" assert rc == 0 # do not throw exception - stdout.encode('ascii') + stdout.encode("ascii") - @skipIf(not on_linux, 'Linux only') + @skipIf(not on_linux, "Linux only") def test_update_path_var_on_linux(self): - existing_path_var = '/usr/bin:/usr/local' + existing_path_var = "/usr/bin:/usr/local" - new_path = b'foo\xb1bar' + new_path = b"foo\xb1bar" updated_path = command.update_path_var(existing_path_var, new_path) - assert updated_path == 'foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "foo\udcb1bar:/usr/bin:/usr/local" - new_path = u'/bin/foo\udcb1bar' + new_path = "/bin/foo\udcb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - new_path = b'/bin/foo\xb1bar' + new_path = b"/bin/foo\xb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - new_path = u'foo\udcb1bar' + new_path = "foo\udcb1bar" updated_path = 
command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - new_path = b'foo\xb1bar' + new_path = b"foo\xb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - @skipIf(not on_mac, 'Mac only') + @skipIf(not on_mac, "Mac only") def test_update_path_var_on_mac(self): - existing_path_var = '/usr/bin:/usr/local' + existing_path_var = "/usr/bin:/usr/local" - new_path = u'foo\udcb1bar' + new_path = "foo\udcb1bar" updated_path = command.update_path_var(existing_path_var, new_path) - assert updated_path == 'foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "foo\udcb1bar:/usr/bin:/usr/local" - new_path = b'/bin/foo\xb1bar' + new_path = b"/bin/foo\xb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - new_path = u'foo\udcb1bar' + new_path = "foo\udcb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == '/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local' + assert updated_path == "/bin/foo\udcb1bar:foo\udcb1bar:/usr/bin:/usr/local" - @skipIf(not on_windows, 'Windows only') + @skipIf(not on_windows, "Windows only") def test_update_path_var_on_windows(self): - existing_path_var = u'c:\\windows;C:Program Files' + existing_path_var = "c:\\windows;C:Program Files" - new_path = u'foo\udcb1bar' + new_path = "foo\udcb1bar" updated_path = command.update_path_var(existing_path_var, new_path) - assert updated_path == u'foo\udcb1bar;c:\\windows;C:Program Files' + assert updated_path == "foo\udcb1bar;c:\\windows;C:Program Files" - new_path = u'foo\udcb1bar' 
+ new_path = "foo\udcb1bar" updated_path = command.update_path_var(updated_path, new_path) - assert updated_path == u'foo\udcb1bar;c:\\windows;C:Program Files' + assert updated_path == "foo\udcb1bar;c:\\windows;C:Program Files" def test_searchable_paths(self): - d1 = self.get_temp_dir('foo') - d2 = self.get_temp_dir('bar') + d1 = self.get_temp_dir("foo") + d2 = self.get_temp_dir("bar") ps = os.pathsep - os.environ['FOO_SCANCODE_TEST1'] = f'{ps}{d1}{ps}{ps}{d2}{ps}' - os.environ['FOO_SCANCODE_TEST2'] = f'{ps}{d2}{ps}{ps}{d1}{ps}/NOTADIR' + os.environ["FOO_SCANCODE_TEST1"] = f"{ps}{d1}{ps}{ps}{d2}{ps}" + os.environ["FOO_SCANCODE_TEST2"] = f"{ps}{d2}{ps}{ps}{d1}{ps}/NOTADIR" - env_vars = 'FOO_SCANCODE_TEST1', 'FOO_SCANCODE_TEST2' + env_vars = "FOO_SCANCODE_TEST1", "FOO_SCANCODE_TEST2" expected = d1, d2, d2, d1 results = command.searchable_paths(env_vars=env_vars) if on_windows: for res, exp in zip(results, expected): - _, _, r = res.rpartition('\\') - _, _, e = exp.rpartition('\\') + _, _, r = res.rpartition("\\") + _, _, e = exp.rpartition("\\") assert r == e elif on_mac: # macOS somehow adds a /private to the paths in the CI as a side- # effect of calling "realpath" and likely resolving links - expected = f'/private{d1}', f'/private{d2}', f'/private{d2}', f'/private{d1}' + expected = f"/private{d1}", f"/private{d2}", f"/private{d2}", f"/private{d1}" assert expected == results else: assert expected == results def test_find_in_path(self): - d1 = self.get_temp_dir('foo') - d2 = self.get_temp_dir('bar') - filename = 'baz' - - assert None == command.find_in_path(filename, searchable_paths=(d1, d2,)) + d1 = self.get_temp_dir("foo") + d2 = self.get_temp_dir("bar") + filename = "baz" + + assert None == command.find_in_path( + filename, + searchable_paths=( + d1, + d2, + ), + ) f2 = os.path.join(d2, filename) - with open(f2, 'w') as o: - o.write('some') - - assert f2 == command.find_in_path(filename, searchable_paths=(d1, d2,)) - assert f2 == command.find_in_path(filename, 
searchable_paths=(d2, d1,)) + with open(f2, "w") as o: + o.write("some") + + assert f2 == command.find_in_path( + filename, + searchable_paths=( + d1, + d2, + ), + ) + assert f2 == command.find_in_path( + filename, + searchable_paths=( + d2, + d1, + ), + ) f1 = os.path.join(d1, filename) - with open(f1, 'w') as o: - o.write('some') - - assert f1 == command.find_in_path(filename, searchable_paths=(d1, d2,)) - assert f2 == command.find_in_path(filename, searchable_paths=(d2, d1,)) + with open(f1, "w") as o: + o.write("some") + + assert f1 == command.find_in_path( + filename, + searchable_paths=( + d1, + d2, + ), + ) + assert f2 == command.find_in_path( + filename, + searchable_paths=( + d2, + d1, + ), + ) diff --git a/tests/test_date.py b/tests/test_date.py index a6473593..05a29663 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -2,24 +2,21 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # import os - from datetime import datetime import commoncode.date - from commoncode import testcase class TestDate(testcase.FileBasedTesting): - def test_secs_from_epoch_can_handle_micro_and_nano_secs(self): test_file = self.get_temp_file() - open(test_file, 'w').close() + open(test_file, "w").close() # setting modified time to desired values os.utime(test_file, (1301420665.046481, 1301420665.046481)) # otherwise the issue does not happen (ie. 
on mac) @@ -29,19 +26,19 @@ def test_secs_from_epoch_can_handle_micro_and_nano_secs(self): def test_get_file_mtime_for_a_new_file(self): test_file = self.get_temp_file() - open(test_file, 'w').close() + open(test_file, "w").close() def as_yyyymmdd(s): return s[:10] - now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ') + now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") result = commoncode.date.get_file_mtime(test_file) assert as_yyyymmdd(result) == as_yyyymmdd(now) def test_get_file_mtime_for_a_modified_file(self): test_file = self.get_temp_file() - open(test_file, 'w').close() - expected = u'1992-05-09 00:00:00' + open(test_file, "w").close() + expected = "1992-05-09 00:00:00" m_ts = (24 * 3600) * 134 + (24 * 3600 * 365) * 22 # setting modified time to expected values os.utime(test_file, (m_ts, m_ts)) @@ -49,8 +46,8 @@ def test_get_file_mtime_for_a_modified_file(self): def test_get_file_mtime_for_a_modified_file_2(self): test_file = self.get_temp_file() - open(test_file, 'w').close() + open(test_file, "w").close() # setting modified time to expected values - expected = u'2011-01-06 14:35:00' + expected = "2011-01-06 14:35:00" os.utime(test_file, (1294324500, 1294324500)) assert commoncode.date.get_file_mtime(test_file) == expected diff --git a/tests/test_distro.py b/tests/test_distro.py index c4b5d5a7..48521c1b 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -7,14 +7,14 @@ class TestDistro(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_parse_os_release(self): - test_dir = self.get_test_loc('distro/os-release') + test_dir = self.get_test_loc("distro/os-release") for test_file in resource_iter(test_dir, with_dirs=False): - if test_file.endswith('expected.json'): + if test_file.endswith("expected.json"): continue - expected = test_file + '-expected.json' + expected = test_file + "-expected.json" result = 
distro.parse_os_release(test_file) check_against_expected_json_file(result, expected, regen=False) diff --git a/tests/test_fileset.py b/tests/test_fileset.py index b72e17e5..2c6e5ef7 100644 --- a/tests/test_fileset.py +++ b/tests/test_fileset.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -13,82 +13,84 @@ class FilesetTest(commoncode.testcase.FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_load(self): - irf = self.get_test_loc('fileset/scancodeignore.lst') + irf = self.get_test_loc("fileset/scancodeignore.lst") result = fileset.load(irf) - assert result == ['/foo/*', '!/foobar/*', 'bar/*', '#comment'] + assert result == ["/foo/*", "!/foobar/*", "bar/*", "#comment"] def test_is_included_basic(self): - assert fileset.is_included('/common/src/', {}, {}) - assert fileset.is_included('/common/src/', None, None) + assert fileset.is_included("/common/src/", {}, {}) + assert fileset.is_included("/common/src/", None, None) assert not fileset.is_included(None, None, None) def test_is_included_in_fileset(self): - incs = {'/common/src/*': '.scanignore'} - excs = {'/common/src/*.so':'.scanignore'} + incs = {"/common/src/*": ".scanignore"} + excs = {"/common/src/*.so": ".scanignore"} assert not fileset.is_included(None, incs, excs) - assert not fileset.is_included('', incs, excs) - assert not fileset.is_included('/', incs, excs) - assert fileset.is_included('/common/src/', incs, excs) - assert not fileset.is_included('/common/bin/', incs, excs) + assert not fileset.is_included("", incs, excs) + assert not 
fileset.is_included("/", incs, excs) + assert fileset.is_included("/common/src/", incs, excs) + assert not fileset.is_included("/common/bin/", incs, excs) def test_is_included_in_fileset_2(self): - incs = {'src*': '.scanignore'} - excs = {'src/ab': '.scanignore'} + incs = {"src*": ".scanignore"} + excs = {"src/ab": ".scanignore"} assert not fileset.is_included(None, incs, excs) - assert not fileset.is_included('', incs, excs) - assert not fileset.is_included('/', incs, excs) - assert fileset.is_included('/common/src/', incs, excs) - assert not fileset.is_included('src/ab', incs, excs) - assert fileset.is_included('src/abbab', incs, excs) + assert not fileset.is_included("", incs, excs) + assert not fileset.is_included("/", incs, excs) + assert fileset.is_included("/common/src/", incs, excs) + assert not fileset.is_included("src/ab", incs, excs) + assert fileset.is_included("src/abbab", incs, excs) def test_is_included_is_included_exclusions(self): - incs = {'/src/*': '.scanignore'} - excs = {'/src/*.so':'.scanignore'} - assert not fileset.is_included('/src/dist/build/mylib.so', incs, excs) + incs = {"/src/*": ".scanignore"} + excs = {"/src/*.so": ".scanignore"} + assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) def test_is_included_is_included_exclusions_2(self): - incs = {'src': '.scanignore'} - excs = {'src/*.so':'.scanignore'} - assert fileset.is_included('/some/src/this/that', incs, excs) - assert not fileset.is_included('/src/dist/build/mylib.so', incs, excs) + incs = {"src": ".scanignore"} + excs = {"src/*.so": ".scanignore"} + assert fileset.is_included("/some/src/this/that", incs, excs) + assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) def test_is_included_empty_exclusions(self): - incs = {'/src/*': '.scanignore'} - excs = {'': '.scanignore'} - assert fileset.is_included('/src/dist/build/mylib.so', incs, excs) + incs = {"/src/*": ".scanignore"} + excs = {"": ".scanignore"} + assert 
fileset.is_included("/src/dist/build/mylib.so", incs, excs) def test_is_included_sources(self): - incs = {'/home/elf/elf-0.5/*': '.scanignore'} - excs = {'/home/elf/elf-0.5/src/elf': '.scanignore', - '/home/elf/elf-0.5/src/elf.o': '.scanignore'} - assert not fileset.is_included('/home/elf/elf-0.5/src/elf', incs, excs) + incs = {"/home/elf/elf-0.5/*": ".scanignore"} + excs = { + "/home/elf/elf-0.5/src/elf": ".scanignore", + "/home/elf/elf-0.5/src/elf.o": ".scanignore", + } + assert not fileset.is_included("/home/elf/elf-0.5/src/elf", incs, excs) def test_is_included_dot_svn(self): - incs = {'*/.svn/*': '.scanignore'} + incs = {"*/.svn/*": ".scanignore"} excs = {} - assert fileset.is_included('home/common/tools/elf/.svn/', incs, excs) - assert fileset.is_included('home/common/tools/.svn/this', incs, excs) - assert not fileset.is_included('home/common/tools/this', incs, excs) + assert fileset.is_included("home/common/tools/elf/.svn/", incs, excs) + assert fileset.is_included("home/common/tools/.svn/this", incs, excs) + assert not fileset.is_included("home/common/tools/this", incs, excs) def test_is_included_dot_svn_with_excludes(self): - incs = {'*/.svn/*': '.scanignore'} - excs = {'*/.git/*': '.scanignore'} - assert fileset.is_included('home/common/tools/elf/.svn/', incs, excs) - assert fileset.is_included('home/common/tools/.svn/this', incs, excs) - assert not fileset.is_included('home/common/.git/this', incs, excs) + incs = {"*/.svn/*": ".scanignore"} + excs = {"*/.git/*": ".scanignore"} + assert fileset.is_included("home/common/tools/elf/.svn/", incs, excs) + assert fileset.is_included("home/common/tools/.svn/this", incs, excs) + assert not fileset.is_included("home/common/.git/this", incs, excs) def test_get_matches(self): - patterns = {'*/.svn/*': '.scanignore'} - assert fileset.get_matches('home/common/tools/elf/.svn/', patterns) - assert fileset.get_matches('home/common/tools/.svn/this', patterns) - assert not fileset.get_matches('home/common/.git/this', 
patterns) + patterns = {"*/.svn/*": ".scanignore"} + assert fileset.get_matches("home/common/tools/elf/.svn/", patterns) + assert fileset.get_matches("home/common/tools/.svn/this", patterns) + assert not fileset.get_matches("home/common/.git/this", patterns) def test_get_matches_accepts_a_list_or_tuple(self): - patterns = ['*/.svn/*'] - assert fileset.get_matches('home/common/tools/elf/.svn/', patterns) + patterns = ["*/.svn/*"] + assert fileset.get_matches("home/common/tools/elf/.svn/", patterns) - patterns = '*/.svn/*', - assert fileset.get_matches('home/common/tools/elf/.svn/', patterns) + patterns = ("*/.svn/*",) + assert fileset.get_matches("home/common/tools/elf/.svn/", patterns) diff --git a/tests/test_filetype.py b/tests/test_filetype.py index 2ca4d1d5..9265314c 100644 --- a/tests/test_filetype.py +++ b/tests/test_filetype.py @@ -2,40 +2,41 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# import os -from os.path import join from os.path import exists +from os.path import join from unittest import skipIf +import commoncode.testcase from commoncode import filetype from commoncode import fileutils from commoncode.system import on_posix from commoncode.system import on_windows from commoncode.system import py3 - -import commoncode.testcase from commoncode.testcase import FileBasedTesting from commoncode.testcase import make_non_readable from commoncode.testcase import make_non_writable class TypeTest(commoncode.testcase.FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_get_size_on_file(self): - test_file = self.get_test_loc('filetype/size/Image1.eps') + test_file = self.get_test_loc("filetype/size/Image1.eps") assert filetype.get_size(test_file) == 12388 def test_get_size_on_directory(self): - test_dir = self.get_test_loc('filetype/size', copy=True) + test_dir = self.get_test_loc("filetype/size", copy=True) assert filetype.get_size(test_dir) == 12400 def test_get_type(self): - test_dir = self.extract_test_tar('filetype/types.tar', verbatim=True) + test_dir = self.extract_test_tar( + "filetype/types.tar", verbatim=True, filter="fully_trusted" + ) results = [] for root, dirs, files in os.walk(test_dir): for d in dirs: @@ -44,27 +45,33 @@ def test_get_type(self): results.append((f, filetype.get_type(os.path.join(root, f)))) expected = [ - ('5-DIRTYPE', 'd'), - ('0-REGTYPE', 'f'), - ('0-REGTYPE-TEXT', 'f'), - ('0-REGTYPE-VEEEERY_LONG_NAME___________________________________' - '______________________________________________________________' - '____________________155', 'f'), - ('1-LNKTYPE', 'f'), - ('S-SPARSE', 'f'), - ('S-SPARSE-WITH-NULLS', 'f') + ("5-DIRTYPE", "d"), + ("0-REGTYPE", "f"), + ("0-REGTYPE-TEXT", "f"), + ( + "0-REGTYPE-VEEEERY_LONG_NAME___________________________________" + 
"______________________________________________________________" + "____________________155", + "f", + ), + ("1-LNKTYPE", "f"), + ("S-SPARSE", "f"), + ("S-SPARSE-WITH-NULLS", "f"), ] # symlinks and special files are not supported on win if on_posix: - expected += [ ('2-SYMTYPE', 'l'), ('6-FIFOTYPE', 's'), ] + expected += [ + ("2-SYMTYPE", "l"), + ("6-FIFOTYPE", "s"), + ] try: assert sorted(results) == sorted(expected) except Exception as e: if on_windows and py3: # On some Windows symlinkes are detected OK (Windows 10?) but not in Windows 7 - expected += [ ('2-SYMTYPE', 'l') ] + expected += [("2-SYMTYPE", "l")] assert sorted(results) == sorted(expected) else: raise e @@ -75,8 +82,8 @@ def test_is_rwx_with_none(self): assert not filetype.is_executable(None) def test_is_readable_is_writeable_file(self): - base_dir = self.get_test_loc('filetype/readwrite', copy=True) - test_file = os.path.join(os.path.join(base_dir, 'sub'), 'file') + base_dir = self.get_test_loc("filetype/readwrite", copy=True) + test_file = os.path.join(os.path.join(base_dir, "sub"), "file") try: assert filetype.is_readable(test_file) @@ -92,8 +99,8 @@ def test_is_readable_is_writeable_file(self): fileutils.chmod(base_dir, fileutils.RW, recurse=True) def test_is_readable_is_writeable_dir(self): - base_dir = self.get_test_loc('filetype/readwrite', copy=True) - test_dir = os.path.join(base_dir, 'sub') + base_dir = self.get_test_loc("filetype/readwrite", copy=True) + test_dir = os.path.join(base_dir, "sub") try: assert filetype.is_readable(test_dir) @@ -117,11 +124,11 @@ def test_is_readable_is_writeable_dir(self): class CountTest(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def get_test_count_dir(self): - test_dir = self.get_test_loc('count/filecount', copy=True) - sub3 = join(test_dir, 'dir', 'sub3') + test_dir = self.get_test_loc("count/filecount", copy=True) + sub3 = join(test_dir, "dir", 
"sub3") if not exists(sub3): os.makedirs(sub3) return test_dir @@ -132,8 +139,8 @@ def test_get_file_count_with_empty_dir(self): def test_get_file_count_with_single_file(self): test_file = self.get_temp_file() - with open(test_file, 'w') as f: - f.write(u'') + with open(test_file, "w") as f: + f.write("") assert filetype.is_file(test_file) assert filetype.get_file_count(test_file) == 1 @@ -150,21 +157,21 @@ def test_get_file_size_and_count(self): def test_get_file_size(self): test_dir = self.get_test_count_dir() tests = ( - ('dir/a.txt', 2), - ('dir/b.txt', 2), - ('dir/c.txt', 2), - ('dir/sub1/a.txt', 2), - ('dir/sub1/b.txt', 2), - ('dir/sub1/c.txt', 2), - ('dir/sub1/subsub/a.txt', 2), - ('dir/sub1/subsub/b.txt', 2), - ('dir/sub1/subsub', 4), - ('dir/sub1', 10), - ('dir/sub2/a.txt', 2), - ('dir/sub2', 2), - ('dir/sub3', 0), - ('dir/', 18), - ('', 18), + ("dir/a.txt", 2), + ("dir/b.txt", 2), + ("dir/c.txt", 2), + ("dir/sub1/a.txt", 2), + ("dir/sub1/b.txt", 2), + ("dir/sub1/c.txt", 2), + ("dir/sub1/subsub/a.txt", 2), + ("dir/sub1/subsub/b.txt", 2), + ("dir/sub1/subsub", 4), + ("dir/sub1", 10), + ("dir/sub2/a.txt", 2), + ("dir/sub2", 2), + ("dir/sub3", 0), + ("dir/", 18), + ("", 18), ) for test_file, size in tests: result = filetype.get_size(os.path.join(test_dir, test_file)) @@ -173,21 +180,21 @@ def test_get_file_size(self): def test_get_file_count(self): test_dir = self.get_test_count_dir() tests = ( - ('dir/a.txt', 1), - ('dir/b.txt', 1), - ('dir/c.txt', 1), - ('dir/sub1/a.txt', 1), - ('dir/sub1/b.txt', 1), - ('dir/sub1/c.txt', 1), - ('dir/sub1/subsub/a.txt', 1), - ('dir/sub1/subsub/b.txt', 1), - ('dir/sub1/subsub', 2), - ('dir/sub1', 5), - ('dir/sub2/a.txt', 1), - ('dir/sub2', 1), - ('dir/sub3', 0), - ('dir/', 9), - ('', 9), + ("dir/a.txt", 1), + ("dir/b.txt", 1), + ("dir/c.txt", 1), + ("dir/sub1/a.txt", 1), + ("dir/sub1/b.txt", 1), + ("dir/sub1/c.txt", 1), + ("dir/sub1/subsub/a.txt", 1), + ("dir/sub1/subsub/b.txt", 1), + ("dir/sub1/subsub", 2), + ("dir/sub1", 
5), + ("dir/sub2/a.txt", 1), + ("dir/sub2", 1), + ("dir/sub3", 0), + ("dir/", 9), + ("", 9), ) for test_file, count in tests: result = filetype.get_file_count(os.path.join(test_dir, test_file)) @@ -195,22 +202,22 @@ def test_get_file_count(self): class SymlinkTest(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") - @skipIf(on_windows, 'os.symlink does not work on Windows') + @skipIf(on_windows, "os.symlink does not work on Windows") def test_is_file(self): - test_file = self.get_test_loc('symlink/test', copy=True) + test_file = self.get_test_loc("symlink/test", copy=True) temp_dir = fileutils.get_temp_dir() - test_link = join(temp_dir, 'test-link') + test_link = join(temp_dir, "test-link") os.symlink(test_file, test_link) assert filetype.is_file(test_link, follow_symlinks=True) assert not filetype.is_file(test_link, follow_symlinks=False) - @skipIf(on_windows, 'os.symlink does not work on Windows') + @skipIf(on_windows, "os.symlink does not work on Windows") def test_is_dir(self): - test_dir = self.get_test_loc('symlink', copy=True) + test_dir = self.get_test_loc("symlink", copy=True) temp_dir = fileutils.get_temp_dir() - test_link = join(temp_dir, 'test-dir-link') + test_link = join(temp_dir, "test-dir-link") os.symlink(test_dir, test_link) assert filetype.is_dir(test_link, follow_symlinks=True) assert not filetype.is_dir(test_link, follow_symlinks=False) diff --git a/tests/test_fileutils.py b/tests/test_fileutils.py index f6e3b744..e3913b79 100644 --- a/tests/test_fileutils.py +++ b/tests/test_fileutils.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. 
# See https://aboutcode.org for more information about nexB OSS projects. # @@ -16,9 +16,9 @@ from commoncode import fileutils from commoncode.fileutils import as_posixpath from commoncode.system import on_linux -from commoncode.system import on_posix from commoncode.system import on_mac from commoncode.system import on_macos_14_or_higher +from commoncode.system import on_posix from commoncode.system import on_windows from commoncode.testcase import FileBasedTesting from commoncode.testcase import make_non_executable @@ -26,17 +26,14 @@ from commoncode.testcase import make_non_writable -@skip('Somehow permissions tests do not work OK yet on Python 3') +@skip("Somehow permissions tests do not work OK yet on Python 3") class TestPermissionsDeletions(FileBasedTesting): - """ - This is failing for now on Python 3 - """ - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_delete_unwritable_directory_and_files(self): - base_dir = self.get_test_loc('fileutils/readwrite', copy=True) - test_dir = join(base_dir, 'sub') - test_file = join(test_dir, 'file') + base_dir = self.get_test_loc("fileutils/readwrite", copy=True) + test_dir = join(base_dir, "sub") + test_file = join(test_dir, "file") try: # note: there are no unread/writable dir on windows @@ -59,16 +56,17 @@ class TestPermissions(FileBasedTesting): Several assertions or test are skipped on non posix OSes. Windows handles permissions and special files differently. 
""" - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_chmod_on_non_existing_file_throws_no_exception(self): - fileutils.chmod('some non existing dir', fileutils.RWX) + fileutils.chmod("some non existing dir", fileutils.RWX) def test_chmod_read_write_recursively_on_dir(self): - test_dir = self.get_test_loc('fileutils/executable', copy=True) - test_file = join(test_dir, 'deep1', 'deep2', 'ctags') - test_dir2 = join(test_dir, 'deep1', 'deep2') - parent = join(test_dir, 'deep1') + test_dir = self.get_test_loc("fileutils/executable", copy=True) + test_file = join(test_dir, "deep1", "deep2", "ctags") + test_dir2 = join(test_dir, "deep1", "deep2") + parent = join(test_dir, "deep1") try: make_non_writable(test_file) @@ -94,10 +92,10 @@ def test_chmod_read_write_recursively_on_dir(self): fileutils.chmod(test_dir, fileutils.RW, recurse=True) def test_chmod_read_write_non_recursively_on_dir(self): - test_dir = self.get_test_loc('fileutils/executable', copy=True) - test_file = join(test_dir, 'deep1', 'deep2', 'ctags') - test_dir = join(test_dir, 'deep1', 'deep2') - parent = join(test_dir, 'deep1') + test_dir = self.get_test_loc("fileutils/executable", copy=True) + test_file = join(test_dir, "deep1", "deep2", "ctags") + test_dir = join(test_dir, "deep1", "deep2") + parent = join(test_dir, "deep1") try: # setup @@ -124,8 +122,8 @@ def test_chmod_read_write_non_recursively_on_dir(self): fileutils.chmod(test_dir, fileutils.RW, recurse=True) def test_chmod_read_write_file(self): - test_dir = self.get_test_loc('fileutils/executable', copy=True) - test_file = join(test_dir, 'deep1', 'deep2', 'ctags') + test_dir = self.get_test_loc("fileutils/executable", copy=True) + test_file = join(test_dir, "deep1", "deep2", "ctags") try: make_non_writable(test_file) @@ -138,8 +136,8 @@ def test_chmod_read_write_file(self): fileutils.chmod(test_dir, fileutils.RW, recurse=True) def 
test_chmod_read_write_exec_dir(self): - test_dir = self.get_test_loc('fileutils/executable', copy=True) - test_file = join(test_dir, 'deep1', 'deep2', 'ctags') + test_dir = self.get_test_loc("fileutils/executable", copy=True) + test_file = join(test_dir, "deep1", "deep2", "ctags") try: if on_posix: @@ -158,43 +156,43 @@ def test_chmod_read_write_exec_dir(self): def test_copyfile_does_not_keep_permissions(self): src_file = self.get_temp_file() dest = self.get_temp_dir() - with open(src_file, 'w') as f: - f.write(u'') + with open(src_file, "w") as f: + f.write("") try: make_non_readable(src_file) if on_posix: assert not filetype.is_readable(src_file) fileutils.copyfile(src_file, dest) - dest_file = join(dest, os.listdir(dest)[0]) + dest_file = join(dest, list(os.scandir(dest))[0].name) assert filetype.is_readable(dest_file) finally: fileutils.chmod(src_file, fileutils.RW, recurse=True) fileutils.chmod(dest, fileutils.RW, recurse=True) def test_copytree_does_not_keep_non_writable_permissions(self): - src = self.get_test_loc('fileutils/exec', copy=True) + src = self.get_test_loc("fileutils/exec", copy=True) dst = self.get_temp_dir() try: - src_file = join(src, 'subtxt/a.txt') + src_file = join(src, "subtxt/a.txt") make_non_writable(src_file) assert not filetype.is_writable(src_file) - src_dir = join(src, 'subtxt') + src_dir = join(src, "subtxt") make_non_writable(src_dir) if on_posix: assert not filetype.is_writable(src_dir) # copy proper - dest_dir = join(dst, 'dest') + dest_dir = join(dst, "dest") fileutils.copytree(src, dest_dir) - dst_file = join(dest_dir, 'subtxt/a.txt') + dst_file = join(dest_dir, "subtxt/a.txt") assert os.path.exists(dst_file) assert filetype.is_writable(dst_file) - dest_dir2 = join(dest_dir, 'subtxt') + dest_dir2 = join(dest_dir, "subtxt") assert os.path.exists(dest_dir2) assert filetype.is_writable(dest_dir) finally: @@ -202,10 +200,10 @@ def test_copytree_does_not_keep_non_writable_permissions(self): fileutils.chmod(dst, fileutils.RW, 
recurse=True) def test_copytree_copies_unreadable_files(self): - src = self.get_test_loc('fileutils/exec', copy=True) + src = self.get_test_loc("fileutils/exec", copy=True) dst = self.get_temp_dir() - src_file1 = join(src, 'a.bat') - src_file2 = join(src, 'subtxt', 'a.txt') + src_file1 = join(src, "a.bat") + src_file2 = join(src, "subtxt", "a.txt") try: # make some unreadable source files @@ -218,14 +216,14 @@ def test_copytree_copies_unreadable_files(self): assert not filetype.is_readable(src_file2) # copy proper - dest_dir = join(dst, 'dest') + dest_dir = join(dst, "dest") fileutils.copytree(src, dest_dir) - dest_file1 = join(dest_dir, 'a.bat') + dest_file1 = join(dest_dir, "a.bat") assert os.path.exists(dest_file1) assert filetype.is_readable(dest_file1) - dest_file2 = join(dest_dir, 'subtxt', 'a.txt') + dest_file2 = join(dest_dir, "subtxt", "a.txt") assert os.path.exists(dest_file2) assert filetype.is_readable(dest_file2) @@ -235,21 +233,21 @@ def test_copytree_copies_unreadable_files(self): class TestFileUtils(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") - @skipIf(on_windows, 'Windows handles special files differently.') + @skipIf(on_windows, "Windows handles special files differently.") def test_copytree_does_not_copy_fifo(self): # Windows does not support pipes - src = self.get_test_loc('fileutils/filetype', copy=True) + src = self.get_test_loc("fileutils/filetype", copy=True) dest = self.get_temp_dir() - src_file = join(src, 'myfifo') + src_file = join(src, "myfifo") os.mkfifo(src_file) # NOQA - dest_dir = join(dest, 'dest') + dest_dir = join(dest, "dest") fileutils.copytree(src, dest_dir) - assert not os.path.exists(join(dest_dir, 'myfifo')) + assert not os.path.exists(join(dest_dir, "myfifo")) def test_copyfile_keeps_modified_date(self): - test_file = self.get_test_loc('fileutils/exec/subtxt/a.txt', copy=True) + test_file = 
self.get_test_loc("fileutils/exec/subtxt/a.txt", copy=True) dest = self.get_temp_file() expected = 1289918700 os.utime(test_file, (expected, expected)) @@ -258,25 +256,25 @@ def test_copyfile_keeps_modified_date(self): assert result == expected def test_copyfile_can_copy_file_to_dir_keeping_full_file_name(self): - test_file = self.get_test_loc('fileutils/exec/subtxt/a.txt', copy=True) + test_file = self.get_test_loc("fileutils/exec/subtxt/a.txt", copy=True) dest = self.get_temp_dir() - expected = os.path.join(dest, 'a.txt') + expected = os.path.join(dest, "a.txt") fileutils.copyfile(test_file, dest) assert os.path.exists(expected) def test_resource_name(self): - assert fileutils.resource_name('/a/b/d/f/f') == 'f' - assert fileutils.resource_name('/a/b/d/f/f/') == 'f' - assert fileutils.resource_name('a/b/d/f/f/') == 'f' - assert fileutils.resource_name('/a/b/d/f/f.a') == 'f.a' - assert fileutils.resource_name('/a/b/d/f/f.a/') == 'f.a' - assert fileutils.resource_name('a/b/d/f/f.a') == 'f.a' - assert fileutils.resource_name('f.a') == 'f.a' - - @skipIf(on_windows, 'Windows FS encoding is ... different!') + assert fileutils.resource_name("/a/b/d/f/f") == "f" + assert fileutils.resource_name("/a/b/d/f/f/") == "f" + assert fileutils.resource_name("a/b/d/f/f/") == "f" + assert fileutils.resource_name("/a/b/d/f/f.a") == "f.a" + assert fileutils.resource_name("/a/b/d/f/f.a/") == "f.a" + assert fileutils.resource_name("a/b/d/f/f.a") == "f.a" + assert fileutils.resource_name("f.a") == "f.a" + + @skipIf(on_windows, "Windows FS encoding is ... 
different!") def test_fsdecode_and_fsencode_are_idempotent(self): - a = b'foo\xb1bar' - b = u'foo\udcb1bar' + a = b"foo\xb1bar" + b = "foo\udcb1bar" assert os.fsencode(os.fsdecode(a)) == a assert os.fsencode(os.fsdecode(b)) == a assert os.fsdecode(os.fsencode(a)) == b @@ -284,60 +282,66 @@ def test_fsdecode_and_fsencode_are_idempotent(self): class TestFileUtilsWalk(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_os_walk_with_unicode_path(self): - test_dir = self.extract_test_zip('fileutils/walk/unicode.zip') - test_dir = join(test_dir, 'unicode') + test_dir = self.extract_test_zip("fileutils/walk/unicode.zip") + test_dir = join(test_dir, "unicode") test_dir = str(test_dir) result = list(os.walk(test_dir)) expected = [ - (str(test_dir), ['a'], [u'2.csv']), - (str(test_dir) + sep + 'a', [], [u'gru\u0308n.png']) + (str(test_dir), ["a"], ["2.csv"]), + (str(test_dir) + sep + "a", [], ["gru\u0308n.png"]), ] assert result == expected def test_fileutils_walk(self): - test_dir = self.get_test_loc('fileutils/walk') - base = self.get_test_loc('fileutils') - result = [(as_posixpath(t.replace(base, '')), d, sorted(f),) for t, d, f in fileutils.walk(test_dir)] + test_dir = self.get_test_loc("fileutils/walk") + base = self.get_test_loc("fileutils") + result = [ + ( + as_posixpath(t.replace(base, "")), + d, + sorted(f), + ) + for t, d, f in fileutils.walk(test_dir) + ] expected = [ - ('/walk', ['d1'], ['f', 'unicode.zip']), - ('/walk/d1', ['d2'], ['f1']), - ('/walk/d1/d2', ['d3'], ['f2']), - ('/walk/d1/d2/d3', [], ['f3']) + ("/walk", ["d1"], ["f", "unicode.zip"]), + ("/walk/d1", ["d2"], ["f1"]), + ("/walk/d1/d2", ["d3"], ["f2"]), + ("/walk/d1/d2/d3", [], ["f3"]), ] assert result == expected def test_fileutils_walk_with_unicode_path(self): - test_dir = self.extract_test_zip('fileutils/walk/unicode.zip') - test_dir = join(test_dir, 'unicode') + test_dir = 
self.extract_test_zip("fileutils/walk/unicode.zip") + test_dir = join(test_dir, "unicode") result = list(x[-1] for x in fileutils.walk(test_dir)) - expected = [[u'2.csv'], [u'gru\u0308n.png']] + expected = [["2.csv"], ["gru\u0308n.png"]] assert result == expected def test_fileutils_walk_can_walk_a_single_file(self): - test_file = self.get_test_loc('fileutils/walk/f') + test_file = self.get_test_loc("fileutils/walk/f") result = list(fileutils.walk(test_file)) - expected = [ - (fileutils.parent_directory(test_file), [], ['f']) - ] + expected = [(fileutils.parent_directory(test_file), [], ["f"])] assert result == expected def test_fileutils_walk_can_walk_an_empty_dir(self): test_dir = self.get_temp_dir() result = list(fileutils.walk(test_dir)) - expected = [ - (test_dir, [], []) - ] + expected = [(test_dir, [], [])] assert result == expected - @skipIf(on_macos_14_or_higher, 'Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635') + @skipIf( + on_macos_14_or_higher, + "Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635", + ) def test_walk_can_walk_non_utf8_path_from_unicode_path(self): - test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz') - test_dir = join(test_dir, 'non_unicode') + test_dir = self.extract_test_tar_raw("fileutils/walk_non_utf8/non_unicode.tgz") + test_dir = join(test_dir, "non_unicode") if not on_linux: test_dir = str(test_dir) @@ -345,54 +349,57 @@ def test_walk_can_walk_non_utf8_path_from_unicode_path(self): _dirpath, _dirnames, filenames = result assert len(filenames) == 18 - @skipIf(on_macos_14_or_higher, 'Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635') + @skipIf( + on_macos_14_or_higher, + "Cannot handle yet byte paths on macOS 10.14+. 
See https://github.com/nexB/scancode-toolkit/issues/1635", + ) def test_os_walk_can_walk_non_utf8_path_from_unicode_path(self): - test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz') - test_dir = join(test_dir, 'non_unicode') + test_dir = self.extract_test_tar_raw("fileutils/walk_non_utf8/non_unicode.tgz") + test_dir = join(test_dir, "non_unicode") result = list(os.walk(test_dir))[0] _dirpath, _dirnames, filenames = result assert len(filenames) == 18 - @skipIf(on_windows, 'os.symlink does not work on Windows') + @skipIf(on_windows, "os.symlink does not work on Windows") def test_walk_on_symlinks(self): - test_dir = self.get_test_loc('symlink/walk', copy=True) + test_dir = self.get_test_loc("symlink/walk", copy=True) temp_dir = fileutils.get_temp_dir() - test_link = join(temp_dir, 'test-dir-link') + test_link = join(temp_dir, "test-dir-link") os.symlink(test_dir, test_link) results = list(fileutils.walk(test_link, follow_symlinks=True)) results = [(os.path.basename(top), dirs, files) for top, dirs, files in results] - expected = [ - ('test-dir-link', ['dir'], ['a']), - ('dir', [], ['b']) - ] + expected = [("test-dir-link", ["dir"], ["a"]), ("dir", [], ["b"])] assert results == expected class TestFileUtilsIter(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_resource_iter(self): - test_dir = self.get_test_loc('fileutils/walk') - base = self.get_test_loc('fileutils') - result = [as_posixpath(f.replace(base, '')) for f in fileutils.resource_iter(test_dir, with_dirs=False)] + test_dir = self.get_test_loc("fileutils/walk") + base = self.get_test_loc("fileutils") + result = [ + as_posixpath(f.replace(base, "")) + for f in fileutils.resource_iter(test_dir, with_dirs=False) + ] expected = [ - '/walk/f', - '/walk/unicode.zip', - '/walk/d1/f1', - '/walk/d1/d2/f2', - '/walk/d1/d2/d3/f3' + "/walk/f", + "/walk/unicode.zip", + "/walk/d1/f1", + 
"/walk/d1/d2/f2", + "/walk/d1/d2/d3/f3", ] assert sorted(result) == sorted(expected) def test_resource_iter_can_iterate_a_single_file(self): - test_file = self.get_test_loc('fileutils/walk/f') + test_file = self.get_test_loc("fileutils/walk/f") result = [as_posixpath(f) for f in fileutils.resource_iter(test_file, with_dirs=False)] expected = [as_posixpath(test_file)] assert result == expected def test_resource_iter_can_iterate_a_single_file_with_dirs(self): - test_file = self.get_test_loc('fileutils/walk/f') + test_file = self.get_test_loc("fileutils/walk/f") result = [as_posixpath(f) for f in fileutils.resource_iter(test_file, with_dirs=True)] expected = [as_posixpath(test_file)] assert result == expected @@ -410,227 +417,235 @@ def test_resource_iter_can_walk_an_empty_dir_with_dirs(self): assert result == expected def test_resource_iter_without_dir(self): - test_dir = self.get_test_loc('fileutils/walk') - base = self.get_test_loc('fileutils') - result = sorted([as_posixpath(f.replace(base, '')) - for f in fileutils.resource_iter(test_dir, with_dirs=False)]) + test_dir = self.get_test_loc("fileutils/walk") + base = self.get_test_loc("fileutils") + result = sorted( + [ + as_posixpath(f.replace(base, "")) + for f in fileutils.resource_iter(test_dir, with_dirs=False) + ] + ) expected = [ - '/walk/f', - '/walk/unicode.zip', - '/walk/d1/f1', - '/walk/d1/d2/f2', - '/walk/d1/d2/d3/f3' + "/walk/f", + "/walk/unicode.zip", + "/walk/d1/f1", + "/walk/d1/d2/f2", + "/walk/d1/d2/d3/f3", ] assert sorted(result) == sorted(expected) def test_resource_iter_with_dirs(self): - test_dir = self.get_test_loc('fileutils/walk') - base = self.get_test_loc('fileutils') - result = sorted([as_posixpath(f.replace(base, '')) - for f in fileutils.resource_iter(test_dir, with_dirs=True)]) + test_dir = self.get_test_loc("fileutils/walk") + base = self.get_test_loc("fileutils") + result = sorted( + [ + as_posixpath(f.replace(base, "")) + for f in fileutils.resource_iter(test_dir, with_dirs=True) + ] 
+ ) expected = [ - '/walk/d1', - '/walk/d1/d2', - '/walk/d1/d2/d3', - '/walk/d1/d2/d3/f3', - '/walk/d1/d2/f2', - '/walk/d1/f1', - '/walk/f', - '/walk/unicode.zip' + "/walk/d1", + "/walk/d1/d2", + "/walk/d1/d2/d3", + "/walk/d1/d2/d3/f3", + "/walk/d1/d2/f2", + "/walk/d1/f1", + "/walk/f", + "/walk/unicode.zip", ] assert sorted(result) == sorted(expected) def test_resource_iter_return_byte_on_byte_input(self): - test_dir = self.get_test_loc('fileutils/walk') - base = self.get_test_loc('fileutils') - result = sorted([as_posixpath(f.replace(base, '')) - for f in fileutils.resource_iter(test_dir, with_dirs=True)]) + test_dir = self.get_test_loc("fileutils/walk") + base = self.get_test_loc("fileutils") + result = sorted( + [ + as_posixpath(f.replace(base, "")) + for f in fileutils.resource_iter(test_dir, with_dirs=True) + ] + ) expected = [ - '/walk/d1', - '/walk/d1/d2', - '/walk/d1/d2/d3', - '/walk/d1/d2/d3/f3', - '/walk/d1/d2/f2', - '/walk/d1/f1', - '/walk/f', - '/walk/unicode.zip' + "/walk/d1", + "/walk/d1/d2", + "/walk/d1/d2/d3", + "/walk/d1/d2/d3/f3", + "/walk/d1/d2/f2", + "/walk/d1/f1", + "/walk/f", + "/walk/unicode.zip", ] assert sorted(result) == sorted(expected) assert all(isinstance(p, str) for p in result) def test_resource_iter_return_unicode_on_unicode_input(self): - test_dir = self.get_test_loc('fileutils/walk') - base = str(self.get_test_loc('fileutils')) - result = sorted([as_posixpath(f.replace(base, '')) - for f in fileutils.resource_iter(test_dir, with_dirs=True)]) + test_dir = self.get_test_loc("fileutils/walk") + base = str(self.get_test_loc("fileutils")) + result = sorted( + [ + as_posixpath(f.replace(base, "")) + for f in fileutils.resource_iter(test_dir, with_dirs=True) + ] + ) expected = [ - u'/walk/d1', - u'/walk/d1/d2', - u'/walk/d1/d2/d3', - u'/walk/d1/d2/d3/f3', - u'/walk/d1/d2/f2', - u'/walk/d1/f1', - u'/walk/f', - u'/walk/unicode.zip' + "/walk/d1", + "/walk/d1/d2", + "/walk/d1/d2/d3", + "/walk/d1/d2/d3/f3", + "/walk/d1/d2/f2", + 
"/walk/d1/f1", + "/walk/f", + "/walk/unicode.zip", ] assert sorted(result) == sorted(expected) assert all(isinstance(p, str) for p in result) def test_resource_iter_can_walk_unicode_path_with_zip(self): - test_dir = self.extract_test_zip('fileutils/walk/unicode.zip') - test_dir = join(test_dir, 'unicode') + test_dir = self.extract_test_zip("fileutils/walk/unicode.zip") + test_dir = join(test_dir, "unicode") test_dir = str(test_dir) - result = sorted([p.replace(test_dir, '') for p in fileutils.resource_iter(test_dir)]) + result = sorted([p.replace(test_dir, "") for p in fileutils.resource_iter(test_dir)]) if on_linux: - expected = [ - u'/2.csv', - u'/a', - u'/a/gru\u0308n.png' - ] + expected = ["/2.csv", "/a", "/a/gru\u0308n.png"] elif on_mac: - expected = [ - u'/2.csv', - u'/a', - u'/a/gru\u0308n.png' - ] + expected = ["/2.csv", "/a", "/a/gru\u0308n.png"] elif on_windows: - expected = [ - u'\\2.csv', - u'\\a', - u'\\a\\gru\u0308n.png' - ] + expected = ["\\2.csv", "\\a", "\\a\\gru\u0308n.png"] assert result == expected - @skipIf(on_macos_14_or_higher, 'Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635') + @skipIf( + on_macos_14_or_higher, + "Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635", + ) def test_resource_iter_can_walk_non_utf8_path_from_unicode_path_with_dirs(self): - test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz') - test_dir = join(test_dir, 'non_unicode') + test_dir = self.extract_test_tar_raw("fileutils/walk_non_utf8/non_unicode.tgz") + test_dir = join(test_dir, "non_unicode") result = list(fileutils.resource_iter(test_dir, with_dirs=True)) assert len(result) == 18 - @skipIf(on_macos_14_or_higher, 'Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635') + @skipIf( + on_macos_14_or_higher, + "Cannot handle yet byte paths on macOS 10.14+. 
See https://github.com/nexB/scancode-toolkit/issues/1635", + ) def test_resource_iter_can_walk_non_utf8_path_from_unicode_path(self): - test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz') - test_dir = join(test_dir, 'non_unicode') + test_dir = self.extract_test_tar_raw("fileutils/walk_non_utf8/non_unicode.tgz") + test_dir = join(test_dir, "non_unicode") result = list(fileutils.resource_iter(test_dir, with_dirs=False)) assert len(result) == 18 - @skipIf(on_windows, 'Symlinks do not work well on Windows') + @skipIf(on_windows, "Symlinks do not work well on Windows") def test_resource_iter_follow_symlinks(self): - test_dir = self.get_test_loc('symlink/walk', copy=True) + test_dir = self.get_test_loc("symlink/walk", copy=True) temp_dir = fileutils.get_temp_dir() - test_link = join(temp_dir, 'test-dir-link') + test_link = join(temp_dir, "test-dir-link") os.symlink(test_dir, test_link) - result = [os.path.basename(f) for f in fileutils.resource_iter(test_dir, follow_symlinks=True)] - expected = [ - 'dir', - 'a', - 'b' + result = [ + os.path.basename(f) for f in fileutils.resource_iter(test_dir, follow_symlinks=True) ] + expected = ["dir", "a", "b"] assert sorted(result) == sorted(expected) class TestBaseName(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_file_base_name_on_path_and_location_1(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/file' - expected_name = 'file' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/file" + expected_name = "file" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name - def test_file_base_name_on_file_path_for_dot_file (self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/' - expected_name = '.a' + def 
test_file_base_name_on_file_path_for_dot_file(self): + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/" + expected_name = ".a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_file_path_for_dot_file_with_extension(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/.a.b' - expected_name = '.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/.a.b" + expected_name = ".a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_file_path_for_file_with_unknown_composed_extension(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/a.tag.gz' - expected_name = 'a.tag' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/a.tag.gz" + expected_name = "a.tag" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_file_path_for_file_with_known_composed_extension(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/a.tar.gz' - expected_name = 'a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/a.tar.gz" + expected_name = "a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_dir_path(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/' - expected_name = 'b' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/" + expected_name = "b" result = fileutils.file_base_name(test_file) assert result == expected_name 
result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_plain_file(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/f.a' - expected_name = 'f' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/f.a" + expected_name = "f" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_plain_file_with_parent_dir_extension(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/a.c' - expected_name = 'a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/a.c" + expected_name = "a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_path_for_plain_dir(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/' - expected_name = 'a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/" + expected_name = "a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_path_for_plain_dir_with_extension(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/' - expected_name = 'f.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/" + expected_name = "f.a" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) assert result == expected_name def test_file_base_name_on_path_for_plain_file(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'tst' - expected_name = 'tst' + test_dir = self.get_test_loc("fileutils/basename") + test_file = 
"tst" + expected_name = "tst" result = fileutils.file_base_name(test_file) assert result == expected_name result = fileutils.file_base_name(join(test_dir, test_file)) @@ -638,93 +653,93 @@ def test_file_base_name_on_path_for_plain_file(self): class TestFileName(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_file_name_on_path_and_location_1(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/file' - expected_name = 'file' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/file" + expected_name = "file" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_2(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/' - expected_name = '.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/" + expected_name = ".a" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_3(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/.a.b' - expected_name = '.a.b' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/.a.b" + expected_name = ".a.b" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_4(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/a.tag.gz' - expected_name = 'a.tag.gz' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/a.tag.gz" + expected_name = "a.tag.gz" result = fileutils.file_name(test_file) assert result 
== expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_5(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/' - expected_name = 'b' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/" + expected_name = "b" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_6(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/f.a' - expected_name = 'f.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/f.a" + expected_name = "f.a" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_7(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/' - expected_name = 'a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/" + expected_name = "a" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_8(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/a.c' - expected_name = 'a.c' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/a.c" + expected_name = "a.c" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_9(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/' - expected_name = 'f.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/" + expected_name 
= "f.a" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_name_on_path_and_location_10(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'tst' - expected_name = 'tst' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "tst" + expected_name = "tst" result = fileutils.file_name(test_file) assert result == expected_name result = fileutils.file_name((os.path.join(test_dir, test_file))) @@ -732,146 +747,147 @@ def test_file_name_on_path_and_location_10(self): class TestFileExtension(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_file_extension_on_path_and_location_1(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/file' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/file" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_2(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_3(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/.a.b' - expected_name = '.b' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/.a.b" + expected_name = ".b" result = fileutils.file_extension(test_file) assert result == expected_name 
result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_4(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/a.tag.gz' - expected_name = '.gz' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/a.tag.gz" + expected_name = ".gz" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_5(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_6(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/f.a' - expected_name = '.a' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/f.a" + expected_name = ".a" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_7(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_8(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/a.c' - expected_name = '.c' + test_dir = 
self.get_test_loc("fileutils/basename") + test_file = "f.a/a.c" + expected_name = ".c" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_9(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_file_extension_on_path_and_location_10(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'tst' - expected_name = '' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "tst" + expected_name = "" result = fileutils.file_extension(test_file) assert result == expected_name result = fileutils.file_extension((os.path.join(test_dir, test_file))) assert result == expected_name def test_splitext_base(self): - expected = 'path', '.ext' - assert fileutils.splitext('C:\\dir\\path.ext') == expected + expected = "path", ".ext" + assert fileutils.splitext("C:\\dir\\path.ext") == expected def test_splitext_directories_even_with_dotted_names_have_no_extension(self): import ntpath - expected = 'path.ext', '' - assert fileutils.splitext('C:\\dir\\path.ext' + ntpath.sep) == expected - expected = 'path.ext', '' - assert fileutils.splitext('/dir/path.ext/') == expected + expected = "path.ext", "" + assert fileutils.splitext("C:\\dir\\path.ext" + ntpath.sep) == expected + + expected = "path.ext", "" + assert fileutils.splitext("/dir/path.ext/") == expected - expected = 'file', '.txt' - assert fileutils.splitext('/some/file.txt') == expected + expected = "file", ".txt" + assert fileutils.splitext("/some/file.txt") == expected def 
test_splitext_composite_extensions_for_tarballs_are_properly_handled(self): - expected = 'archive', '.tar.gz' - assert fileutils.splitext('archive.tar.gz') == expected + expected = "archive", ".tar.gz" + assert fileutils.splitext("archive.tar.gz") == expected def test_splitext_name_base(self): - expected = 'path', '.ext' - assert fileutils.splitext_name('path.ext') == expected + expected = "path", ".ext" + assert fileutils.splitext_name("path.ext") == expected def test_splitext_name_directories_have_no_extension(self): - expected = 'path.ext', '' - assert fileutils.splitext_name('path.ext', is_file=False) == expected + expected = "path.ext", "" + assert fileutils.splitext_name("path.ext", is_file=False) == expected - expected = 'file', '.txt' - assert fileutils.splitext_name('file.txt') == expected + expected = "file", ".txt" + assert fileutils.splitext_name("file.txt") == expected def test_splitext_name_composite_extensions_for_tarballs_are_properly_handled(self): - expected = 'archive', '.tar.gz' - assert fileutils.splitext_name('archive.tar.gz') == expected + expected = "archive", ".tar.gz" + assert fileutils.splitext_name("archive.tar.gz") == expected def test_splitext_name_dotfile_are_properly_handled(self): - expected = '.dotfile', '' - assert fileutils.splitext_name('.dotfile') == expected - expected = '.dotfile', '.this' - assert fileutils.splitext_name('.dotfile.this') == expected + expected = ".dotfile", "" + assert fileutils.splitext_name(".dotfile") == expected + expected = ".dotfile", ".this" + assert fileutils.splitext_name(".dotfile.this") == expected class TestParentDir(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_parent_directory_on_path_and_location_1(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/file' - expected_name = 'a/.a/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = 
"a/.a/file" + expected_name = "a/.a/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -881,9 +897,9 @@ def test_parent_directory_on_path_and_location_1(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_2(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/.a/' - expected_name = 'a/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/.a/" + expected_name = "a/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -893,9 +909,9 @@ def test_parent_directory_on_path_and_location_2(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_3(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/.a.b' - expected_name = 'a/b/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/.a.b" + expected_name = "a/b/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -905,9 +921,9 @@ def test_parent_directory_on_path_and_location_3(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_4(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/a.tag.gz' - expected_name = 'a/b/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/a.tag.gz" + expected_name = "a/b/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -917,9 +933,9 @@ def test_parent_directory_on_path_and_location_4(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_5(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/b/' - expected_name = 'a/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/b/" + expected_name = "a/" 
result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -929,9 +945,9 @@ def test_parent_directory_on_path_and_location_5(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_6(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/f.a' - expected_name = 'a/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/f.a" + expected_name = "a/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -941,9 +957,9 @@ def test_parent_directory_on_path_and_location_6(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_7(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'a/' - expected_name = '/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "a/" + expected_name = "/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -953,9 +969,9 @@ def test_parent_directory_on_path_and_location_7(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_8(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/a.c' - expected_name = 'f.a/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/a.c" + expected_name = "f.a/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name @@ -965,9 +981,9 @@ def test_parent_directory_on_path_and_location_8(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_9(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'f.a/' - expected_name = '/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "f.a/" + expected_name = "/" result = fileutils.parent_directory(test_file) result = 
fileutils.as_posixpath(result) assert result == expected_name @@ -977,9 +993,9 @@ def test_parent_directory_on_path_and_location_9(self): assert result.endswith(expected_name) def test_parent_directory_on_path_and_location_10(self): - test_dir = self.get_test_loc('fileutils/basename') - test_file = 'tst' - expected_name = '/' + test_dir = self.get_test_loc("fileutils/basename") + test_file = "tst" + expected_name = "/" result = fileutils.parent_directory(test_file) result = fileutils.as_posixpath(result) assert result == expected_name diff --git a/tests/test_functional.py b/tests/test_functional.py index 5f9d5728..5953c03f 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -15,14 +15,12 @@ class TestFunctional(TestCase): - def test_flatten(self): - expected = [7, 6, 5, 4, 'a', 3, 3, 2, 1] + expected = [7, 6, 5, 4, "a", 3, 3, 2, 1] test = flatten([7, (6, [5, [4, ["a"], 3]], 3), 2, 1]) assert test == expected def test_flatten_generator(self): - def gen(): for _ in range(2): yield range(5) @@ -32,17 +30,16 @@ def gen(): assert test == expected def test_flatten_empties(self): - expected = ['a'] - test = flatten([[], (), ['a']]) + expected = ["a"] + test = flatten([[], (), ["a"]]) assert test == expected def test_partial(self): - def test_func(a, b): pass wrapped = partial(test_func, a=2) - assert wrapped.__name__ == 'test_func' + assert wrapped.__name__ == "test_func" def test_memoized(self): call_count = Counter() diff --git a/tests/test_hash.py b/tests/test_hash.py index 4c1dc03d..b1ab88e5 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -2,140 +2,165 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# +import hashlib import os -from commoncode.testcase import FileBasedTesting - from commoncode.hash import b64sha1 from commoncode.hash import checksum +from commoncode.hash import checksum_from_chunks from commoncode.hash import get_hasher from commoncode.hash import md5 from commoncode.hash import multi_checksums from commoncode.hash import sha1 +from commoncode.hash import sha1_git from commoncode.hash import sha256 from commoncode.hash import sha512 -from commoncode.hash import sha1_git +from commoncode.testcase import FileBasedTesting class TestHash(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") def test_get_hasher(self): h = get_hasher(160) - assert h(b'a').b64digest() == u'hvfkN_qlp_zhXR3cuerq6jd2Z7g=' - assert h(b'aa').b64digest() == u'4MkDWJjdUvxlxBRUzsnE0mEb-zc=' - assert h(b'aaa').b64digest() == u'fiQN50-x7Qj6CNOAY_amqRRiqBU=' + assert h(b"a").b64digest() == "hvfkN_qlp_zhXR3cuerq6jd2Z7g=" + assert h(b"aa").b64digest() == "4MkDWJjdUvxlxBRUzsnE0mEb-zc=" + assert h(b"aaa").b64digest() == "fiQN50-x7Qj6CNOAY_amqRRiqBU=" def test_short_hashes(self): h = get_hasher(32) - assert h(b'a').hexdigest() == u'0cc175b9' - assert h(b'aa').hexdigest() == u'4124bc0a' + assert h(b"a").hexdigest() == "0cc175b9" + assert h(b"aa").hexdigest() == "4124bc0a" h = get_hasher(64) - assert h(b'aa').hexdigest() == u'4124bc0a9335c27f' + assert h(b"aa").hexdigest() == "4124bc0a9335c27f" def test_sha1_checksum_on_text(self): - test_file = self.get_test_loc('hash/dir1/a.txt') - assert u'3ca69e8d6c234a469d16ac28a4a658c92267c423' == sha1(test_file) + test_file = self.get_test_loc("hash/dir1/a.txt") + assert "3ca69e8d6c234a469d16ac28a4a658c92267c423" == sha1(test_file) def test_sha1_checksum_on_text2(self): - test_file = self.get_test_loc('hash/dir2/a.txt') - assert u'3ca69e8d6c234a469d16ac28a4a658c92267c423' == sha1(test_file) + test_file = self.get_test_loc("hash/dir2/a.txt") + assert 
"3ca69e8d6c234a469d16ac28a4a658c92267c423" == sha1(test_file) def test_sha1_checksum_on_dos_text(self): - test_file = self.get_test_loc('hash/dir2/dos.txt') - assert u'a71718fb198630ae8ba32926015d8555a03cb06c' == sha1(test_file) + test_file = self.get_test_loc("hash/dir2/dos.txt") + assert "a71718fb198630ae8ba32926015d8555a03cb06c" == sha1(test_file) def test_sha1_checksum_base64(self): - test_file = self.get_test_loc('hash/dir1/a.png') - assert u'NKxUZdSKmwT8J18JvCIwZg349Pc=' == b64sha1(test_file) + test_file = self.get_test_loc("hash/dir1/a.png") + assert "NKxUZdSKmwT8J18JvCIwZg349Pc=" == b64sha1(test_file) def test_md5_checksum_on_text(self): - test_file = self.get_test_loc('hash/dir1/a.txt') - assert u'40c53c58fdafacc83cfff6ee3d2f6d69' == md5(test_file) + test_file = self.get_test_loc("hash/dir1/a.txt") + assert "40c53c58fdafacc83cfff6ee3d2f6d69" == md5(test_file) def test_md5_checksum_on_text2(self): - test_file = self.get_test_loc('hash/dir2/a.txt') - assert u'40c53c58fdafacc83cfff6ee3d2f6d69' == md5(test_file) + test_file = self.get_test_loc("hash/dir2/a.txt") + assert "40c53c58fdafacc83cfff6ee3d2f6d69" == md5(test_file) def test_md5_checksum_on_dos_text(self): - test_file = self.get_test_loc('hash/dir2/dos.txt') - assert u'095f5068940e41df9add5d4cc396c181' == md5(test_file) + test_file = self.get_test_loc("hash/dir2/dos.txt") + assert "095f5068940e41df9add5d4cc396c181" == md5(test_file) def test_md5_checksum(self): - test_file = self.get_test_loc('hash/dir1/a.png') - assert u'4760fb467f1ebf3b0aeace4a3926f1a4' == md5(test_file) + test_file = self.get_test_loc("hash/dir1/a.png") + assert "4760fb467f1ebf3b0aeace4a3926f1a4" == md5(test_file) def test_sha1_checksum(self): - test_file = self.get_test_loc('hash/dir1/a.png') - assert u'34ac5465d48a9b04fc275f09bc2230660df8f4f7' == sha1(test_file) + test_file = self.get_test_loc("hash/dir1/a.png") + assert "34ac5465d48a9b04fc275f09bc2230660df8f4f7" == sha1(test_file) def test_sha256_checksum(self): - test_file = 
self.get_test_loc('hash/dir1/a.png') - assert u'1b598db6fee8f1ec7bb919c0adf68956f3d20af8c9934a9cf2db52e1347efd35' == sha256(test_file) + test_file = self.get_test_loc("hash/dir1/a.png") + assert "1b598db6fee8f1ec7bb919c0adf68956f3d20af8c9934a9cf2db52e1347efd35" == sha256( + test_file + ) def test_sha512_checksum(self): - test_file = self.get_test_loc('hash/dir1/a.png') - assert u'5be9e01cd20ff288fd3c3fc46be5c2747eaa2c526197125330947a95cdb418222176b182a4680f0e435ba8f114363c45a67b30eed9a9222407e63ccbde46d3b4' == sha512(test_file) + test_file = self.get_test_loc("hash/dir1/a.png") + assert ( + "5be9e01cd20ff288fd3c3fc46be5c2747eaa2c526197125330947a95cdb418222176b182a4680f0e435ba8f114363c45a67b30eed9a9222407e63ccbde46d3b4" + == sha512(test_file) + ) def test_checksum_sha1(self): - test_file = self.get_test_loc('hash/dir1/a.txt') - assert '3ca69e8d6c234a469d16ac28a4a658c92267c423' == checksum(test_file, 'sha1') + test_file = self.get_test_loc("hash/dir1/a.txt") + assert "3ca69e8d6c234a469d16ac28a4a658c92267c423" == checksum(test_file, "sha1") def test_checksum_md5(self): - test_file = self.get_test_loc('hash/dir1/a.txt') - assert '40c53c58fdafacc83cfff6ee3d2f6d69' == checksum(test_file, 'md5') + test_file = self.get_test_loc("hash/dir1/a.txt") + assert "40c53c58fdafacc83cfff6ee3d2f6d69" == checksum(test_file, "md5") def test_multi_checksums(self): - test_file = self.get_test_loc('hash/dir1/a.png') - expected = dict([ - ('md5', u'4760fb467f1ebf3b0aeace4a3926f1a4'), - ('sha1', u'34ac5465d48a9b04fc275f09bc2230660df8f4f7'), - ('sha256', u'1b598db6fee8f1ec7bb919c0adf68956f3d20af8c9934a9cf2db52e1347efd35'), - ]) - result = multi_checksums(test_file, 'md5 sha1 sha256'.split()) + test_file = self.get_test_loc("hash/dir1/a.png") + expected = dict( + [ + ("md5", "4760fb467f1ebf3b0aeace4a3926f1a4"), + ("sha1", "34ac5465d48a9b04fc275f09bc2230660df8f4f7"), + ("sha256", "1b598db6fee8f1ec7bb919c0adf68956f3d20af8c9934a9cf2db52e1347efd35"), + ] + ) + result = multi_checksums(test_file, 
"md5 sha1 sha256".split()) assert result == expected def test_multi_checksums_custom(self): - test_file = self.get_test_loc('hash/dir1/a.png') - result = multi_checksums(test_file, ('sha512',)) - expected = dict([ - ('sha512', u'5be9e01cd20ff288fd3c3fc46be5c2747eaa2c526197125330947a95cdb418222176b182a4680f0e435ba8f114363c45a67b30eed9a9222407e63ccbde46d3b4'), - ]) + test_file = self.get_test_loc("hash/dir1/a.png") + result = multi_checksums(test_file, ("sha512",)) + expected = dict( + [ + ( + "sha512", + "5be9e01cd20ff288fd3c3fc46be5c2747eaa2c526197125330947a95cdb418222176b182a4680f0e435ba8f114363c45a67b30eed9a9222407e63ccbde46d3b4", + ), + ] + ) assert result == expected def test_multi_checksums_shattered1(self): - test_file = self.get_test_loc('hash/sha1-collision/shattered-1.pdf') - expected = dict([ - ('md5', 'ee4aa52b139d925f8d8884402b0a750c'), - ('sha1', '38762cf7f55934b34d179ae6a4c80cadccbb7f0a'), - ('sha256', '2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0'), - ('sha512', '3c19b2cbcf72f7f5b252ea31677b8f2323d6119e49bcc0fb55931d00132385f1e749bb24cbd68c04ac826ae8421802825d3587fe185abf709669bb9693f6b416'), - ('sha1_git', 'ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0'), - ]) + test_file = self.get_test_loc("hash/sha1-collision/shattered-1.pdf") + expected = dict( + [ + ("md5", "ee4aa52b139d925f8d8884402b0a750c"), + ("sha1", "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"), + ("sha256", "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0"), + ( + "sha512", + "3c19b2cbcf72f7f5b252ea31677b8f2323d6119e49bcc0fb55931d00132385f1e749bb24cbd68c04ac826ae8421802825d3587fe185abf709669bb9693f6b416", + ), + ("sha1_git", "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0"), + ] + ) result = multi_checksums(test_file) assert result == expected def test_multi_checksums_shattered2(self): - test_file = self.get_test_loc('hash/sha1-collision/shattered-2.pdf') - expected = dict([ - ('md5', '5bd9d8cabc46041579a311230539b8d1'), - ('sha1', 
'38762cf7f55934b34d179ae6a4c80cadccbb7f0a'), - ('sha256', 'd4488775d29bdef7993367d541064dbdda50d383f89f0aa13a6ff2e0894ba5ff'), - ('sha512', 'f39a04842e4b28e04558496beb7cb84654ded9c00b2f873c3ef64f9dfdbc760cd0273b816858ba5b203c0dd71af8b65d6a0c1032e00e48ace0b4705eedcc1bab'), - # Note: this is not the same as the sha1_git for shattered-1.pdf ;) - ('sha1_git', 'b621eeccd5c7edac9b7dcba35a8d5afd075e24f2'), - ]) + test_file = self.get_test_loc("hash/sha1-collision/shattered-2.pdf") + expected = dict( + [ + ("md5", "5bd9d8cabc46041579a311230539b8d1"), + ("sha1", "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"), + ("sha256", "d4488775d29bdef7993367d541064dbdda50d383f89f0aa13a6ff2e0894ba5ff"), + ( + "sha512", + "f39a04842e4b28e04558496beb7cb84654ded9c00b2f873c3ef64f9dfdbc760cd0273b816858ba5b203c0dd71af8b65d6a0c1032e00e48ace0b4705eedcc1bab", + ), + # Note: this is not the same as the sha1_git for shattered-1.pdf ;) + ("sha1_git", "b621eeccd5c7edac9b7dcba35a8d5afd075e24f2"), + ] + ) result = multi_checksums(test_file) assert result == expected def test_sha1_git_checksum(self): # $ pushd tests/commoncode/data && for f in `find hash/ -type f` ; # do echo -n "$f ";git hash-object --literally $f; done && popd - tests = [t.strip().split() for t in ''' + tests = [ + t.strip().split() + for t in """ hash/dir1/a.txt de980441c3ab03a8c07dda1ad27b8a11f39deb1e hash/dir1/a.png 5f212358671a3ada8794cb14fb5227f596447a8c hash/sha1-collision/shattered-1.pdf ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0 @@ -144,8 +169,33 @@ def test_sha1_git_checksum(self): hash/sha1-collision/shattered-2.pdf b621eeccd5c7edac9b7dcba35a8d5afd075e24f2 hash/dir2/dos.txt 0d2d3a69833f1ebcbf420875cfbc93f132bc8a0b hash/dir2/a.txt de980441c3ab03a8c07dda1ad27b8a11f39deb1e - '''.splitlines() if t.strip()] + """.splitlines() + if t.strip() + ] for test_file, expected_sha1_git in tests: test_file = self.get_test_loc(test_file) # test that we match the git hash-object assert sha1_git(test_file) == expected_sha1_git + + def 
test_checksum_from_chunks_can_stream_gigabytes(self): + chunk_16mb = b"0" * 16000000 + chunks_3dot2gb = (chunk_16mb for _ in range(200)) + result = checksum_from_chunks( + chunks=chunks_3dot2gb, total_length=16000000 * 200, name="sha1_git" + ) + assert result == "494caf26c43c4473f6e930b0f5c2ecf8121bcf24" + + def test_checksum_from_chunks_from_stream_is_same_as_plain(self): + chunk = b"0" * 16000 + chunks = (chunk for _ in range(100)) + result1 = checksum_from_chunks(chunks=chunks, name="sha256") + + result2 = hashlib.sha256() + for _ in range(100): + result2.update(chunk) + assert result1 == result2.hexdigest() + + def test_checksum_empty_file(self): + test_file = self.get_test_loc("hash/empty") + checksums = multi_checksums(location=test_file, checksum_names=("sha1",)) + assert checksums == {"sha1": None} diff --git a/tests/test_ignore.py b/tests/test_ignore.py index 22eceecf..8c74286f 100644 --- a/tests/test_ignore.py +++ b/tests/test_ignore.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -17,93 +17,98 @@ class IgnoreTest(commoncode.testcase.FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + test_data_dir = os.path.join(os.path.dirname(__file__), "data") - @skipIf(on_mac, 'Return different result on Mac for reasons to investigate') + @skipIf(on_mac, "Return different result on Mac for reasons to investigate") def test_is_ignored_default_ignores_eclipse1(self): - test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz') - test_base = os.path.join(test_dir, 'eclipse') + test_dir = self.extract_test_tar("ignore/excludes/eclipse.tgz") + test_base = os.path.join(test_dir, "eclipse") - test = os.path.join(test_base, '.settings') + test = os.path.join(test_base, ".settings") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_eclipse2(self): - test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz') - test_base = os.path.join(test_dir, 'eclipse') + test_dir = self.extract_test_tar("ignore/excludes/eclipse.tgz") + test_base = os.path.join(test_dir, "eclipse") - test = os.path.join(test_base, '.settings/somefile') + test = os.path.join(test_base, ".settings/somefile") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_eclipse3(self): - test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz') - test_base = os.path.join(test_dir, 'eclipse') + test_dir = self.extract_test_tar("ignore/excludes/eclipse.tgz") + test_base = os.path.join(test_dir, "eclipse") - test = os.path.join(test_base, '.project') + test = os.path.join(test_base, ".project") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_eclipse4(self): - test_dir = self.extract_test_tar('ignore/excludes/eclipse.tgz') - test_base = os.path.join(test_dir, 'eclipse') + test_dir = self.extract_test_tar("ignore/excludes/eclipse.tgz") + test_base = os.path.join(test_dir, "eclipse") - test = os.path.join(test_base, 
'.pydevproject') + test = os.path.join(test_base, ".pydevproject") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_mac1(self): - test_dir = self.extract_test_tar('ignore/excludes/mac.tgz') - test_base = os.path.join(test_dir, 'mac') + test_dir = self.extract_test_tar("ignore/excludes/mac.tgz") + test_base = os.path.join(test_dir, "mac") - test = os.path.join(test_base, '__MACOSX') + test = os.path.join(test_base, "__MACOSX") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_mac2(self): - test_dir = self.extract_test_tar('ignore/excludes/mac.tgz') - test_base = os.path.join(test_dir, 'mac') + test_dir = self.extract_test_tar("ignore/excludes/mac.tgz") + test_base = os.path.join(test_dir, "mac") - test = os.path.join(test_base, '__MACOSX/comp_match/smallrepo/._jetty_1.0_index.csv') + test = os.path.join(test_base, "__MACOSX/comp_match/smallrepo/._jetty_1.0_index.csv") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_mac3(self): - test_dir = self.extract_test_tar('ignore/excludes/mac.tgz') - test_base = os.path.join(test_dir, 'mac') + test_dir = self.extract_test_tar("ignore/excludes/mac.tgz") + test_base = os.path.join(test_dir, "mac") - test = os.path.join(test_base, '.DS_Store') + test = os.path.join(test_base, ".DS_Store") assert ignore.is_ignored(test, ignore.default_ignores, {}) def test_is_ignored_default_ignores_mac4(self): - test_dir = self.extract_test_tar('ignore/excludes/mac.tgz') - test_base = os.path.join(test_dir, 'mac') + test_dir = self.extract_test_tar("ignore/excludes/mac.tgz") + test_base = os.path.join(test_dir, "mac") - test = os.path.join(test_base, '.DS_Store/a') + test = os.path.join(test_base, ".DS_Store/a") assert ignore.is_ignored(test, ignore.default_ignores, {}) - @skipIf(on_mac, 'Return different result on Mac for reasons to investigate') + @skipIf(on_mac, "Return different result on Mac for reasons to 
investigate") def test_is_ignored_default_ignores_mac5(self): - test_dir = self.extract_test_tar('ignore/excludes/mac.tgz') - test_base = os.path.join(test_dir, 'mac') + test_dir = self.extract_test_tar("ignore/excludes/mac.tgz") + test_base = os.path.join(test_dir, "mac") - test = os.path.join(test_base, '._.DS_Store') + test = os.path.join(test_base, "._.DS_Store") # this is really weird as a behavior # 'Default ignore: MacOSX artifact' assert ignore.is_ignored(test, ignore.default_ignores, {}) - @skipIf(on_mac, 'Return different result on Mac for reasons to investigate') + @skipIf(on_mac, "Return different result on Mac for reasons to investigate") def test_is_ignored_default_ignores_msft(self): - test_dir = self.extract_test_tar('ignore/excludes/msft-vs.tgz') - test = os.path.join(test_dir, 'msft-vs/tst.sluo') + test_dir = self.extract_test_tar("ignore/excludes/msft-vs.tgz") + test = os.path.join(test_dir, "msft-vs/tst.sluo") # 'Default ignore: Microsoft VS project artifact' ?? assert ignore.is_ignored(test, ignore.default_ignores, {}) - @skipIf(on_mac, 'Return different result on Mac for reasons to investigate') + @skipIf(on_mac, "Return different result on Mac for reasons to investigate") def test_is_ignored_skip_vcs_files_and_dirs(self): - test_dir = self.extract_test_tar('ignore/vcs.tgz') + test_dir = self.extract_test_tar("ignore/vcs.tgz") result = [] for top, dirs, files in os.walk(test_dir, topdown=True): not_ignored = [] for d in dirs: p = os.path.join(top, d) ign = ignore.is_ignored(p, ignore.default_ignores, {}) - tp = fileutils.as_posixpath(p.replace(test_dir, '')) - result.append((tp, ign,)) + tp = fileutils.as_posixpath(p.replace(test_dir, "")) + result.append( + ( + tp, + ign, + ) + ) if not ign: not_ignored.append(d) @@ -113,84 +118,90 @@ def test_is_ignored_skip_vcs_files_and_dirs(self): for f in files: p = os.path.join(top, f) ign = ignore.is_ignored(p, ignore.default_ignores, {}) - tp = fileutils.as_posixpath(p.replace(test_dir, '')) - 
result.append((tp, ign,)) + tp = fileutils.as_posixpath(p.replace(test_dir, "")) + result.append( + ( + tp, + ign, + ) + ) expected = [ - ('/vcs', False), - ('/vcs/.bzr', True), - ('/vcs/.git', True), - ('/vcs/.hg', True), - ('/vcs/.repo', True), - ('/vcs/.svn', True), - ('/vcs/CVS', True), - ('/vcs/_darcs', True), - ('/vcs/_MTN', True), - ('/vcs/.bzrignore', True), - ('/vcs/.cvsignore', True), - ('/vcs/.gitignore', True), - ('/vcs/.hgignore', True), - ('/vcs/.svnignore', True), - ('/vcs/vssver.scc', True), + ("/vcs", False), + ("/vcs/.bzr", True), + ("/vcs/.git", True), + ("/vcs/.hg", True), + ("/vcs/.repo", True), + ("/vcs/.svn", True), + ("/vcs/CVS", True), + ("/vcs/_darcs", True), + ("/vcs/_MTN", True), + ("/vcs/.bzrignore", True), + ("/vcs/.cvsignore", True), + ("/vcs/.gitignore", True), + ("/vcs/.hgignore", True), + ("/vcs/.svnignore", True), + ("/vcs/vssver.scc", True), ] assert sorted(result) == sorted(expected) def test_fileset_is_included_with_default_ignore_does_not_skip_one_char_names(self): # use fileset directly to work on strings not locations from commoncode import fileset - tests = [c for c in 'HFS+ Private Data'] + 'HFS+ Private Data'.split() - result = [(t, - fileset.is_included(t, excludes=ignore.default_ignores, includes={})) - for t in tests] + + tests = [c for c in "HFS+ Private Data"] + "HFS+ Private Data".split() + result = [ + (t, fileset.is_included(t, excludes=ignore.default_ignores, includes={})) for t in tests + ] expected = [ - ('H', True), - ('F', True), - ('S', True), - ('+', True), - (' ', False), - ('P', True), - ('r', True), - ('i', True), - ('v', True), - ('a', True), - ('t', True), - ('e', True), - (' ', False), - ('D', True), - ('a', True), - ('t', True), - ('a', True), - ('HFS+', True), - ('Private', True), - ('Data', True) + ("H", True), + ("F", True), + ("S", True), + ("+", True), + (" ", False), + ("P", True), + ("r", True), + ("i", True), + ("v", True), + ("a", True), + ("t", True), + ("e", True), + (" ", False), + ("D", 
True), + ("a", True), + ("t", True), + ("a", True), + ("HFS+", True), + ("Private", True), + ("Data", True), ] assert result == expected - @skipIf(on_mac or on_windows, 'We are only testing on posix for now') + @skipIf(on_mac or on_windows, "We are only testing on posix for now") def test_is_ignored_path_string_skip_special(self): - test_path = '/test/path' - assert ignore.is_ignored(test_path, {'asdf': 'skip'}, {}, skip_special=True) - assert not ignore.is_ignored(test_path, {'asdf': 'skip'}, {}, skip_special=False) + test_path = "/test/path" + assert ignore.is_ignored(test_path, {"asdf": "skip"}, {}, skip_special=True) + assert not ignore.is_ignored(test_path, {"asdf": "skip"}, {}, skip_special=False) - @skipIf(on_mac or on_windows, 'We are only testing on posix for now') + @skipIf(on_mac or on_windows, "We are only testing on posix for now") def test_is_ignored_special_files_skip_special(self): test_fifo = self.get_temp_file() os.mkfifo(test_fifo) - assert ignore.is_ignored(test_fifo, {'asdf': 'skip'}, {}, skip_special=True) - assert not ignore.is_ignored(test_fifo, {'asdf': 'skip'}, {}, skip_special=False) + assert ignore.is_ignored(test_fifo, {"asdf": "skip"}, {}, skip_special=True) + assert not ignore.is_ignored(test_fifo, {"asdf": "skip"}, {}, skip_special=False) test_symlink = self.get_temp_file() - test_file_location = self.get_test_loc('ignore/vcs.tgz') + test_file_location = self.get_test_loc("ignore/vcs.tgz") os.symlink(test_file_location, test_symlink) - assert ignore.is_ignored(test_symlink, {'asdf': 'skip'}, {}, skip_special=True) - assert not ignore.is_ignored(test_symlink, {'asdf': 'skip'}, {}, skip_special=False) + assert ignore.is_ignored(test_symlink, {"asdf": "skip"}, {}, skip_special=True) + assert not ignore.is_ignored(test_symlink, {"asdf": "skip"}, {}, skip_special=False) - @skipIf(on_mac or on_windows, 'We are only testing on posix for now') + @skipIf(on_mac or on_windows, "We are only testing on posix for now") def 
test_is_ignored_real_location_skip_special(self): - test_file_location = self.get_test_loc('ignore/vcs.tgz') - assert not ignore.is_ignored(test_file_location, {'asdf': 'skip'}, {}, skip_special=True) - assert not ignore.is_ignored(test_file_location, {'asdf': 'skip'}, {}, skip_special=False) + test_file_location = self.get_test_loc("ignore/vcs.tgz") + assert not ignore.is_ignored(test_file_location, {"asdf": "skip"}, {}, skip_special=True) + assert not ignore.is_ignored(test_file_location, {"asdf": "skip"}, {}, skip_special=False) - assert not ignore.is_ignored(test_file_location, {'asdf': 'skip'}, {}, skip_special=True) - assert not ignore.is_ignored(test_file_location, {'asdf': 'skip'}, {}, skip_special=False) + assert not ignore.is_ignored(test_file_location, {"asdf": "skip"}, {}, skip_special=True) + assert not ignore.is_ignored(test_file_location, {"asdf": "skip"}, {}, skip_special=False) diff --git a/tests/test_paths.py b/tests/test_paths.py index be855c26..67bff071 100644 --- a/tests/test_paths.py +++ b/tests/test_paths.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -12,300 +12,315 @@ class TestPortablePath(TestCase): - def test_safe_path_mixed_slashes(self): - test = paths.safe_path('C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\drupal6/drupal.js') - expected = 'C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/drupal.js' + test = paths.safe_path( + "C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\drupal6/drupal.js" + ) + expected = "C/Documents_and_Settings/Boki/Desktop/head/patches/drupal6/drupal.js" assert test == expected def test_safe_path_mixed_slashes_and_spaces(self): - test = paths.safe_path('C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\parallel uploads/drupal.js') - expected = 'C/Documents_and_Settings/Boki/Desktop/head/patches/parallel_uploads/drupal.js' + test = paths.safe_path( + "C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\parallel uploads/drupal.js" + ) + expected = "C/Documents_and_Settings/Boki/Desktop/head/patches/parallel_uploads/drupal.js" assert test == expected def test_safe_path_windows_style(self): - test = paths.safe_path('C:\\Documents and Settings\\Administrator\\Desktop\\siftDemoV4_old\\defs.h') - expected = 'C/Documents_and_Settings/Administrator/Desktop/siftDemoV4_old/defs.h' + test = paths.safe_path( + "C:\\Documents and Settings\\Administrator\\Desktop\\siftDemoV4_old\\defs.h" + ) + expected = "C/Documents_and_Settings/Administrator/Desktop/siftDemoV4_old/defs.h" assert test == expected def test_safe_path_windows_style_mixed_slashes_no_spaces(self): - test = paths.safe_path('C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\imagefield/imagefield.css') - expected = 'C/Documents_and_Settings/Boki/Desktop/head/patches/imagefield/imagefield.css' + test = paths.safe_path( + "C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\imagefield/imagefield.css" + ) + expected = "C/Documents_and_Settings/Boki/Desktop/head/patches/imagefield/imagefield.css" assert test == expected def test_safe_path_windows_style_spaces(self): - test 
= paths.safe_path('C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\js delete\\imagefield.css') - expected = 'C/Documents_and_Settings/Boki/Desktop/head/patches/js_delete/imagefield.css' + test = paths.safe_path( + "C:\\Documents and Settings\\Boki\\Desktop\\head\\patches\\js delete\\imagefield.css" + ) + expected = "C/Documents_and_Settings/Boki/Desktop/head/patches/js_delete/imagefield.css" assert test == expected def test_safe_path_windows_style_posix_slashes(self): - test = paths.safe_path('C:/Documents and Settings/Alex Burgel/workspace/Hibernate3.2/test/org/hibernate/test/AllTests.java') - expected = 'C/Documents_and_Settings/Alex_Burgel/workspace/Hibernate3.2/test/org/hibernate/test/AllTests.java' + test = paths.safe_path( + "C:/Documents and Settings/Alex Burgel/workspace/Hibernate3.2/test/org/hibernate/test/AllTests.java" + ) + expected = "C/Documents_and_Settings/Alex_Burgel/workspace/Hibernate3.2/test/org/hibernate/test/AllTests.java" assert test == expected def test_safe_path_windows_style_relative(self): - test = paths.safe_path('includes\\webform.components.inc') - expected = 'includes/webform.components.inc' + test = paths.safe_path("includes\\webform.components.inc") + expected = "includes/webform.components.inc" assert test == expected def test_safe_path_windows_style_absolute_trailing_slash(self): - test = paths.safe_path('\\includes\\webform.components.inc\\') - expected = 'includes/webform.components.inc' + test = paths.safe_path("\\includes\\webform.components.inc\\") + expected = "includes/webform.components.inc" assert test == expected def test_safe_path_posix_style_relative(self): - test = paths.safe_path('includes/webform.components.inc') - expected = 'includes/webform.components.inc' + test = paths.safe_path("includes/webform.components.inc") + expected = "includes/webform.components.inc" assert test == expected def test_safe_path_posix_style_absolute_trailing_slash(self): - test = 
paths.safe_path('/includes/webform.components.inc/') - expected = 'includes/webform.components.inc' + test = paths.safe_path("/includes/webform.components.inc/") + expected = "includes/webform.components.inc" assert test == expected def test_safe_path_posix_style_french_char(self): - test = paths.safe_path('/includes/webform.compon\xc3nts.inc/') - expected = 'includes/webform.componAnts.inc' + test = paths.safe_path("/includes/webform.compon\xc3nts.inc/") + expected = "includes/webform.componAnts.inc" assert test == expected def test_safe_path_posix_style_chinese_char(self): - test = paths.safe_path(b'/includes/webform.compon\xd2\xaants.inc/') - expected = 'includes/webform.componNSnts.inc' - assert test == expected + test = paths.safe_path(b"/includes/webform.compon\xd2\xaants.inc/") + expected = [ + "includes/webform.componNSnts.inc", + "includes/webform.componS_nts.inc", + ] + assert test in expected def test_safe_path_windows_style_dots(self): - test = paths.safe_path('\\includes\\..\\webform.components.inc\\') - expected = 'webform.components.inc' + test = paths.safe_path("\\includes\\..\\webform.components.inc\\") + expected = "webform.components.inc" assert test == expected def test_safe_path_windows_style_many_dots(self): - test = paths.safe_path('.\\includes\\.\\..\\..\\..\\webform.components.inc\\.') - expected = 'dotdot/dotdot/webform.components.inc' + test = paths.safe_path(".\\includes\\.\\..\\..\\..\\webform.components.inc\\.") + expected = "dotdot/dotdot/webform.components.inc" assert test == expected def test_safe_path_posix_style_dots(self): - test = paths.safe_path('includes/../webform.components.inc') - expected = 'webform.components.inc' + test = paths.safe_path("includes/../webform.components.inc") + expected = "webform.components.inc" assert test == expected def test_safe_path_posix_style_many_dots(self): - test = paths.safe_path('./includes/./../../../../webform.components.inc/.') - expected = 'dotdot/dotdot/dotdot/webform.components.inc' + 
test = paths.safe_path("./includes/./../../../../webform.components.inc/.") + expected = "dotdot/dotdot/dotdot/webform.components.inc" assert test == expected def test_safe_path_posix_only(self): - test_path = 'var/lib/dpkg/info/libgsm1:amd64.list' + test_path = "var/lib/dpkg/info/libgsm1:amd64.list" test = paths.safe_path(test_path) - expected = 'var/lib/dpkg/info/libgsm1_amd64.list' + expected = "var/lib/dpkg/info/libgsm1_amd64.list" assert test == expected test = paths.safe_path(test_path, posix_only=True) assert test == test_path def test_resolve_mixed_slash(self): - test = paths.resolve('C:\\..\\./drupal.js') - expected = 'C/drupal.js' + test = paths.resolve("C:\\..\\./drupal.js") + expected = "C/drupal.js" assert test == expected def test_resolve_2(self): - test = paths.resolve('\\includes\\..\\webform.components.inc\\') - expected = 'webform.components.inc' + test = paths.resolve("\\includes\\..\\webform.components.inc\\") + expected = "webform.components.inc" assert test == expected def test_resolve_3(self): - test = paths.resolve('includes/../webform.components.inc') - expected = 'webform.components.inc' + test = paths.resolve("includes/../webform.components.inc") + expected = "webform.components.inc" assert test == expected def test_resolve_4(self): - test = paths.resolve('////.//includes/./../..//..///../webform.components.inc/.') - expected = 'dotdot/dotdot/dotdot/webform.components.inc' + test = paths.resolve("////.//includes/./../..//..///../webform.components.inc/.") + expected = "dotdot/dotdot/dotdot/webform.components.inc" assert test == expected def test_resolve_5(self): - test = paths.resolve(u'////.//includes/./../..//..///../webform.components.inc/.') - expected = u'dotdot/dotdot/dotdot/webform.components.inc' + test = paths.resolve("////.//includes/./../..//..///../webform.components.inc/.") + expected = "dotdot/dotdot/dotdot/webform.components.inc" assert test == expected def test_resolve_6(self): - test = paths.resolve('includes/../') - 
expected = '.' + test = paths.resolve("includes/../") + expected = "." assert test == expected def test_portable_filename(self): - expected = 'A___file__with_Spaces.mov' + expected = "A___file__with_Spaces.mov" assert paths.portable_filename("A:\\ file/ with Spaces.mov") == expected # Test `preserve_spaces` option. Spaces should not be replaced - expected = 'Program Files (x86)' + expected = "Program Files (x86)" assert paths.portable_filename("Program Files (x86)", preserve_spaces=True) == expected # Unresolved relative paths will be treated as a single filename. Use # resolve instead if you want to resolve paths: - expected = '___.._.._etc_passwd' + expected = "___.._.._etc_passwd" assert paths.portable_filename("../../../etc/passwd") == expected # Unicode name are transliterated: - expected = 'This_contain_UMLAUT_umlauts.txt' - assert paths.portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt') == expected + expected = "This_contain_UMLAUT_umlauts.txt" + assert paths.portable_filename("This contain UMLAUT \xfcml\xe4uts.txt") == expected # Check to see if illegal Windows filenames are properly handled for illegal_window_name in paths.ILLEGAL_WINDOWS_NAMES: # Rename files with names that are illegal on Windows - expected = f'{illegal_window_name}_' + expected = f"{illegal_window_name}_" assert paths.portable_filename(illegal_window_name) == expected # Allow files with names that are illegal on Windows - assert paths.portable_filename(illegal_window_name, posix_only=True) == illegal_window_name + assert ( + paths.portable_filename(illegal_window_name, posix_only=True) == illegal_window_name + ) # Check to see if the posix_only option does and does not replace # punctuation characters that are illegal in Windows filenames for valid_posix_path_char in paths.posix_legal_punctuation: - test_name = f'test{valid_posix_path_char}' + test_name = f"test{valid_posix_path_char}" assert paths.portable_filename(test_name, posix_only=True) == test_name if 
valid_posix_path_char not in paths.legal_punctuation: - expected = f'test_' + expected = f"test_" assert paths.portable_filename(test_name) == expected class TestCommonPath(TestCase): - def test_common_path_prefix1(self): - test = paths.common_path_prefix('/a/b/c', '/a/b/c') - assert test == ('a/b/c', 3) + test = paths.common_path_prefix("/a/b/c", "/a/b/c") + assert test == ("a/b/c", 3) def test_common_path_prefix2(self): - test = paths.common_path_prefix('/a/b/c', '/a/b') - assert test == ('a/b', 2) + test = paths.common_path_prefix("/a/b/c", "/a/b") + assert test == ("a/b", 2) def test_common_path_prefix3(self): - test = paths.common_path_prefix('/a/b', '/a/b/c') - assert test == ('a/b', 2) + test = paths.common_path_prefix("/a/b", "/a/b/c") + assert test == ("a/b", 2) def test_common_path_prefix4(self): - test = paths.common_path_prefix('/a', '/a') - assert test == ('a', 1) + test = paths.common_path_prefix("/a", "/a") + assert test == ("a", 1) def test_common_path_prefix_path_root(self): - test = paths.common_path_prefix('/a/b/c', '/') + test = paths.common_path_prefix("/a/b/c", "/") assert test == (None, 0) def test_common_path_prefix_root_path(self): - test = paths.common_path_prefix('/', '/a/b/c') + test = paths.common_path_prefix("/", "/a/b/c") assert test == (None, 0) def test_common_path_prefix_root_root(self): - test = paths.common_path_prefix('/', '/') + test = paths.common_path_prefix("/", "/") assert test == (None, 0) def test_common_path_prefix_path_elements_are_similar(self): - test = paths.common_path_prefix('/a/b/c', '/a/b/d') - assert test == ('a/b', 2) + test = paths.common_path_prefix("/a/b/c", "/a/b/d") + assert test == ("a/b", 2) def test_common_path_prefix_no_match(self): - test = paths.common_path_prefix('/abc/d', '/abe/f') + test = paths.common_path_prefix("/abc/d", "/abe/f") assert test == (None, 0) def test_common_path_prefix_ignore_training_slashes(self): - test = paths.common_path_prefix('/a/b/c/', '/a/b/c/') - assert test == ('a/b/c', 
3) + test = paths.common_path_prefix("/a/b/c/", "/a/b/c/") + assert test == ("a/b/c", 3) def test_common_path_prefix8(self): - test = paths.common_path_prefix('/a/b/c/', '/a/b') - assert test == ('a/b', 2) + test = paths.common_path_prefix("/a/b/c/", "/a/b") + assert test == ("a/b", 2) def test_common_path_prefix10(self): - test = paths.common_path_prefix('/a/b/c.txt', '/a/b/b.txt') - assert test == ('a/b', 2) + test = paths.common_path_prefix("/a/b/c.txt", "/a/b/b.txt") + assert test == ("a/b", 2) def test_common_path_prefix11(self): - test = paths.common_path_prefix('/a/b/c.txt', '/a/b.txt') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/b/c.txt", "/a/b.txt") + assert test == ("a", 1) def test_common_path_prefix12(self): - test = paths.common_path_prefix('/a/c/e/x.txt', '/a/d/a.txt') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c/e/x.txt", "/a/d/a.txt") + assert test == ("a", 1) def test_common_path_prefix13(self): - test = paths.common_path_prefix('/a/c/e/x.txt', '/a/d/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c/e/x.txt", "/a/d/") + assert test == ("a", 1) def test_common_path_prefix14(self): - test = paths.common_path_prefix('/a/c/e/', '/a/d/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c/e/", "/a/d/") + assert test == ("a", 1) def test_common_path_prefix15(self): - test = paths.common_path_prefix('/a/c/e/', '/a/c/a.txt') - assert test == ('a/c', 2) + test = paths.common_path_prefix("/a/c/e/", "/a/c/a.txt") + assert test == ("a/c", 2) def test_common_path_prefix16(self): - test = paths.common_path_prefix('/a/c/e/', '/a/c/f/') - assert test == ('a/c', 2) + test = paths.common_path_prefix("/a/c/e/", "/a/c/f/") + assert test == ("a/c", 2) def test_common_path_prefix17(self): - test = paths.common_path_prefix('/a/a.txt', '/a/b.txt/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/a.txt", "/a/b.txt/") + assert test == ("a", 1) def test_common_path_prefix18(self): - 
test = paths.common_path_prefix('/a/c/', '/a/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c/", "/a/") + assert test == ("a", 1) def test_common_path_prefix19(self): - test = paths.common_path_prefix('/a/c.txt', '/a/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c.txt", "/a/") + assert test == ("a", 1) def test_common_path_prefix20(self): - test = paths.common_path_prefix('/a/c/', '/a/d/') - assert test == ('a', 1) + test = paths.common_path_prefix("/a/c/", "/a/d/") + assert test == ("a", 1) def test_common_path_suffix(self): - test = paths.common_path_suffix('/a/b/c', '/a/b/c') - assert test == ('a/b/c', 3) + test = paths.common_path_suffix("/a/b/c", "/a/b/c") + assert test == ("a/b/c", 3) def test_common_path_suffix_absolute_relative(self): - test = paths.common_path_suffix('a/b/c', '/a/b/c') - assert test == ('a/b/c', 3) + test = paths.common_path_suffix("a/b/c", "/a/b/c") + assert test == ("a/b/c", 3) def test_common_path_suffix_find_subpath(self): - test = paths.common_path_suffix('/z/b/c', '/a/b/c') - assert test == ('b/c', 2) + test = paths.common_path_suffix("/z/b/c", "/a/b/c") + assert test == ("b/c", 2) def test_common_path_suffix_handles_relative_path(self): - test = paths.common_path_suffix('a/b', 'a/b') - assert test == ('a/b', 2) + test = paths.common_path_suffix("a/b", "a/b") + assert test == ("a/b", 2) def test_common_path_suffix_handles_relative_subpath(self): - test = paths.common_path_suffix('zsds/adsds/a/b/b/c', 'a//a/d//b/c') - assert test == ('b/c', 2) + test = paths.common_path_suffix("zsds/adsds/a/b/b/c", "a//a/d//b/c") + assert test == ("b/c", 2) def test_common_path_suffix_ignore_and_strip_trailing_slash(self): - test = paths.common_path_suffix('zsds/adsds/a/b/b/c/', 'a//a/d//b/c/') - assert test == ('b/c', 2) + test = paths.common_path_suffix("zsds/adsds/a/b/b/c/", "a//a/d//b/c/") + assert test == ("b/c", 2) def test_common_path_suffix_return_None_if_no_common_suffix(self): - test = 
paths.common_path_suffix('/a/b/c', '/') + test = paths.common_path_suffix("/a/b/c", "/") assert test == (None, 0) def test_common_path_suffix_return_None_if_no_common_suffix2(self): - test = paths.common_path_suffix('/', '/a/b/c') + test = paths.common_path_suffix("/", "/a/b/c") assert test == (None, 0) def test_common_path_suffix_match_only_whole_segments(self): # only segments are honored, commonality within segment is ignored - test = paths.common_path_suffix('this/is/aaaa/great/path', 'this/is/aaaaa/great/path') - assert test == ('great/path', 2) + test = paths.common_path_suffix("this/is/aaaa/great/path", "this/is/aaaaa/great/path") + assert test == ("great/path", 2) def test_common_path_suffix_two_root(self): - test = paths.common_path_suffix('/', '/') + test = paths.common_path_suffix("/", "/") assert test == (None, 0) def test_common_path_suffix_empty_root(self): - test = paths.common_path_suffix('', '/') + test = paths.common_path_suffix("", "/") assert test == (None, 0) def test_common_path_suffix_root_empty(self): - test = paths.common_path_suffix('/', '') + test = paths.common_path_suffix("/", "") assert test == (None, 0) def test_common_path_suffix_empty_empty(self): - test = paths.common_path_suffix('', '') + test = paths.common_path_suffix("", "") assert test == (None, 0) diff --git a/tests/test_resource.py b/tests/test_resource.py index 2d899ae9..6249ebb4 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -24,21 +24,21 @@ class TestCodebase(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_walk_defaults(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = list(codebase.walk()) expected = [ - ('codebase', False), - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("codebase", False), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected @@ -47,79 +47,79 @@ def test_Codebase_do_not_ignore_by_default_older_sccs_and_rcs_dirs(self): from commoncode.fileutils import create_dir test_codebase = self.get_temp_dir() - create_dir(join(test_codebase, 'sccs', 'a')) - create_dir(join(test_codebase, 'rcs', 'b')) + create_dir(join(test_codebase, "sccs", "a")) + create_dir(join(test_codebase, "rcs", "b")) codebase = Codebase(test_codebase) results = list(codebase.walk(topdown=True, skip_root=True)) - expected = ['rcs', 'b', 'sccs', 'a'] + expected = ["rcs", "b", "sccs", "a"] assert [r.name for r in results] == expected def test_walk_topdown(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = list(codebase.walk(topdown=True)) expected = [ - ('codebase', False), - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("codebase", False), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_bottomup(self): - test_codebase = 
self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = list(codebase.walk(topdown=False)) expected = [ - ('abc', True), - ('et131x.h', True), - ('that', True), - ('this', True), - ('dir', False), - ('file', True), - ('other dir', False), - ('codebase', False), + ("abc", True), + ("et131x.h", True), + ("that", True), + ("this", True), + ("dir", False), + ("file", True), + ("other dir", False), + ("codebase", False), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_skip_root_basic(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = list(codebase.walk(skip_root=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_with_filtered_root(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) codebase.root.is_filtered = True codebase.save_resource(codebase.root) results = list(codebase.walk_filtered()) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_with_all_filtered(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res 
in codebase.walk(): res.is_filtered = True @@ -130,14 +130,14 @@ def test_walk_filtered_with_all_filtered(self): assert [(r.name, r.is_file) for r in results] == expected def test_compute_counts_filtered_None(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = codebase.compute_counts(skip_filtered=True) expected = (5, 3, 0) assert results == expected def test_compute_counts_filtered_None_with_size(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): if res.is_file: @@ -149,14 +149,14 @@ def test_compute_counts_filtered_None_with_size(self): assert results == expected def test_compute_counts_filtered_None_with_cache(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) results = codebase.compute_counts(skip_filtered=True) expected = (5, 3, 0) assert results == expected def test_compute_counts_filtered_all(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): res.is_filtered = True @@ -166,7 +166,7 @@ def test_compute_counts_filtered_all(self): assert results == expected def test_compute_counts_filtered_all_with_cache(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): res.is_filtered = True @@ -176,7 +176,7 @@ def test_compute_counts_filtered_all_with_cache(self): assert results == expected def test_compute_counts_filtered_files(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = 
Codebase(test_codebase) for res in codebase.walk(): if res.is_file: @@ -187,7 +187,7 @@ def test_compute_counts_filtered_files(self): assert results == expected def test_compute_counts_filtered_dirs(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): if not res.is_file: @@ -198,7 +198,7 @@ def test_compute_counts_filtered_dirs(self): assert results == expected def test_walk_filtered_dirs(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): if not res.is_file: @@ -207,33 +207,33 @@ def test_walk_filtered_dirs(self): results = list(codebase.walk_filtered(topdown=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('that', True), - ('this', True), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("that", True), + ("this", True), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_skip_root(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) codebase.root.is_filtered = True codebase.save_resource(codebase.root) results = list(codebase.walk_filtered(skip_root=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_all_skip_root(self): - test_codebase = self.get_test_loc('resource/codebase') + test_codebase = self.get_test_loc("resource/codebase") codebase = Codebase(test_codebase) for res in codebase.walk(): res.is_filtered = True 
@@ -243,21 +243,21 @@ def test_walk_filtered_all_skip_root(self): assert [(r.name, r.is_file) for r in results] == expected def test_walk_skip_root_single_file(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase) results = list(codebase.walk(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_with_skip_root_and_single_file_not_filtered(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase) results = list(codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered__with_skip_root_and_filtered_single_file(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase) codebase.root.is_filtered = True codebase.save_resource(codebase.root) @@ -266,51 +266,51 @@ def test_walk_filtered__with_skip_root_and_filtered_single_file(self): assert [(r.name, r.is_file) for r in results] == expected def test_walk_skip_root_single_file_with_children(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase, strip_root=True) root = codebase.root - c1 = codebase._get_or_create_resource('child1', parent=root, is_file=True) - codebase._get_or_create_resource('child2', parent=c1, is_file=False) + c1 = codebase._get_or_create_resource("child1", parent=root, is_file=True) + codebase._get_or_create_resource("child2", parent=c1, is_file=False) results = list(codebase.walk(skip_root=True)) - 
expected = [('et131x.h', True), ('child1', True), ('child2', False)] + expected = [("et131x.h", True), ("child1", True), ("child2", False)] assert [(r.name, r.is_file) for r in results] == expected def test_walk_filtered_with_skip_root_and_single_file_with_children(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase, strip_root=True) - c1 = codebase._get_or_create_resource('some child', parent=codebase.root, is_file=True) - c2 = codebase._get_or_create_resource('some child2', parent=c1, is_file=False) + c1 = codebase._get_or_create_resource("some child", parent=codebase.root, is_file=True) + c2 = codebase._get_or_create_resource("some child2", parent=c1, is_file=False) c2.is_filtered = True codebase.save_resource(c2) results = list(codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True), ('some child', True)] + expected = [("et131x.h", True), ("some child", True)] assert [(r.name, r.is_file) for r in results] == expected c1.is_filtered = True codebase.save_resource(c1) results = list(codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_walk_skip_root_single_dir(self): - test_codebase = self.get_temp_dir('walk') + test_codebase = self.get_temp_dir("walk") codebase = Codebase(test_codebase, strip_root=True) results = list(codebase.walk(skip_root=True)) - expected = [('walk', False)] + expected = [("walk", False)] assert [(r.name, r.is_file) for r in results] == expected def test_walk_skipped_directories_should_not_be_yielded(self): # Resources that we continue past should not be added to the result list - test_codebase = self.get_test_loc('resource/skip_directories_during_walk') + test_codebase = self.get_test_loc("resource/skip_directories_during_walk") cdbs = Codebase(test_codebase) def _ignored(resource, 
codebase): - return resource.is_dir and resource.name == 'skip-this-directory' + return resource.is_dir and resource.name == "skip-this-directory" result = [ res.name @@ -320,44 +320,44 @@ def _ignored(resource, codebase): ) ] - expected = ['skip_directories_during_walk', 'this-should-be-returned'] + expected = ["skip_directories_during_walk", "this-should-be-returned"] assert result == expected def test__create_resource_can_add_child_to_file(self): - test_codebase = self.get_test_loc('resource/codebase/et131x.h') + test_codebase = self.get_test_loc("resource/codebase/et131x.h") codebase = Codebase(test_codebase) - codebase._get_or_create_resource('some child', codebase.root, is_file=True) + codebase._get_or_create_resource("some child", codebase.root, is_file=True) results = list(codebase.walk()) - expected = [('et131x.h', True), ('some child', True)] + expected = [("et131x.h", True), ("some child", True)] assert [(r.name, r.is_file) for r in results] == expected def test__create_resource_can_add_child_to_dir(self): - test_codebase = self.get_temp_dir('resource') + test_codebase = self.get_temp_dir("resource") codebase = Codebase(test_codebase) - codebase._get_or_create_resource('some child', codebase.root, is_file=False) + codebase._get_or_create_resource("some child", codebase.root, is_file=False) results = list(codebase.walk()) - expected = [('resource', False), ('some child', False)] + expected = [("resource", False), ("some child", False)] assert [(r.name, r.is_file) for r in results] == expected def test_get_resource_for_single_resource_codebase(self): - test_codebase = self.get_temp_dir('resource') + test_codebase = self.get_temp_dir("resource") codebase = Codebase(test_codebase) - assert not (codebase.root is codebase.get_resource('resource')) - assert codebase.get_resource('resource') == codebase.root + assert not (codebase.root is codebase.get_resource("resource")) + assert codebase.get_resource("resource") == codebase.root def 
test_get_resource_for_multiple_resource_codebase(self): - test_codebase = self.get_temp_dir('resource') - for name in ('a', 'b', 'c'): - with open(os.path.join(test_codebase, name), 'w') as o: - o.write('\n') + test_codebase = self.get_temp_dir("resource") + for name in ("a", "b", "c"): + with open(os.path.join(test_codebase, name), "w") as o: + o.write("\n") codebase = Codebase(test_codebase) - assert codebase.get_resource('resource/a').path == 'resource/a' - assert codebase.get_resource('/resource/c').path == 'resource/c' - assert codebase.get_resource('resource/dsasda/../b/').path == 'resource/b' + assert codebase.get_resource("resource/a").path == "resource/a" + assert codebase.get_resource("/resource/c").path == "resource/c" + assert codebase.get_resource("resource/dsasda/../b/").path == "resource/b" def test_Resource_build_path(self): - test_dir = self.get_test_loc('resource/samples') + test_dir = self.get_test_loc("resource/samples") locations = [] for top, dirs, files in os.walk(test_dir): for x in dirs: @@ -369,49 +369,49 @@ def test_Resource_build_path(self): resources_no_root = list(codebase.walk(skip_root=True)) expected_default = [ - 'samples/JGroups', - 'samples/zlib', - 'samples/arch', - 'samples/README', - 'samples/screenshot.png', - 'samples/JGroups/src', - 'samples/JGroups/licenses', - 'samples/JGroups/LICENSE', - 'samples/JGroups/EULA', - 'samples/JGroups/src/GuardedBy.java', - 'samples/JGroups/src/ImmutableReference.java', - 'samples/JGroups/src/RouterStub.java', - 'samples/JGroups/src/S3_PING.java', - 'samples/JGroups/src/FixedMembershipToken.java', - 'samples/JGroups/src/RouterStubManager.java', - 'samples/JGroups/src/RATE_LIMITER.java', - 'samples/JGroups/licenses/cpl-1.0.txt', - 'samples/JGroups/licenses/bouncycastle.txt', - 'samples/JGroups/licenses/lgpl.txt', - 'samples/JGroups/licenses/apache-2.0.txt', - 'samples/JGroups/licenses/apache-1.1.txt', - 'samples/zlib/dotzlib', - 'samples/zlib/iostream2', - 'samples/zlib/infback9', - 
'samples/zlib/gcc_gvmat64', - 'samples/zlib/ada', - 'samples/zlib/deflate.h', - 'samples/zlib/zutil.c', - 'samples/zlib/zlib.h', - 'samples/zlib/deflate.c', - 'samples/zlib/zutil.h', - 'samples/zlib/adler32.c', - 'samples/zlib/dotzlib/AssemblyInfo.cs', - 'samples/zlib/dotzlib/LICENSE_1_0.txt', - 'samples/zlib/dotzlib/readme.txt', - 'samples/zlib/dotzlib/ChecksumImpl.cs', - 'samples/zlib/iostream2/zstream_test.cpp', - 'samples/zlib/iostream2/zstream.h', - 'samples/zlib/infback9/infback9.c', - 'samples/zlib/infback9/infback9.h', - 'samples/zlib/gcc_gvmat64/gvmat64.S', - 'samples/zlib/ada/zlib.ads', - 'samples/arch/zlib.tar.gz', + "samples/JGroups", + "samples/zlib", + "samples/arch", + "samples/README", + "samples/screenshot.png", + "samples/JGroups/src", + "samples/JGroups/licenses", + "samples/JGroups/LICENSE", + "samples/JGroups/EULA", + "samples/JGroups/src/GuardedBy.java", + "samples/JGroups/src/ImmutableReference.java", + "samples/JGroups/src/RouterStub.java", + "samples/JGroups/src/S3_PING.java", + "samples/JGroups/src/FixedMembershipToken.java", + "samples/JGroups/src/RouterStubManager.java", + "samples/JGroups/src/RATE_LIMITER.java", + "samples/JGroups/licenses/cpl-1.0.txt", + "samples/JGroups/licenses/bouncycastle.txt", + "samples/JGroups/licenses/lgpl.txt", + "samples/JGroups/licenses/apache-2.0.txt", + "samples/JGroups/licenses/apache-1.1.txt", + "samples/zlib/dotzlib", + "samples/zlib/iostream2", + "samples/zlib/infback9", + "samples/zlib/gcc_gvmat64", + "samples/zlib/ada", + "samples/zlib/deflate.h", + "samples/zlib/zutil.c", + "samples/zlib/zlib.h", + "samples/zlib/deflate.c", + "samples/zlib/zutil.h", + "samples/zlib/adler32.c", + "samples/zlib/dotzlib/AssemblyInfo.cs", + "samples/zlib/dotzlib/LICENSE_1_0.txt", + "samples/zlib/dotzlib/readme.txt", + "samples/zlib/dotzlib/ChecksumImpl.cs", + "samples/zlib/iostream2/zstream_test.cpp", + "samples/zlib/iostream2/zstream.h", + "samples/zlib/infback9/infback9.c", + "samples/zlib/infback9/infback9.h", + 
"samples/zlib/gcc_gvmat64/gvmat64.S", + "samples/zlib/ada/zlib.ads", + "samples/arch/zlib.tar.gz", ] default = sorted( @@ -420,49 +420,49 @@ def test_Resource_build_path(self): assert default == sorted(expected_default) expected_strip_root = [ - 'JGroups', - 'zlib', - 'arch', - 'README', - 'screenshot.png', - 'JGroups/src', - 'JGroups/licenses', - 'JGroups/LICENSE', - 'JGroups/EULA', - 'JGroups/src/GuardedBy.java', - 'JGroups/src/ImmutableReference.java', - 'JGroups/src/RouterStub.java', - 'JGroups/src/S3_PING.java', - 'JGroups/src/FixedMembershipToken.java', - 'JGroups/src/RouterStubManager.java', - 'JGroups/src/RATE_LIMITER.java', - 'JGroups/licenses/cpl-1.0.txt', - 'JGroups/licenses/bouncycastle.txt', - 'JGroups/licenses/lgpl.txt', - 'JGroups/licenses/apache-2.0.txt', - 'JGroups/licenses/apache-1.1.txt', - 'zlib/dotzlib', - 'zlib/iostream2', - 'zlib/infback9', - 'zlib/gcc_gvmat64', - 'zlib/ada', - 'zlib/deflate.h', - 'zlib/zutil.c', - 'zlib/zlib.h', - 'zlib/deflate.c', - 'zlib/zutil.h', - 'zlib/adler32.c', - 'zlib/dotzlib/AssemblyInfo.cs', - 'zlib/dotzlib/LICENSE_1_0.txt', - 'zlib/dotzlib/readme.txt', - 'zlib/dotzlib/ChecksumImpl.cs', - 'zlib/iostream2/zstream_test.cpp', - 'zlib/iostream2/zstream.h', - 'zlib/infback9/infback9.c', - 'zlib/infback9/infback9.h', - 'zlib/gcc_gvmat64/gvmat64.S', - 'zlib/ada/zlib.ads', - 'arch/zlib.tar.gz', + "JGroups", + "zlib", + "arch", + "README", + "screenshot.png", + "JGroups/src", + "JGroups/licenses", + "JGroups/LICENSE", + "JGroups/EULA", + "JGroups/src/GuardedBy.java", + "JGroups/src/ImmutableReference.java", + "JGroups/src/RouterStub.java", + "JGroups/src/S3_PING.java", + "JGroups/src/FixedMembershipToken.java", + "JGroups/src/RouterStubManager.java", + "JGroups/src/RATE_LIMITER.java", + "JGroups/licenses/cpl-1.0.txt", + "JGroups/licenses/bouncycastle.txt", + "JGroups/licenses/lgpl.txt", + "JGroups/licenses/apache-2.0.txt", + "JGroups/licenses/apache-1.1.txt", + "zlib/dotzlib", + "zlib/iostream2", + "zlib/infback9", + 
"zlib/gcc_gvmat64", + "zlib/ada", + "zlib/deflate.h", + "zlib/zutil.c", + "zlib/zlib.h", + "zlib/deflate.c", + "zlib/zutil.h", + "zlib/adler32.c", + "zlib/dotzlib/AssemblyInfo.cs", + "zlib/dotzlib/LICENSE_1_0.txt", + "zlib/dotzlib/readme.txt", + "zlib/dotzlib/ChecksumImpl.cs", + "zlib/iostream2/zstream_test.cpp", + "zlib/iostream2/zstream.h", + "zlib/infback9/infback9.c", + "zlib/infback9/infback9.h", + "zlib/gcc_gvmat64/gvmat64.S", + "zlib/ada/zlib.ads", + "arch/zlib.tar.gz", ] stripped = sorted(r.strip_root_path for r in resources_no_root) assert stripped == sorted(expected_strip_root) @@ -473,7 +473,7 @@ def test_Resource_build_path(self): assert full_loc.endswith((ending)) def test_compute_counts_when_using_disk_cache(self): - test_codebase = self.get_test_loc('resource/samples') + test_codebase = self.get_test_loc("resource/samples") codebase = Codebase(test_codebase, strip_root=True, max_in_memory=-1) files_count, dirs_count, size_count = codebase.compute_counts() assert 33 == files_count @@ -481,34 +481,34 @@ def test_compute_counts_when_using_disk_cache(self): assert 0 == size_count def test_distance(self): - test_dir = self.get_test_loc('resource/dist') + test_dir = self.get_test_loc("resource/dist") codebase = Codebase(test_dir) assert codebase.root.distance(test_dir) == 0 - res = codebase.get_resource('dist/JGroups') - assert res.name == 'JGroups' + res = codebase.get_resource("dist/JGroups") + assert res.name == "JGroups" assert res.distance(codebase) == 1 - res = codebase.get_resource('dist/simple/META-INF/MANIFEST.MF') - assert res.name == 'MANIFEST.MF' - assert res.full_root_path.endswith('resource/dist/simple/META-INF/MANIFEST.MF') + res = codebase.get_resource("dist/simple/META-INF/MANIFEST.MF") + assert res.name == "MANIFEST.MF" + assert res.full_root_path.endswith("resource/dist/simple/META-INF/MANIFEST.MF") assert res.distance(codebase) == 3 def test_skip_files_and_subdirs_of_ignored_dirs(self): - test_dir = self.get_test_loc('resource/ignore') 
+ test_dir = self.get_test_loc("resource/ignore") codebase = Codebase(test_dir) # The `cvs` directory should not be visited - expected = ['ignore', 'ignore/file1'] + expected = ["ignore", "ignore/file1"] result = [r.path for r in codebase.walk(topdown=True)] self.assertEqual(expected, result) def test_depth_negative_fails(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") with self.assertRaises(Exception): next(depth_walk(test_codebase, -1)) def test_depth_walk_with_depth_0(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") results_zero = list(depth_walk(test_codebase, 0)) results_neg = list(depth_walk(test_codebase, float("inf"))) result_zero_dirs = [i for j in results_zero for i in j[1]] @@ -519,167 +519,165 @@ def test_depth_walk_with_depth_0(self): self.assertEqual(result_neg_files, result_zero_files) def test_depth_walk_with_depth_1(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") results = list(depth_walk(test_codebase, 1)) result_dirs = [i for j in results for i in j[1]].sort() result_files = [i for j in results for i in j[2]].sort() - expected_files = ['level1_file1', 'level1_file2'].sort() - expected_dirs = ['level1_dir1', 'level1_dir2'].sort() + expected_files = ["level1_file1", "level1_file2"].sort() + expected_dirs = ["level1_dir1", "level1_dir2"].sort() self.assertEqual(result_dirs, expected_dirs) self.assertEqual(result_files, expected_files) def test_depth_walk_with_depth_2(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") results = list(depth_walk(test_codebase, 2)) result_dirs = [i for j in results for i in j[1]].sort() result_files = [i for j in results for i in j[2]].sort() expected_files = [ - 'level1_file1', - 'level1_file2', - 
'level2_file2', - 'level2_file1', - 'level2_file3', - 'level2_file4', - 'level2_file5', + "level1_file1", + "level1_file2", + "level2_file2", + "level2_file1", + "level2_file3", + "level2_file4", + "level2_file5", ].sort() - expected_dirs = ['level1_dir1', 'level1_dir2', 'level2_dir1', 'level2_dir3'].sort() + expected_dirs = ["level1_dir1", "level1_dir2", "level2_dir1", "level2_dir3"].sort() self.assertEqual(result_dirs, expected_dirs) self.assertEqual(result_files, expected_files) def test_depth_walk_with_depth_3(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") results = list(depth_walk(test_codebase, 3)) result_dirs = [i for j in results for i in j[1]].sort() result_files = [i for j in results for i in j[2]].sort() expected_files = [ - 'level1_file1', - 'level1_file2', - 'level2_file2', - 'level2_file1', - 'level3_file2', - 'level3_file1', - 'level2_file3', - 'level2_file4', - 'level2_file5', - 'level3_file4', - 'level3_file3', + "level1_file1", + "level1_file2", + "level2_file2", + "level2_file1", + "level3_file2", + "level3_file1", + "level2_file3", + "level2_file4", + "level2_file5", + "level3_file4", + "level3_file3", ].sort() expected_dirs = [ - 'level1_dir1', - 'level1_dir2', - 'level2_dir1', - 'level3_dir1', - 'level2_dir3', + "level1_dir1", + "level1_dir2", + "level2_dir1", + "level3_dir1", + "level2_dir3", ].sort() self.assertEqual(result_dirs, expected_dirs) self.assertEqual(result_files, expected_files) def test_specify_depth_1(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") codebase = Codebase(test_codebase, max_depth=1) results = list(codebase.walk()) expected = [ - ('deeply_nested', False), - ('level1_dir1', False), - ('level1_dir2', False), - ('level1_file1', True), - ('level1_file2', True), + ("deeply_nested", False), + ("level1_dir1", False), + ("level1_dir2", False), + 
("level1_file1", True), + ("level1_file2", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_specify_depth_2(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") codebase = Codebase(test_codebase, max_depth=2) results = list(codebase.walk()) expected = [ - ('deeply_nested', False), - ('level1_file1', True), - ('level1_file2', True), - ('level1_dir1', False), - ('level2_dir1', False), - ('level2_file1', True), - ('level2_file2', True), - ('level1_dir2', False), - ('level2_dir3', False), - ('level2_file3', True), - ('level2_file4', True), - ('level2_file5', True), + ("deeply_nested", False), + ("level1_file1", True), + ("level1_file2", True), + ("level1_dir1", False), + ("level2_dir1", False), + ("level2_file1", True), + ("level2_file2", True), + ("level1_dir2", False), + ("level2_dir3", False), + ("level2_file3", True), + ("level2_file4", True), + ("level2_file5", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_specify_depth_3(self): - test_codebase = self.get_test_loc('resource/deeply_nested') + test_codebase = self.get_test_loc("resource/deeply_nested") codebase = Codebase(test_codebase, max_depth=3) results = list(codebase.walk()) expected = [ - ('deeply_nested', False), - ('level1_file1', True), - ('level1_file2', True), - ('level1_dir1', False), - ('level2_file1', True), - ('level2_file2', True), - ('level2_dir1', False), - ('level3_dir1', False), - ('level3_file1', True), - ('level3_file2', True), - ('level1_dir2', False), - ('level2_file3', True), - ('level2_file4', True), - ('level2_file5', True), - ('level2_dir3', False), - ('level3_file3', True), - ('level3_file4', True), + ("deeply_nested", False), + ("level1_file1", True), + ("level1_file2", True), + ("level1_dir1", False), + ("level2_file1", True), + ("level2_file2", True), + ("level2_dir1", False), + ("level3_dir1", False), + ("level3_file1", True), + ("level3_file2", 
True), + ("level1_dir2", False), + ("level2_file3", True), + ("level2_file4", True), + ("level2_file5", True), + ("level2_dir3", False), + ("level3_file3", True), + ("level3_file4", True), ] assert [(r.name, r.is_file) for r in results] == expected class TestCodebaseWithPath(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_Codebase_with_paths_works(self): - test_codebase = self.get_test_loc('resource/with_path/codebase') - paths = ['codebase/other dir/file'] + test_codebase = self.get_test_loc("resource/with_path/codebase") + paths = ["codebase/other dir/file"] codebase = Codebase(location=test_codebase, paths=paths) assert not codebase.errors results = [r.to_dict() for r in codebase.walk()] print(r.path for r in codebase.walk()) expected_file = self.get_test_loc( - 'resource/with_path/codebase-expected.json', + "resource/with_path/codebase-expected.json", must_exist=False, ) check_against_expected_json_file(results, expected_file, regen=False) def test_VirtualCodebase_with_paths_works(self): - test_codebase = self.get_test_loc('resource/with_path/virtual-codebase.json') - paths = ['codebase/other dir/file'] + test_codebase = self.get_test_loc("resource/with_path/virtual-codebase.json") + paths = ["codebase/other dir/file"] codebase = VirtualCodebase(location=test_codebase, paths=paths) assert not codebase.errors results = [r.to_dict() for r in codebase.walk()] expected_file = self.get_test_loc( - 'resource/with_path/virtual-codebase-expected.json', + "resource/with_path/virtual-codebase-expected.json", must_exist=False, ) check_against_expected_json_file(results, expected_file, regen=False) def test_VirtualCodebase_codebase_attributes_assignment(self): - test_codebase = self.get_test_loc('resource/with_path/virtual-codebase.json') + test_codebase = self.get_test_loc("resource/with_path/virtual-codebase.json") vc = VirtualCodebase( location=test_codebase, - codebase_attributes=dict( - 
packages=attr.ib(default=attr.Factory(list)) - ), + codebase_attributes=dict(packages=attr.ib(default=attr.Factory(list))), ) self.assertNotEqual(vc.attributes.packages, None) self.assertEqual(vc.attributes.packages, []) class TestCodebaseCache(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_codebase_cache_default(self): - test_codebase = self.get_test_loc('resource/cache2') + test_codebase = self.get_test_loc("resource/cache2") codebase = Codebase(test_codebase) assert codebase.temp_dir @@ -691,14 +689,19 @@ def test_codebase_cache_default(self): assert not exists(cp) assert exists(parent_directory(cp)) - child = codebase._get_or_create_resource(name='child', parent=root, is_file=True) + child = codebase._get_or_create_resource(name="child", parent=root, is_file=True) child.size = 12 codebase.save_resource(child) child_2 = codebase.get_resource(path=child.path) assert child_2 == child + def test_codebase_cache_handles_non_utf8_path(self): + test_codebase = self.get_test_loc("resource/cache2") + codebase = Codebase(test_codebase) + codebase._get_resource_cache_location("resource/cache2/\udce9", create_dirs=True) + def test_codebase_cache_all_in_memory(self): - test_codebase = self.get_test_loc('resource/cache2') + test_codebase = self.get_test_loc("resource/cache2") codebase = Codebase(test_codebase, max_in_memory=0) for path, res in codebase.resources_by_path.items(): if res is Codebase.CACHED_RESOURCE: @@ -718,7 +721,7 @@ def test_codebase_cache_all_in_memory(self): ) def test_codebase_cache_all_on_disk(self): - test_codebase = self.get_test_loc('resource/cache2') + test_codebase = self.get_test_loc("resource/cache2") codebase = Codebase(test_codebase, max_in_memory=-1) for path, res in codebase.resources_by_path.items(): if res is Codebase.CACHED_RESOURCE: @@ -738,7 +741,7 @@ def test_codebase_cache_all_on_disk(self): ) def test_codebase_cache_mixed_two_in_memory(self): - 
test_codebase = self.get_test_loc('resource/cache2') + test_codebase = self.get_test_loc("resource/cache2") codebase = Codebase(test_codebase, max_in_memory=2) counter = 0 for path, res in codebase.resources_by_path.items(): @@ -766,105 +769,105 @@ def test_codebase_cache_mixed_two_in_memory(self): class TestVirtualCodebase(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_virtual_codebase_walk_defaults(self): - test_file = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + test_file = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") codebase = VirtualCodebase(location=test_file) results = list(codebase.walk()) expected = [ - ('codebase', False), - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("codebase", False), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_topdown(self): - test_file = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + test_file = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") codebase = VirtualCodebase(location=test_file) results = list(codebase.walk(topdown=True)) expected = [ - ('codebase', False), - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("codebase", False), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_bottomup(self): - test_file = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + test_file = 
self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") codebase = VirtualCodebase(location=test_file) results = list(codebase.walk(topdown=False)) expected = [ - ('abc', True), - ('et131x.h', True), - ('that', True), - ('this', True), - ('dir', False), - ('file', True), - ('other dir', False), - ('codebase', False), + ("abc", True), + ("et131x.h", True), + ("that", True), + ("this", True), + ("dir", False), + ("file", True), + ("other dir", False), + ("codebase", False), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_skip_root_basic(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) results = list(virtual_codebase.walk(skip_root=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_get_path_with_strip_root_and_walk_with_skip_root(self): - scan_data = self.get_test_loc('resource/virtual_codebase/stripped-and-skipped-root.json') + scan_data = self.get_test_loc("resource/virtual_codebase/stripped-and-skipped-root.json") virtual_codebase = VirtualCodebase(location=scan_data) results = [r.get_path(strip_root=True) for r in virtual_codebase.walk(skip_root=True)] - expected = ['README', 'screenshot.png'] + expected = ["README", "screenshot.png"] assert expected == results def test_virtual_codebase_to_list_with_strip_root_and_walk_with_skip_root(self): - scan_data = self.get_test_loc('resource/virtual_codebase/stripped-and-skipped-root.json') + scan_data = 
self.get_test_loc("resource/virtual_codebase/stripped-and-skipped-root.json") virtual_codebase = VirtualCodebase(location=scan_data) results = virtual_codebase.to_list(strip_root=True, skinny=True) - expected = [{'path': 'README', 'type': 'file'}, {'path': 'screenshot.png', 'type': 'file'}] + expected = [{"path": "README", "type": "file"}, {"path": "screenshot.png", "type": "file"}] assert expected == results def test_virtual_codebase_walk_filtered_with_filtered_root(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) virtual_codebase.root.is_filtered = True virtual_codebase.save_resource(virtual_codebase.root) results = list(virtual_codebase.walk_filtered()) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered_with_all_filtered(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): res.is_filtered = True @@ -875,14 +878,14 @@ def test_virtual_codebase_walk_filtered_with_all_filtered(self): assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_compute_counts_filtered_None(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) results = 
virtual_codebase.compute_counts(skip_filtered=True) expected = (5, 3, 2228) assert results == expected def test_virtual_codebase_compute_counts_filtered_None_with_size(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): if res.is_file: @@ -894,14 +897,14 @@ def test_virtual_codebase_compute_counts_filtered_None_with_size(self): assert results == expected def test_virtual_codebase_compute_counts_filtered_None_with_cache(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) results = virtual_codebase.compute_counts(skip_filtered=True) expected = (5, 3, 2228) assert results == expected def test_virtual_codebase_compute_counts_filtered_all(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): res.is_filtered = True @@ -911,7 +914,7 @@ def test_virtual_codebase_compute_counts_filtered_all(self): assert results == expected def test_virtual_codebase_compute_counts_filtered_all_with_cache(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): res.is_filtered = True @@ -921,7 +924,7 @@ def test_virtual_codebase_compute_counts_filtered_all_with_cache(self): assert results == expected def test_virtual_codebase_compute_counts_filtered_files(self): - scan_data = 
self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): if res.is_file: @@ -932,7 +935,7 @@ def test_virtual_codebase_compute_counts_filtered_files(self): assert results == expected def test_virtual_codebase_compute_counts_filtered_dirs(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): if not res.is_file: @@ -943,7 +946,7 @@ def test_virtual_codebase_compute_counts_filtered_dirs(self): assert results == expected def test_virtual_codebase_walk_filtered_dirs(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): if not res.is_file: @@ -951,33 +954,33 @@ def test_virtual_codebase_walk_filtered_dirs(self): virtual_codebase.save_resource(res) results = list(virtual_codebase.walk_filtered(topdown=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('that', True), - ('this', True), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("that", True), + ("this", True), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered_skip_root(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) virtual_codebase.root.is_filtered = True virtual_codebase.save_resource(virtual_codebase.root) results = 
list(virtual_codebase.walk_filtered(skip_root=True)) expected = [ - ('abc', True), - ('et131x.h', True), - ('dir', False), - ('that', True), - ('this', True), - ('other dir', False), - ('file', True), + ("abc", True), + ("et131x.h", True), + ("dir", False), + ("that", True), + ("this", True), + ("other dir", False), + ("file", True), ] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered_all_skip_root(self): - scan_data = self.get_test_loc('resource/virtual_codebase/virtual_codebase.json') + scan_data = self.get_test_loc("resource/virtual_codebase/virtual_codebase.json") virtual_codebase = VirtualCodebase(location=scan_data) for res in virtual_codebase.walk(): res.is_filtered = True @@ -988,21 +991,21 @@ def test_virtual_codebase_walk_filtered_all_skip_root(self): assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_skip_root_single_file(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) results = list(virtual_codebase.walk(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered_with_skip_root_and_single_file_not_filtered(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) results = list(virtual_codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered__with_skip_root_and_filtered_single_file(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = 
self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) virtual_codebase.root.is_filtered = True virtual_codebase.save_resource(virtual_codebase.root) @@ -1011,35 +1014,35 @@ def test_virtual_codebase_walk_filtered__with_skip_root_and_filtered_single_file assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_skip_root_single_file_with_children(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) c1 = virtual_codebase._get_or_create_resource( - 'some child', + "some child", parent=virtual_codebase.root, is_file=True, ) _c2 = virtual_codebase._get_or_create_resource( - 'some child2', + "some child2", parent=c1, is_file=False, ) results = list(virtual_codebase.walk(skip_root=True)) - expected = [('et131x.h', True), ('some child', True), ('some child2', False)] + expected = [("et131x.h", True), ("some child", True), ("some child2", False)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_walk_filtered_with_skip_root_and_single_file_with_children(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) c1 = virtual_codebase._get_or_create_resource( - 'some child', + "some child", parent=virtual_codebase.root, is_file=True, ) c2 = virtual_codebase._get_or_create_resource( - 'some child2', + "some child2", parent=c1, is_file=False, ) @@ -1047,73 +1050,73 @@ def test_virtual_codebase_walk_filtered_with_skip_root_and_single_file_with_chil c2.save(virtual_codebase) results = list(virtual_codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True), ('some child', True)] + expected = [("et131x.h", True), ("some child", True)] 
assert [(r.name, r.is_file) for r in results] == expected c1.is_filtered = True c1.save(virtual_codebase) results = list(virtual_codebase.walk_filtered(skip_root=True)) - expected = [('et131x.h', True)] + expected = [("et131x.h", True)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase__create_resource_can_add_child_to_file(self): - scan_data = self.get_test_loc('resource/virtual_codebase/et131x.h.json') + scan_data = self.get_test_loc("resource/virtual_codebase/et131x.h.json") virtual_codebase = VirtualCodebase(location=scan_data) virtual_codebase._get_or_create_resource( - 'some child', + "some child", virtual_codebase.root, is_file=True, ) results = list(virtual_codebase.walk()) - expected = [('et131x.h', True), ('some child', True)] + expected = [("et131x.h", True), ("some child", True)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase__create_resource_can_add_child_to_dir(self): - scan_data = self.get_test_loc('resource/virtual_codebase/resource.json') + scan_data = self.get_test_loc("resource/virtual_codebase/resource.json") virtual_codebase = VirtualCodebase(location=scan_data) virtual_codebase._get_or_create_resource( - 'some child', + "some child", virtual_codebase.root, is_file=False, ) results = list(virtual_codebase.walk()) - expected = [('resource', False), ('some child', False)] + expected = [("resource", False), ("some child", False)] assert [(r.name, r.is_file) for r in results] == expected def test_virtual_codebase_get_resource(self): - scan_data = self.get_test_loc('resource/virtual_codebase/resource.json') + scan_data = self.get_test_loc("resource/virtual_codebase/resource.json") virtual_codebase = VirtualCodebase(location=scan_data) - assert not (virtual_codebase.root is virtual_codebase.get_resource('resource')) - assert virtual_codebase.get_resource('resource') == virtual_codebase.root + assert not (virtual_codebase.root is virtual_codebase.get_resource("resource")) + assert 
virtual_codebase.get_resource("resource") == virtual_codebase.root def test_virtual_codebase_can_process_minimal_resources_without_info(self): - scan_data = self.get_test_loc('resource/virtual_codebase/noinfo.json') + scan_data = self.get_test_loc("resource/virtual_codebase/noinfo.json") codebase = VirtualCodebase(location=scan_data) expected = [ { - 'path': 'NOTICE', - 'type': 'file', - 'copyrights': [ + "path": "NOTICE", + "type": "file", + "copyrights": [ { - 'statements': ['Copyright (c) 2017 nexB Inc. and others.'], - 'holders': ['nexB Inc. and others.'], - 'authors': [], - 'start_line': 4, - 'end_line': 4, + "statements": ["Copyright (c) 2017 nexB Inc. and others."], + "holders": ["nexB Inc. and others."], + "authors": [], + "start_line": 4, + "end_line": 4, } ], - 'scan_errors': [], + "scan_errors": [], } ] assert [r.to_dict() for r in codebase.walk()] == expected def test_virtual_codebase_can_process_minimal_resources_with_only_path(self): - scan_data = self.get_test_loc('resource/virtual_codebase/only-path.json') + scan_data = self.get_test_loc("resource/virtual_codebase/only-path.json") codebase = VirtualCodebase(location=scan_data) expected = [ - {'path': 'samples', 'type': 'directory', 'scan_errors': []}, - {'path': 'samples/NOTICE', 'type': 'file', 'scan_errors': []}, + {"path": "samples", "type": "directory", "scan_errors": []}, + {"path": "samples/NOTICE", "type": "file", "scan_errors": []}, ] assert [r.to_dict() for r in codebase.walk()] == expected @@ -1123,84 +1126,84 @@ def test_VirtualCodebase_account_fingerprint_attribute(self): resources_fingerprint = [resource.fingerprint for resource in codebase.walk()] assert "e30cf09443e7878dfed3288886e97542" in resources_fingerprint assert None in resources_fingerprint - assert codebase.get_resource('apache_to_all_notable_lic_new') == codebase.root + assert codebase.get_resource("apache_to_all_notable_lic_new") == codebase.root assert resources_fingerprint.count(None) == 2 def 
test_VirtualCodebase_works_with_mapping_backed_codebase(self): test_file = self.get_test_loc("resource/virtual_codebase/license-scan.json") codebase = VirtualCodebase(test_file) - resource = codebase.get_resource('scan-ref/license-notice.txt') + resource = codebase.get_resource("scan-ref/license-notice.txt") assert resource assert len(resource.license_expressions) == 1 class TestCodebaseLowestCommonParent(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_lowest_common_parent_on_virtual_codebase(self): - scan_data = self.get_test_loc('resource/virtual_codebase/lcp.json') + scan_data = self.get_test_loc("resource/virtual_codebase/lcp.json") virtual_codebase = VirtualCodebase(location=scan_data) lcp = virtual_codebase.lowest_common_parent() - assert lcp.path == 'lcp/test1' - assert lcp.name == 'test1' + assert lcp.path == "lcp/test1" + assert lcp.name == "test1" def test_virtual_codebase_has_default_for_plugin_attributes(self): - scan_data = self.get_test_loc('resource/virtual_codebase/only-path.json') + scan_data = self.get_test_loc("resource/virtual_codebase/only-path.json") VirtualCodebase(location=scan_data) def test_lowest_common_parent_strip(self): - test_codebase = self.get_test_loc('resource/lcp/test1') + test_codebase = self.get_test_loc("resource/lcp/test1") codebase = Codebase(test_codebase) assert len(list(codebase.walk())) == 75 lcp = codebase.lowest_common_parent() - assert lcp.path == 'test1' - assert lcp.name == 'test1' - assert lcp.strip_root_path == '' - assert lcp.full_root_path.endswith('resource/lcp/test1') + assert lcp.path == "test1" + assert lcp.name == "test1" + assert lcp.strip_root_path == "" + assert lcp.full_root_path.endswith("resource/lcp/test1") def test_lowest_common_parent_2(self): - test_codebase = self.get_test_loc('resource/lcp/test1/zlib') + test_codebase = self.get_test_loc("resource/lcp/test1/zlib") codebase = Codebase(test_codebase) lcp = 
codebase.lowest_common_parent() - assert lcp.path == 'zlib' - assert lcp.name == 'zlib' - assert lcp.strip_root_path == '' - assert lcp.full_root_path.endswith('resource/lcp/test1/zlib') + assert lcp.path == "zlib" + assert lcp.name == "zlib" + assert lcp.strip_root_path == "" + assert lcp.full_root_path.endswith("resource/lcp/test1/zlib") def test_lowest_common_parent_3(self): - test_codebase = self.get_test_loc('resource/lcp/test1/simple') + test_codebase = self.get_test_loc("resource/lcp/test1/simple") codebase = Codebase(test_codebase) lcp = codebase.lowest_common_parent() - assert lcp.path == 'simple' - assert lcp.name == 'simple' - assert lcp.strip_root_path == '' + assert lcp.path == "simple" + assert lcp.name == "simple" + assert lcp.strip_root_path == "" def test_lowest_common_parent_deep(self): - test_codebase = self.get_test_loc('resource/lcp/test1/simple/org') + test_codebase = self.get_test_loc("resource/lcp/test1/simple/org") codebase = Codebase(test_codebase) lcp = codebase.lowest_common_parent() - assert lcp.path == 'org/jvnet/glassfish/comms/sipagent' - assert lcp.name == 'sipagent' - assert lcp.strip_root_path == 'jvnet/glassfish/comms/sipagent' + assert lcp.path == "org/jvnet/glassfish/comms/sipagent" + assert lcp.name == "sipagent" + assert lcp.strip_root_path == "jvnet/glassfish/comms/sipagent" assert lcp.full_root_path.endswith( - 'resource/lcp/test1/simple/org/jvnet/glassfish/comms/sipagent' + "resource/lcp/test1/simple/org/jvnet/glassfish/comms/sipagent" ) def test_lowest_common_parent_solo_file(self): - test_codebase = self.get_test_loc('resource/lcp/test1/screenshot.png') + test_codebase = self.get_test_loc("resource/lcp/test1/screenshot.png") codebase = Codebase(test_codebase) lcp = codebase.lowest_common_parent() - assert lcp.path == 'screenshot.png' - assert lcp.name == 'screenshot.png' - assert lcp.strip_root_path == '' - assert lcp.full_root_path.endswith('resource/lcp/test1/screenshot.png') + assert lcp.path == "screenshot.png" + 
assert lcp.name == "screenshot.png" + assert lcp.strip_root_path == "" + assert lcp.full_root_path.endswith("resource/lcp/test1/screenshot.png") class TestVirtualCodebaseCache(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_virtual_codebase_cache_default(self): - scan_data = self.get_test_loc('resource/virtual_codebase/codebase-for-cache-tests.json') + scan_data = self.get_test_loc("resource/virtual_codebase/codebase-for-cache-tests.json") virtual_codebase = VirtualCodebase(location=scan_data) assert virtual_codebase.temp_dir assert virtual_codebase.cache_dir @@ -1214,14 +1217,14 @@ def test_virtual_codebase_cache_default(self): assert not exists(cp) assert exists(parent_directory(cp)) - child = virtual_codebase._get_or_create_resource('child', root, is_file=True) + child = virtual_codebase._get_or_create_resource("child", root, is_file=True) child.size = 12 virtual_codebase.save_resource(child) child_2 = virtual_codebase.get_resource(child.path) assert child_2 == child def test_virtual_codebase_cache_all_in_memory(self): - scan_data = self.get_test_loc('resource/virtual_codebase/codebase-for-cache-tests.json') + scan_data = self.get_test_loc("resource/virtual_codebase/codebase-for-cache-tests.json") virtual_codebase = VirtualCodebase(location=scan_data, max_in_memory=0) for path, res in virtual_codebase.resources_by_path.items(): assert res != Codebase.CACHED_RESOURCE @@ -1242,10 +1245,9 @@ def test_virtual_codebase_cache_all_in_memory(self): ) def test_virtual_codebase_cache_all_on_disk(self): - scan_data = self.get_test_loc('resource/virtual_codebase/codebase-for-cache-tests.json') + scan_data = self.get_test_loc("resource/virtual_codebase/codebase-for-cache-tests.json") virtual_codebase = VirtualCodebase(location=scan_data, max_in_memory=-1) for path, res in virtual_codebase.resources_by_path.items(): - if res != Codebase.CACHED_RESOURCE: assert res.is_root else: @@ -1266,7 +1268,7 
@@ def test_virtual_codebase_cache_all_on_disk(self): ) def test_virtual_codebase_cache_mixed_two_in_memory(self): - scan_data = self.get_test_loc('resource/virtual_codebase/codebase-for-cache-tests.json') + scan_data = self.get_test_loc("resource/virtual_codebase/codebase-for-cache-tests.json") virtual_codebase = VirtualCodebase(location=scan_data, max_in_memory=2) counter = 0 @@ -1299,17 +1301,17 @@ def test_virtual_codebase_cache_mixed_two_in_memory(self): class TestVirtualCodebaseCreation(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_VirtualCodebase_can_be_created_from_json_file(self): - test_file = self.get_test_loc('resource/virtual_codebase/from_file.json') + test_file = self.get_test_loc("resource/virtual_codebase/from_file.json") codebase = VirtualCodebase(test_file) results = sorted(r.name for r in codebase.walk()) - expected = ['bar.svg', 'han'] + expected = ["bar.svg", "han"] assert results == expected def test_VirtualCodebase_can_be_created_from_json_string(self): - test_scan = ''' + test_scan = """ { "scancode_notice": "Generated with ScanCode and provided on an ....", "scancode_version": "2.9.7.post137.2e29fe3.dirty.20181120225811", @@ -1332,10 +1334,10 @@ def test_VirtualCodebase_can_be_created_from_json_string(self): } ] } - ''' + """ codebase = VirtualCodebase(test_scan) results = sorted(r.name for r in codebase.walk()) - expected = ['bar.svg', 'han'] + expected = ["bar.svg", "han"] assert results == expected def test_VirtualCodebase_can_be_created_from_dict(self): @@ -1353,84 +1355,84 @@ def test_VirtualCodebase_can_be_created_from_dict(self): codebase = VirtualCodebase(test_scan) results = sorted(r.name for r in codebase.walk()) - expected = ['bar.svg', 'han'] + expected = ["bar.svg", "han"] assert results == expected def test_VirtualCodebase_create_from_scan_with_no_root_and_missing_parents(self): - test_file = 
self.get_test_loc('resource/virtual_codebase/samples-only-findings.json') + test_file = self.get_test_loc("resource/virtual_codebase/samples-only-findings.json") result_file = self.get_test_loc( - 'resource/virtual_codebase/samples-only-findings-expected.json' + "resource/virtual_codebase/samples-only-findings-expected.json" ) codebase = VirtualCodebase(test_file) expected_scan = json.load(open(result_file)) results = sorted(r.path for r in codebase.walk()) - expected = sorted(r.get('path') for r in expected_scan['files']) + expected = sorted(r.get("path") for r in expected_scan["files"]) assert results == expected def test_VirtualCodebase_check_that_already_existing_parent_is_updated_properly(self): - test_file = self.get_test_loc('resource/virtual_codebase/root-is-not-first-resource.json') + test_file = self.get_test_loc("resource/virtual_codebase/root-is-not-first-resource.json") codebase = VirtualCodebase(test_file) results = sorted((r.to_dict() for r in codebase.walk()), key=lambda x: tuple(x.items())) expected = [ - {'path': 'samples', 'type': 'directory', 'summary': ['asd'], 'scan_errors': []}, - {'path': 'samples/NOTICE', 'type': 'file', 'summary': [], 'scan_errors': []}, + {"path": "samples", "type": "directory", "summary": ["asd"], "scan_errors": []}, + {"path": "samples/NOTICE", "type": "file", "summary": [], "scan_errors": []}, ] assert results == expected def test_VirtualCodebase_create_from_multiple_scans(self): - test_file_1 = self.get_test_loc('resource/virtual_codebase/combine-1.json') - test_file_2 = self.get_test_loc('resource/virtual_codebase/combine-2.json') + test_file_1 = self.get_test_loc("resource/virtual_codebase/combine-1.json") + test_file_2 = self.get_test_loc("resource/virtual_codebase/combine-2.json") vinput = (test_file_1, test_file_2) codebase = VirtualCodebase(vinput) results = [r.to_dict(with_info=False) for r in codebase.walk()] expected_file = self.get_test_loc( - 'resource/virtual_codebase/combine-expected.json', + 
"resource/virtual_codebase/combine-expected.json", must_exist=False, ) check_against_expected_json_file(results, expected_file, regen=False) def test_VirtualCodebase_create_from_multiple_scans_shared_directory_names(self): test_file_1 = self.get_test_loc( - 'resource/virtual_codebase/combine-shared-directory-name-1.json' + "resource/virtual_codebase/combine-shared-directory-name-1.json" ) test_file_2 = self.get_test_loc( - 'resource/virtual_codebase/combine-shared-directory-name-2.json' + "resource/virtual_codebase/combine-shared-directory-name-2.json" ) vinput = (test_file_1, test_file_2) codebase = VirtualCodebase(location=vinput) results = [r.to_dict(with_info=False) for r in codebase.walk()] expected_file = self.get_test_loc( - 'resource/virtual_codebase/combine-shared-directory-name-expected.json', + "resource/virtual_codebase/combine-shared-directory-name-expected.json", must_exist=False, ) check_against_expected_json_file(results, expected_file, regen=False) def test_VirtualCodebase_compute_counts_with_full_root_info_one(self): - test_file = self.get_test_loc('resource/virtual_codebase/full-root-info-one.json') + test_file = self.get_test_loc("resource/virtual_codebase/full-root-info-one.json") codebase = VirtualCodebase(test_file) resource = [r for r in codebase.walk() if r.is_file][0] - assert resource.path == 'home/foobar/scancode-toolkit/samples/README' + assert resource.path == "home/foobar/scancode-toolkit/samples/README" files_count, dirs_count, size_count = codebase.compute_counts() assert files_count == 1 assert dirs_count == 0 assert size_count == 236 def test_VirtualCodebase_with_full_root_info_one(self): - test_file = self.get_test_loc('resource/virtual_codebase/full-root-info-one.json') + test_file = self.get_test_loc("resource/virtual_codebase/full-root-info-one.json") codebase = VirtualCodebase(test_file) results = [r.to_dict(with_info=True) for r in codebase.walk()] expected_file = self.get_test_loc( - 
'resource/virtual_codebase/full-root-info-one-expected.json', must_exist=False + "resource/virtual_codebase/full-root-info-one-expected.json", must_exist=False ) check_against_expected_json_file(results, expected_file, regen=False) def test_VirtualCodebase_with_full_root_info_many(self): - test_file = self.get_test_loc('resource/virtual_codebase/full-root-info-many.json') + test_file = self.get_test_loc("resource/virtual_codebase/full-root-info-many.json") codebase = VirtualCodebase(test_file) results = [r.to_dict(with_info=True) for r in codebase.walk()] expected_file = self.get_test_loc( - 'resource/virtual_codebase/full-root-info-many-expected.json', must_exist=False + "resource/virtual_codebase/full-root-info-many-expected.json", must_exist=False ) check_against_expected_json_file(results, expected_file, regen=False) @@ -1438,37 +1440,37 @@ def test_VirtualCodebase_can_compute_counts_with_null(self): # was failing with # size_count += child.size # TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType' - test_file = self.get_test_loc('resource/virtual_codebase/node-16-slim.json') + test_file = self.get_test_loc("resource/virtual_codebase/node-16-slim.json") codebase = VirtualCodebase(test_file) codebase.compute_counts() def test_VirtualCodebase_can_be_created_with_single_path(self): - test_file = self.get_test_loc('resource/virtual_codebase/docker-hello-world.json') + test_file = self.get_test_loc("resource/virtual_codebase/docker-hello-world.json") VirtualCodebase(test_file) def test_VirtualCodebase_can_be_created_without_RecursionError(self): # was failing with RecursionError: maximum recursion depth exceeded - test_file = self.get_test_loc('resource/virtual_codebase/zephyr-binary.json') + test_file = self.get_test_loc("resource/virtual_codebase/zephyr-binary.json") VirtualCodebase(test_file) def test_VirtualCodebase_can_be_created_with_repeated_root_directory(self): paths = [ - 'to', - 'to/to', - 'to/to/to', - 'to/to/to/to', + "to", + "to/to", + 
"to/to/to", + "to/to/to/to", ] - resources = [{'path': path} for path in paths] - vc = VirtualCodebase(location={'files': resources}) + resources = [{"path": path} for path in paths] + vc = VirtualCodebase(location={"files": resources}) walked_paths = [r.path for r in vc.walk()] assert paths == walked_paths class TestResource(FileBasedTesting): - test_data_dir = join(dirname(__file__), 'data') + test_data_dir = join(dirname(__file__), "data") def test_Resource_extracted_to_extracted_from(self): - test_file = self.get_test_loc('resource/resource/test-extracted-from-to.json') + test_file = self.get_test_loc("resource/resource/test-extracted-from-to.json") codebase = VirtualCodebase(location=test_file) results = [] for r in codebase.walk(topdown=True): @@ -1481,74 +1483,74 @@ def test_Resource_extracted_to_extracted_from(self): expected = [ ( - 'test', + "test", None, None, ), ( - 'test/c', + "test/c", None, None, ), ( - 'test/foo.tar.gz', - 'test/foo.tar.gz-extract', + "test/foo.tar.gz", + "test/foo.tar.gz-extract", None, ), ( - 'test/foo.tar.gz-extract', + "test/foo.tar.gz-extract", None, - 'test/foo.tar.gz', + "test/foo.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo', + "test/foo.tar.gz-extract/foo", None, - 'test/foo.tar.gz', + "test/foo.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo/a', + "test/foo.tar.gz-extract/foo/a", None, - 'test/foo.tar.gz', + "test/foo.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo/bar.tar.gz', - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract', - 'test/foo.tar.gz', + "test/foo.tar.gz-extract/foo/bar.tar.gz", + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract", + "test/foo.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract', + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract", None, - 'test/foo.tar.gz-extract/foo/bar.tar.gz', + "test/foo.tar.gz-extract/foo/bar.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar', + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar", None, - 'test/foo.tar.gz-extract/foo/bar.tar.gz', + 
"test/foo.tar.gz-extract/foo/bar.tar.gz", ), ( - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar/b', + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar/b", None, - 'test/foo.tar.gz-extract/foo/bar.tar.gz', + "test/foo.tar.gz-extract/foo/bar.tar.gz", ), ] assert results == expected def test_virtualcode_Resource_can_walk(self): - test_file = self.get_test_loc('resource/resource/test-extracted-from-to.json') + test_file = self.get_test_loc("resource/resource/test-extracted-from-to.json") codebase = VirtualCodebase(location=test_file) results = [r.path for r in codebase.walk(topdown=True)] expected = [ - 'test', - 'test/c', - 'test/foo.tar.gz', - 'test/foo.tar.gz-extract', - 'test/foo.tar.gz-extract/foo', - 'test/foo.tar.gz-extract/foo/a', - 'test/foo.tar.gz-extract/foo/bar.tar.gz', - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract', - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar', - 'test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar/b', + "test", + "test/c", + "test/foo.tar.gz", + "test/foo.tar.gz-extract", + "test/foo.tar.gz-extract/foo", + "test/foo.tar.gz-extract/foo/a", + "test/foo.tar.gz-extract/foo/bar.tar.gz", + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract", + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar", + "test/foo.tar.gz-extract/foo/bar.tar.gz-extract/bar/b", ] assert results == expected diff --git a/tests/test_saneyaml.py b/tests/test_saneyaml.py index 984cdf11..fabcf88e 100644 --- a/tests/test_saneyaml.py +++ b/tests/test_saneyaml.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -12,15 +12,15 @@ from commoncode.testcase import FileBasedTesting -TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") class TestSaneyaml(FileBasedTesting): test_data_dir = TEST_DATA_DIR def test_load_with_and_without_tags(self): - test_file_with_tag = self.get_test_loc('saneyaml/metadata1') - test_file_without_tag = self.get_test_loc('saneyaml/metadata1.notag') - with_tags = saneyaml.load(open(test_file_with_tag, 'rb').read()) - without_tags = saneyaml.load(open(test_file_without_tag, 'rb').read()) + test_file_with_tag = self.get_test_loc("saneyaml/metadata1") + test_file_without_tag = self.get_test_loc("saneyaml/metadata1.notag") + with_tags = saneyaml.load(open(test_file_with_tag, "rb").read()) + without_tags = saneyaml.load(open(test_file_without_tag, "rb").read()) assert without_tags == with_tags diff --git a/tests/test_skeleton_codestyle.py b/tests/test_skeleton_codestyle.py deleted file mode 100644 index 2eb6e558..00000000 --- a/tests/test_skeleton_codestyle.py +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import subprocess -import unittest -import configparser - - -class BaseTests(unittest.TestCase): - def test_skeleton_codestyle(self): - """ - This test shouldn't run in proliferated repositories. 
- """ - setup_cfg = configparser.ConfigParser() - setup_cfg.read("setup.cfg") - if setup_cfg["metadata"]["name"] != "skeleton": - return - - args = "venv/bin/black --check -l 100 setup.py etc tests" - try: - subprocess.check_output(args.split()) - except subprocess.CalledProcessError as e: - print("===========================================================") - print(e.output) - print("===========================================================") - raise Exception( - "Black style check failed; please format the code using:\n" - " python -m black -l 100 setup.py etc tests", - e.output, - ) from e diff --git a/tests/test_text.py b/tests/test_text.py index dcb1beb0..e151e886 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -3,7 +3,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # @@ -13,7 +13,7 @@ def test_lines(): - t = '''This problem is. + t = """This problem is. 
It is therefore @@ -22,67 +22,75 @@ def test_lines(): However, I have -''' +""" assert len([p[1] for p in text.lines(t)]) == 5 - expected = ['This problem is.', 'It is therefore', 'However,we', 'without introducing ..', 'However, I have'] + expected = [ + "This problem is.", + "It is therefore", + "However,we", + "without introducing ..", + "However, I have", + ] assert [p for p in text.lines(t)] == expected def test_foldcase(): - test = ' Fold THE cases of a text to lower casM' + test = " Fold THE cases of a text to lower casM" assert text.foldcase(test) == test.lower() def test_nopunctuation(): - test = '''This problem is about sequence-bunching, %^$^%**^&*©©^(*&(*()()_+)_!@@#:><>>?/./,.,';][{}{]just''' - expected = ['This', 'problem', 'is', 'about', 'sequence', 'bunching', 'Â', 'Â', 'just'] + test = """This problem is about sequence-bunching, %^$^%**^&*©©^(*&(*()()_+)_!@@#:><>>?/./,.,';][{}{]just""" + expected = ["This", "problem", "is", "about", "sequence", "bunching", "Â", "Â", "just"] assert text.nopunctuation(test).split() == expected - test = 'This problem is about: sequence-bunching\n\n just \n' - expected = 'This problem is about sequence bunching just ' + test = "This problem is about: sequence-bunching\n\n just \n" + expected = "This problem is about sequence bunching just " assert text.nopunctuation(test) == expected def test_unixlinesep(): t = CR + LF + LF + CR + CR + LF assert text.unixlinesep(t) == LF + LF + LF + LF - assert text.unixlinesep(t, True) == ' ' + LF + LF + LF + ' ' + LF + assert text.unixlinesep(t, True) == " " + LF + LF + LF + " " + LF def test_nolinesep(): t = CR + LF + CR + CR + CR + LF - assert text.nolinesep(t) == ' ' + assert text.nolinesep(t) == " " def test_toascii(): - acc = u"ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿẞß®©œŒØøÆæ₵₡¢¢Žž" - expected = r'AAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyyZz' + acc = "ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿẞß®©œŒØøÆæ₵₡¢¢Žž" + expected = 
r"AAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyyZz" assert text.toascii(acc, translit=False) == expected - expected = r'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyySsss(r)(c)oeOEOoAEae_CL/CC/Zz' + expected = ( + r"AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyySsss(r)(c)oeOEOoAEae_CL/CC/Zz" + ) assert text.toascii(acc, translit=True) == expected def test_toascii_works_with_empty_unicode_or_bytes(): - assert text.toascii(b'', translit=False) == u'' - assert text.toascii(u'', translit=True) == u'' - assert text.toascii(b'', translit=False) == u'' - assert text.toascii(u'', translit=True) == u'' + assert text.toascii(b"", translit=False) == "" + assert text.toascii("", translit=True) == "" + assert text.toascii(b"", translit=False) == "" + assert text.toascii("", translit=True) == "" def test_python_safe_name(): s = "not `\\a /`good` -safe name ??" - assert text.python_safe_name(s) == 'not___a___good___safe_name' + assert text.python_safe_name(s) == "not___a___good___safe_name" s1 = "string1++or+" s2 = "string1 +or " assert text.python_safe_name(s2) == text.python_safe_name(s1) def test_as_unicode(): - assert text.as_unicode('') == '' - assert isinstance(text.as_unicode(b'some bytes'), str) + assert text.as_unicode("") == "" + assert isinstance(text.as_unicode(b"some bytes"), str) assert text.as_unicode(None) == None try: - text.as_unicode(['foo']) - raise Exception('Exception should have been raised') + text.as_unicode(["foo"]) + raise Exception("Exception should have been raised") except AssertionError: pass diff --git a/tests/test_timeutils.py b/tests/test_timeutils.py index 7ed973fa..568b6425 100644 --- a/tests/test_timeutils.py +++ b/tests/test_timeutils.py @@ -2,36 +2,35 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. 
+# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # from datetime import datetime from commoncode.testcase import FileBasedTesting +from commoncode.timeutils import UTC from commoncode.timeutils import time2tstamp from commoncode.timeutils import tstamp2time -from commoncode.timeutils import UTC class TestTimeStamp(FileBasedTesting): - def test_time2tstamp_is_path_safe_and_file_is_writable(self): ts = time2tstamp() - tf = self.get_temp_file(extension='ext', dir_name=ts, file_name=ts) - fd = open(tf, 'w') - fd.write('a') + tf = self.get_temp_file(extension="ext", dir_name=ts, file_name=ts) + fd = open(tf, "w") + fd.write("a") fd.close() def test_time2tstamp_accepts_existing_datetimes(self): ts = time2tstamp() - tf = self.get_temp_file(extension='ext', dir_name=ts, file_name=ts) - fd = open(tf, 'w') - fd.write('a') + tf = self.get_temp_file(extension="ext", dir_name=ts, file_name=ts) + fd = open(tf, "w") + fd.write("a") fd.close() def test_time2tstamp_raises_on_non_datetime(self): - self.assertRaises(AttributeError, time2tstamp, 'some') + self.assertRaises(AttributeError, time2tstamp, "some") self.assertRaises(AttributeError, time2tstamp, 1) def test_time2tstamp_tstamp2time_is_idempotent(self): @@ -42,45 +41,154 @@ def test_time2tstamp_tstamp2time_is_idempotent(self): def test_tstamp2time_format(self): import re + ts = time2tstamp() - pat = r'^20\d\d-[0-1][0-9]-[0-3]\dT[0-2]\d[0-6]\d[0-6]\d.\d\d\d\d\d\d$' + pat = r"^20\d\d-[0-1][0-9]-[0-3]\dT[0-2]\d[0-6]\d[0-6]\d.\d\d\d\d\d\d$" assert re.match(pat, ts) def test_tstamp2time(self): - dt_from_ts = tstamp2time('2010-11-12T131415.000016') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=16, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("2010-11-12T131415.000016") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=16, 
+ tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time2(self): - dt_from_ts = tstamp2time('20101112T131415.000016') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=16, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("20101112T131415.000016") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=16, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time3(self): - dt_from_ts = tstamp2time('20101112T131415.000016Z') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=16, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("20101112T131415.000016Z") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=16, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time4(self): - dt_from_ts = tstamp2time('2010-11-12T131415') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("2010-11-12T131415") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time5(self): - dt_from_ts = tstamp2time('2010-11-12T13:14:15') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("2010-11-12T13:14:15") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time6(self): - dt_from_ts = tstamp2time('20101112T13:14:15') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("20101112T13:14:15") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + 
second=15, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time7(self): - dt_from_ts = tstamp2time('20101112T13:14:15Z') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("20101112T13:14:15Z") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time8(self): - dt_from_ts = tstamp2time('20101112T13:14:15Z') - assert datetime(year=2010, month=11, day=12, hour=13, minute=14, second=15, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("20101112T13:14:15Z") + assert ( + datetime( + year=2010, + month=11, + day=12, + hour=13, + minute=14, + second=15, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time9(self): - dt_from_ts = tstamp2time('2010-06-30T21:26:40.000Z') - assert datetime(year=2010, month=6, day=30, hour=21, minute=26, second=40, microsecond=0, tzinfo=UTC()) == dt_from_ts + dt_from_ts = tstamp2time("2010-06-30T21:26:40.000Z") + assert ( + datetime( + year=2010, + month=6, + day=30, + hour=21, + minute=26, + second=40, + microsecond=0, + tzinfo=UTC(), + ) + == dt_from_ts + ) def test_tstamp2time_raise(self): - self.assertRaises(ValueError, tstamp2time, '201011A12T13:14:15Z') + self.assertRaises(ValueError, tstamp2time, "201011A12T13:14:15Z") diff --git a/tests/test_urn.py b/tests/test_urn.py index f8443fd1..6cc6ee99 100644 --- a/tests/test_urn.py +++ b/tests/test_urn.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -12,135 +12,131 @@ class URNTestCase(unittest.TestCase): - def test_encode_license(self): - u1 = urn.encode('license', key='somekey') - assert u1 == 'urn:dje:license:somekey' + u1 = urn.encode("license", key="somekey") + assert u1 == "urn:dje:license:somekey" def test_encode_owner(self): - u1 = urn.encode('owner', name='somekey') - assert u1 == 'urn:dje:owner:somekey' + u1 = urn.encode("owner", name="somekey") + assert u1 == "urn:dje:owner:somekey" def test_encode_component(self): - u1 = urn.encode('component', name='name', version='version') - assert u1 == 'urn:dje:component:name:version' + u1 = urn.encode("component", name="name", version="version") + assert u1 == "urn:dje:component:name:version" def test_encode_component_no_version(self): - u1 = urn.encode('component', name='name', version='') - assert u1 == 'urn:dje:component:name:' + u1 = urn.encode("component", name="name", version="") + assert u1 == "urn:dje:component:name:" def test_encode_license_with_extra_fields_are_ignored(self): - u1 = urn.encode('license', key='somekey', junk='somejunk') - assert u1 == 'urn:dje:license:somekey' + u1 = urn.encode("license", key="somekey", junk="somejunk") + assert u1 == "urn:dje:license:somekey" def test_encode_missing_field_raise_keyerror(self): with self.assertRaises(KeyError): - urn.encode('license') + urn.encode("license") def test_encode_missing_field_component_raise_keyerror(self): with self.assertRaises(KeyError): - urn.encode('component', name='this') + urn.encode("component", name="this") def test_encode_unknown_object_type_raise_keyerror(self): with self.assertRaises(KeyError): - urn.encode('some', key='somekey') + urn.encode("some", key="somekey") def test_encode_component_with_spaces_are_properly_quoted(self): - u1 = urn.encode('component', name='name space', - version='version space') - assert u1 == 'urn:dje:component:name+space:version+space' + u1 = urn.encode("component", name="name space", version="version space") + assert u1 == 
"urn:dje:component:name+space:version+space" def test_encode_leading_and_trailing_spaces_are_trimmed_and_ignored(self): - u1 = urn.encode(' component ', name=' name space ', - version=''' version space ''') - assert u1 == 'urn:dje:component:name+space:version+space' + u1 = urn.encode(" component ", name=" name space ", version=""" version space """) + assert u1 == "urn:dje:component:name+space:version+space" def test_encode_component_with_semicolon_are_properly_quoted(self): - u1 = urn.encode('component', name='name:', version=':version') - assert u1 == 'urn:dje:component:name%3A:%3Aversion' + u1 = urn.encode("component", name="name:", version=":version") + assert u1 == "urn:dje:component:name%3A:%3Aversion" def test_encode_component_with_plus_are_properly_quoted(self): - u1 = urn.encode('component', name='name+', version='version+') - assert u1 == 'urn:dje:component:name%2B:version%2B' + u1 = urn.encode("component", name="name+", version="version+") + assert u1 == "urn:dje:component:name%2B:version%2B" def test_encode_component_with_percent_are_properly_quoted(self): - u1 = urn.encode('component', name='name%', version='version%') - assert u1 == 'urn:dje:component:name%25:version%25' + u1 = urn.encode("component", name="name%", version="version%") + assert u1 == "urn:dje:component:name%25:version%25" def test_encode_object_type_case_is_not_significant(self): - u1 = urn.encode('license', key='key') - u2 = urn.encode('lICENSe', key='key') + u1 = urn.encode("license", key="key") + u2 = urn.encode("lICENSe", key="key") assert u2 == u1 def test_decode_component(self): - u = 'urn:dje:component:name:version' - parsed = ('component', {'name': 'name', 'version': 'version'}) + u = "urn:dje:component:name:version" + parsed = ("component", {"name": "name", "version": "version"}) assert urn.decode(u) == parsed def test_decode_license(self): - u = 'urn:dje:license:lic' - parsed = ('license', {'key': 'lic'}) + u = "urn:dje:license:lic" + parsed = ("license", {"key": "lic"}) 
assert urn.decode(u) == parsed def test_decode_org(self): - u = 'urn:dje:owner:name' - parsed = ('owner', {'name': 'name'}) + u = "urn:dje:owner:name" + parsed = ("owner", {"name": "name"}) assert urn.decode(u) == parsed def test_decode_build_is_idempotent(self): - u1 = urn.encode('component', owner__name='org%', name='name%', - version='version%') + u1 = urn.encode("component", owner__name="org%", name="name%", version="version%") m, f = urn.decode(u1) u3 = urn.encode(m, **f) assert u3 == u1 def test_decode_raise_exception_if_incorrect_prefix(self): with self.assertRaises(urn.URNValidationError): - urn.decode('arn:dje:a:a') + urn.decode("arn:dje:a:a") def test_decode_raise_exception_if_incorrect_ns(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:x:x:x') + urn.decode("urn:x:x:x") def test_decode_raise_exception_if_incorrect_prefix_or_ns(self): with self.assertRaises(urn.URNValidationError): - urn.decode('x:x:x:x') + urn.decode("x:x:x:x") def test_decode_raise_exception_if_too_short_license(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:license') + urn.decode("urn:dje:license") def test_decode_raise_exception_if_too_short_component(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:component') + urn.decode("urn:dje:component") def test_decode_raise_exception_if_too_long(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:owner:o:n') + urn.decode("urn:dje:owner:o:n") def test_decode_raise_exception_if_too_long1(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:component:o:n:v:junk') + urn.decode("urn:dje:component:o:n:v:junk") def test_decode_raise_exception_if_too_long2(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:owner:org:junk') + urn.decode("urn:dje:owner:org:junk") def test_decode_raise_exception_if_too_long3(self): with self.assertRaises(urn.URNValidationError): - 
urn.decode('urn:dje:license:key:junk') + urn.decode("urn:dje:license:key:junk") def test_decode_raise_exception_if_unknown_object_type(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje:marshmallows:dsds') + urn.decode("urn:dje:marshmallows:dsds") def test_decode_raise_exception_if_missing_object_type(self): with self.assertRaises(urn.URNValidationError): - urn.decode('urn:dje::dsds') + urn.decode("urn:dje::dsds") def test_encode_decode_is_idempotent(self): - object_type = 'component' - fields = {'name': 'SIP Servlets (MSS)', 'version': 'v 1.4.0.FINAL'} - encoded = 'urn:dje:component:SIP+Servlets+%28MSS%29:v+1.4.0.FINAL' + object_type = "component" + fields = {"name": "SIP Servlets (MSS)", "version": "v 1.4.0.FINAL"} + encoded = "urn:dje:component:SIP+Servlets+%28MSS%29:v+1.4.0.FINAL" assert urn.encode(object_type, **fields) == encoded assert urn.decode(encoded) == (object_type, fields) diff --git a/tests/test_version.py b/tests/test_version.py index 00fc8b42..6dafa30f 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,7 +2,7 @@ # Copyright (c) nexB Inc. and others. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/commoncode for support or download. +# See https://github.com/aboutcode-org/commoncode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -12,112 +12,111 @@ class TestVersionHint(unittest.TestCase): - def test_version_hint(self): data = { - '/xmlgraphics/fop/source/fop-1.0-src.zip': '1.0', - '/xml/xindice/xml-xindice-1.2m1-src.zip': '1.2m1', - '/xmlgraphics/fop/binaries/fop-0.94-bin-jdk1.3.tar.gz': '0.94', - '/xmlgraphics/batik/batik-src-1.7beta1.zip': '1.7beta1', - '/xmlgraphics/batik/batik-1.7-jre13.zip': '1.7', - '/xmlbeans/source/xmlbeans-2.3.0-src.tgz': '2.3.0', - '/xml/xindice/source/xml-xindice-1.2m1-src.tar.gz': '1.2m1', - '/xml/xerces-p/binaries/XML-Xerces-2.3.0-4-win32.zip': '2.3.0-4', - '/xml/xerces-p/source/XML-Xerces-2.3.0-3.tar.gz': '2.3.0-3', - '/xml/xalan-j/source/xalan-j_2_7_0-src-2jars.tar.gz': '2_7_0', - '/xml/security/java-library/xml-security-src-1_0_5D2.zip': '1_0_5D2', - '/xml/commons/binaries/xml-commons-external-1.4.01-bin.zip': '1.4.01', - '/xml/commons/xml-commons-1.0.b2.zip': '1.0.b2', - '/xml/cocoon/3.0/cocoon-all-3.0.0-alpha-1-dist.tar.gz': '3.0.0-alpha-1', - '/xerces/j/source/Xerces-J-tools.2.10.0-xml-schema-1.1-beta.tar.gz': '2.10.0', - '/xerces/c/3/binaries/xerces-c-3.1.1-x86_64-solaris-cc-5.10.tar.gz': '3.1.1', - '/xerces/c/3/binaries/xerces-c-3.1.1-x86_64-windows-vc-8.0.zip': '3.1.1', - '/xerces/c/2/binaries/xerces-c_2_8_0-x86-windows-vc_7_1.zip': '2_8_0', - '/ws/woden/1.0M8/apache-woden-src-1.0M8.tar.gz': '1.0M8', - '/ws/scout/0_7rc1/source/scout-0.7rc1-src.zip': '0.7rc1', - '/ws/juddi/3_0/juddi-portal-bundle-3.0.0.rc1.zip': '3.0.0.rc1', - '/ws/juddi/3_0/juddi-portal-bundle-3.0.0.beta.zip': '3.0.0.beta', - '/ws/juddi/2_0RC7/juddi-tomcat-2.0rc7.zip': '2.0rc7', - '/ws/axis2/tools/1_4_1/axis2-wsdl2code-maven-plugin-1.4.1.jar': '1.4.1', - '/ws/axis/1_4/axis-src-1_4.zip': '1_4', - '/ws/axis-c/source/win32/axis-c-1.6b-Win32-trace-src.zip': '1.6b', - '/tuscany/java/sca/2.0-M5/apache-tuscany-sca-all-2.0-M5-src.tar.gz': '2.0-M5', - '/turbine/turbine-2.3.3-rc1/source/turbine-2.3.3-RC1-src.zip': '2.3.3-RC1', - 
'/tomcat/tomcat-connectors/jk/binaries/win64/jk-1.2.30/ia64/symbols-1.2.30.zip': '1.2.30', - '/tomcat/tomcat-7/v7.0.0-beta/bin/apache-tomcat-7.0.0-windows-i64.zip': '7.0.0', - '/tomcat/tomcat-4/v4.1.40/bin/apache-tomcat-4.1.40-LE-jdk14.exe': '4.1.40', - '/tapestry/tapestry-src-5.1.0.5.tar.gz': '5.1.0.5', - '/spamassassin/source/Mail-SpamAssassin-rules-3.3.0.r901671.tgz': '3.3.0.r901671', - '/spamassassin/Mail-SpamAssassin-rules-3.3.1.r923257.tgz': '3.3.1.r923257', - '/shindig/1.1-BETA5-incubating/shindig-1.1-BETA5-incubating-source.zip': '1.1-BETA5', - '/servicemix/nmr/1.0.0-m3/apache-servicemix-nmr-1.0.0-m3-src.tar.gz': '1.0.0-m3', - '/qpid/0.6/qpid-dotnet-0-10-0.6.zip': '0.6', - '/openjpa/2.0.0-beta/apache-openjpa-2.0.0-beta-binary.zip': '2.0.0-beta', - '/myfaces/source/portlet-bridge-2.0.0-alpha-2-src-all.tar.gz': '2.0.0-alpha-2', - '/myfaces/source/myfaces-extval20-2.0.3-src.tar.gz': '2.0.3', - '/harmony/milestones/6.0/debian/amd64/harmony-6.0-classlib_0.0r946981-1_amd64.deb': '6.0', - '/geronimo/eclipse/updates/plugins/org.apache.geronimo.st.v21.ui_2.1.1.jar': '2.1.1', - '/directory/studio/update/1.x/plugins/org.apache.directory.studio.aciitemeditor_1.5.2.v20091211.jar': '1.5.2.v20091211', - '/db/torque/torque-3.3/source/torque-gen-3.3-RC3-src.zip': '3.3-RC3', - '/cayenne/cayenne-3.0B1.tar.gz': '3.0B1', - '/cayenne/cayenne-3.0M4-macosx.dmg': '3.0M4', - '/xmlgraphics/batik/batik-docs-current.zip': 'current', - '/xmlgraphics/batik/batik-docs-previous.zip': 'previous', - '/poi/dev/bin/poi-bin-3.7-beta1-20100620.zip': '3.7-beta1-20100620', - '/excalibur/avalon-logkit/source/excalibur-logkit-2.0.dev-0-src.zip': '2.0.dev-0', - '/db/derby/db-derby-10.4.2.0/derby_core_plugin_10.4.2.zip': '10.4.2', - '/httpd/modpython/win/2.7.1/mp152dll.zip': '2.7.1', - '/perl/mod_perl-1.31/apaci/mod_perl.config.sh': '1.31', - '/xml/xerces-j/old_xerces2/Xerces-J-bin.2.0.0.alpha.zip': '2.0.0.alpha', - '/xml/xerces-p/archives/XML-Xerces-1.7.0_0.tar.gz': '1.7.0_0', - 
'/httpd/docs/tools-2004-05-04.zip': '2004-05-04', - '/ws/axis2/c/M0_5/axis2c-src-M0.5.tar.gz': 'M0.5', - '/jakarta/poi/dev/src/jakarta-poi-1.8.0-dev-src.zip': '1.8.0-dev', - '/tapestry/tapestry-4.0-beta-8.zip': '4.0-beta-8', - '/openejb/3.0-beta-1/openejb-3.0-beta-1.zip': '3.0-beta-1', - '/tapestry/tapestry-4.0-rc-1.zip': '4.0-rc-1', - '/jakarta/tapestry/source/3.0-rc-3/Tapestry-3.0-rc-3-src.zip': '3.0-rc-3', - '/jakarta/lucene/binaries/lucene-1.3-final.tar.gz': '1.3-final', - '/jakarta/tapestry/binaries/3.0-beta-1a/Tapestry-3.0-beta-1a-bin.zip': '3.0-beta-1a', - '/poi/release/bin/poi-bin-3.0-FINAL-20070503.tar.gz': '3.0-FINAL-20070503', - '/harmony/milestones/M4/apache-harmony-hdk-r603534-linux-x86-32-libstdc++v6-snapshot.tar.gz': 'r603534', - '/ant/antidote/antidote-20050330.tar.bz2': '20050330', - '/apr/not-released/apr_20020725223645.tar.gz': '20020725223645', - '/ibatis/source/ibatis.net/src-revision-709676.zip': 'revision-709676', - '/ws/axis-c/source/win32/axis-c-src-1-2-win32.zip': '1-2', - '/jakarta/slide/most-recent-2.0rc1-binaries/jakarta-slide 2.0rc1 jakarta-tomcat-4.1.30.zip': '2.0rc1', - '/httpd/modpython/win/3.0.1/python2.2.1-apache2.0.43.zip': '2.2.1', - '/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.1.0.cr1_20090319213629.jar': '2.1.0.cr1_20090319213629', - '/jakarta/poi/dev/bin/poi-2.0-pre1-20030517.jar': '2.0-pre1-20030517', - '/jakarta/poi/release/bin/jakarta-poi-1.5.0-FINAL-bin.zip': '1.5.0-FINAL', - '/jakarta/poi/release/bin/poi-bin-2.0-final-20040126.zip': '2.0-final-20040126', - '/activemq/apache-activemq/5.0.0/apache-activemq-5.0.0-sources.jar': '5.0.0', - '/turbine/turbine-2.2/source/jakarta-turbine-2.2-B1.tar.gz': '2.2-B1', - '/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.0.0.cr1.jar': '2.0.0.cr1', - '/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.0.0.final_20090108225011.jar': '2.0.0.final_20090108225011', - '/ws/axis/1_2RC3/axis-src-1_2RC3.zip': '1_2RC3', - 
'/commons/lang/old/v1.0-b1.1/commons-lang-1.0-b1.1.zip': '1.0-b1.1', - '/commons/net/binaries/commons-net-1.2.0-release.tar.gz': '1.2.0-release', - '/ant/ivyde/2.0.0.final/apache-ivyde-2.0.0.final-200907011148-RELEASE.tgz': '2.0.0.final-200907011148-RELEASE', - '/geronimo/eclipse/updates/plugins/org.apache.geronimo.jetty.j2ee.server.v11_1.0.0.jar': 'v11_1.0.0', - '/jakarta/cactus/binaries/jakarta-cactus-13-1.7.1-fixed.zip': '1.7.1-fixed', - '/jakarta/jakarta-turbine-maven/maven/jars/maven-1.0-b5-dev.20020731.085427.jar': '1.0-b5-dev.20020731.085427', - '/xml/xalan-j/source/xalan-j_2_5_D1-src.tar.gz': '2_5_D1', - '/ws/woden/IBuilds/I20051002_1145/woden-I20051002_1145.tar.bz2': 'I20051002_1145', - '/commons/beanutils/source/commons-beanutils-1.8.0-BETA-src.tar.gz': '1.8.0-BETA', - '/cocoon/BINARIES/cocoon-2.0.3-vm14-bin.tar.gz': '2.0.3-vm14', - '/felix/xliff_filters_v1_2_7_unix.jar': 'v1_2_7', - '/excalibur/releases/200702/excalibur-javadoc-r508111-15022007.tar.gz': 'r508111-15022007', - '/geronimo/eclipse/updates/features/org.apache.geronimo.v20.feature_2.0.0.jar': 'v20.feature_2.0.0', - '/geronimo/2.1.6/axis2-jaxws-1.3-G20090406.jar': '1.3-G20090406', - '/cassandra/debian/pool/main/c/cassandra/cassandra_0.4.0~beta1-1.diff.gz': '0.4.0~beta1-1', - '/ha-api-3.1.6.jar': '3.1.6', - 'ha-api-3.1.6.jar': '3.1.6' - } + "/xmlgraphics/fop/source/fop-1.0-src.zip": "1.0", + "/xml/xindice/xml-xindice-1.2m1-src.zip": "1.2m1", + "/xmlgraphics/fop/binaries/fop-0.94-bin-jdk1.3.tar.gz": "0.94", + "/xmlgraphics/batik/batik-src-1.7beta1.zip": "1.7beta1", + "/xmlgraphics/batik/batik-1.7-jre13.zip": "1.7", + "/xmlbeans/source/xmlbeans-2.3.0-src.tgz": "2.3.0", + "/xml/xindice/source/xml-xindice-1.2m1-src.tar.gz": "1.2m1", + "/xml/xerces-p/binaries/XML-Xerces-2.3.0-4-win32.zip": "2.3.0-4", + "/xml/xerces-p/source/XML-Xerces-2.3.0-3.tar.gz": "2.3.0-3", + "/xml/xalan-j/source/xalan-j_2_7_0-src-2jars.tar.gz": "2_7_0", + "/xml/security/java-library/xml-security-src-1_0_5D2.zip": "1_0_5D2", + 
"/xml/commons/binaries/xml-commons-external-1.4.01-bin.zip": "1.4.01", + "/xml/commons/xml-commons-1.0.b2.zip": "1.0.b2", + "/xml/cocoon/3.0/cocoon-all-3.0.0-alpha-1-dist.tar.gz": "3.0.0-alpha-1", + "/xerces/j/source/Xerces-J-tools.2.10.0-xml-schema-1.1-beta.tar.gz": "2.10.0", + "/xerces/c/3/binaries/xerces-c-3.1.1-x86_64-solaris-cc-5.10.tar.gz": "3.1.1", + "/xerces/c/3/binaries/xerces-c-3.1.1-x86_64-windows-vc-8.0.zip": "3.1.1", + "/xerces/c/2/binaries/xerces-c_2_8_0-x86-windows-vc_7_1.zip": "2_8_0", + "/ws/woden/1.0M8/apache-woden-src-1.0M8.tar.gz": "1.0M8", + "/ws/scout/0_7rc1/source/scout-0.7rc1-src.zip": "0.7rc1", + "/ws/juddi/3_0/juddi-portal-bundle-3.0.0.rc1.zip": "3.0.0.rc1", + "/ws/juddi/3_0/juddi-portal-bundle-3.0.0.beta.zip": "3.0.0.beta", + "/ws/juddi/2_0RC7/juddi-tomcat-2.0rc7.zip": "2.0rc7", + "/ws/axis2/tools/1_4_1/axis2-wsdl2code-maven-plugin-1.4.1.jar": "1.4.1", + "/ws/axis/1_4/axis-src-1_4.zip": "1_4", + "/ws/axis-c/source/win32/axis-c-1.6b-Win32-trace-src.zip": "1.6b", + "/tuscany/java/sca/2.0-M5/apache-tuscany-sca-all-2.0-M5-src.tar.gz": "2.0-M5", + "/turbine/turbine-2.3.3-rc1/source/turbine-2.3.3-RC1-src.zip": "2.3.3-RC1", + "/tomcat/tomcat-connectors/jk/binaries/win64/jk-1.2.30/ia64/symbols-1.2.30.zip": "1.2.30", + "/tomcat/tomcat-7/v7.0.0-beta/bin/apache-tomcat-7.0.0-windows-i64.zip": "7.0.0", + "/tomcat/tomcat-4/v4.1.40/bin/apache-tomcat-4.1.40-LE-jdk14.exe": "4.1.40", + "/tapestry/tapestry-src-5.1.0.5.tar.gz": "5.1.0.5", + "/spamassassin/source/Mail-SpamAssassin-rules-3.3.0.r901671.tgz": "3.3.0.r901671", + "/spamassassin/Mail-SpamAssassin-rules-3.3.1.r923257.tgz": "3.3.1.r923257", + "/shindig/1.1-BETA5-incubating/shindig-1.1-BETA5-incubating-source.zip": "1.1-BETA5", + "/servicemix/nmr/1.0.0-m3/apache-servicemix-nmr-1.0.0-m3-src.tar.gz": "1.0.0-m3", + "/qpid/0.6/qpid-dotnet-0-10-0.6.zip": "0.6", + "/openjpa/2.0.0-beta/apache-openjpa-2.0.0-beta-binary.zip": "2.0.0-beta", + "/myfaces/source/portlet-bridge-2.0.0-alpha-2-src-all.tar.gz": 
"2.0.0-alpha-2", + "/myfaces/source/myfaces-extval20-2.0.3-src.tar.gz": "2.0.3", + "/harmony/milestones/6.0/debian/amd64/harmony-6.0-classlib_0.0r946981-1_amd64.deb": "6.0", + "/geronimo/eclipse/updates/plugins/org.apache.geronimo.st.v21.ui_2.1.1.jar": "2.1.1", + "/directory/studio/update/1.x/plugins/org.apache.directory.studio.aciitemeditor_1.5.2.v20091211.jar": "1.5.2.v20091211", + "/db/torque/torque-3.3/source/torque-gen-3.3-RC3-src.zip": "3.3-RC3", + "/cayenne/cayenne-3.0B1.tar.gz": "3.0B1", + "/cayenne/cayenne-3.0M4-macosx.dmg": "3.0M4", + "/xmlgraphics/batik/batik-docs-current.zip": "current", + "/xmlgraphics/batik/batik-docs-previous.zip": "previous", + "/poi/dev/bin/poi-bin-3.7-beta1-20100620.zip": "3.7-beta1-20100620", + "/excalibur/avalon-logkit/source/excalibur-logkit-2.0.dev-0-src.zip": "2.0.dev-0", + "/db/derby/db-derby-10.4.2.0/derby_core_plugin_10.4.2.zip": "10.4.2", + "/httpd/modpython/win/2.7.1/mp152dll.zip": "2.7.1", + "/perl/mod_perl-1.31/apaci/mod_perl.config.sh": "1.31", + "/xml/xerces-j/old_xerces2/Xerces-J-bin.2.0.0.alpha.zip": "2.0.0.alpha", + "/xml/xerces-p/archives/XML-Xerces-1.7.0_0.tar.gz": "1.7.0_0", + "/httpd/docs/tools-2004-05-04.zip": "2004-05-04", + "/ws/axis2/c/M0_5/axis2c-src-M0.5.tar.gz": "M0.5", + "/jakarta/poi/dev/src/jakarta-poi-1.8.0-dev-src.zip": "1.8.0-dev", + "/tapestry/tapestry-4.0-beta-8.zip": "4.0-beta-8", + "/openejb/3.0-beta-1/openejb-3.0-beta-1.zip": "3.0-beta-1", + "/tapestry/tapestry-4.0-rc-1.zip": "4.0-rc-1", + "/jakarta/tapestry/source/3.0-rc-3/Tapestry-3.0-rc-3-src.zip": "3.0-rc-3", + "/jakarta/lucene/binaries/lucene-1.3-final.tar.gz": "1.3-final", + "/jakarta/tapestry/binaries/3.0-beta-1a/Tapestry-3.0-beta-1a-bin.zip": "3.0-beta-1a", + "/poi/release/bin/poi-bin-3.0-FINAL-20070503.tar.gz": "3.0-FINAL-20070503", + "/harmony/milestones/M4/apache-harmony-hdk-r603534-linux-x86-32-libstdc++v6-snapshot.tar.gz": "r603534", + "/ant/antidote/antidote-20050330.tar.bz2": "20050330", + 
"/apr/not-released/apr_20020725223645.tar.gz": "20020725223645", + "/ibatis/source/ibatis.net/src-revision-709676.zip": "revision-709676", + "/ws/axis-c/source/win32/axis-c-src-1-2-win32.zip": "1-2", + "/jakarta/slide/most-recent-2.0rc1-binaries/jakarta-slide 2.0rc1 jakarta-tomcat-4.1.30.zip": "2.0rc1", + "/httpd/modpython/win/3.0.1/python2.2.1-apache2.0.43.zip": "2.2.1", + "/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.1.0.cr1_20090319213629.jar": "2.1.0.cr1_20090319213629", + "/jakarta/poi/dev/bin/poi-2.0-pre1-20030517.jar": "2.0-pre1-20030517", + "/jakarta/poi/release/bin/jakarta-poi-1.5.0-FINAL-bin.zip": "1.5.0-FINAL", + "/jakarta/poi/release/bin/poi-bin-2.0-final-20040126.zip": "2.0-final-20040126", + "/activemq/apache-activemq/5.0.0/apache-activemq-5.0.0-sources.jar": "5.0.0", + "/turbine/turbine-2.2/source/jakarta-turbine-2.2-B1.tar.gz": "2.2-B1", + "/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.0.0.cr1.jar": "2.0.0.cr1", + "/ant/ivyde/updatesite/features/org.apache.ivy.feature_2.0.0.final_20090108225011.jar": "2.0.0.final_20090108225011", + "/ws/axis/1_2RC3/axis-src-1_2RC3.zip": "1_2RC3", + "/commons/lang/old/v1.0-b1.1/commons-lang-1.0-b1.1.zip": "1.0-b1.1", + "/commons/net/binaries/commons-net-1.2.0-release.tar.gz": "1.2.0-release", + "/ant/ivyde/2.0.0.final/apache-ivyde-2.0.0.final-200907011148-RELEASE.tgz": "2.0.0.final-200907011148-RELEASE", + "/geronimo/eclipse/updates/plugins/org.apache.geronimo.jetty.j2ee.server.v11_1.0.0.jar": "v11_1.0.0", + "/jakarta/cactus/binaries/jakarta-cactus-13-1.7.1-fixed.zip": "1.7.1-fixed", + "/jakarta/jakarta-turbine-maven/maven/jars/maven-1.0-b5-dev.20020731.085427.jar": "1.0-b5-dev.20020731.085427", + "/xml/xalan-j/source/xalan-j_2_5_D1-src.tar.gz": "2_5_D1", + "/ws/woden/IBuilds/I20051002_1145/woden-I20051002_1145.tar.bz2": "I20051002_1145", + "/commons/beanutils/source/commons-beanutils-1.8.0-BETA-src.tar.gz": "1.8.0-BETA", + "/cocoon/BINARIES/cocoon-2.0.3-vm14-bin.tar.gz": "2.0.3-vm14", + 
"/felix/xliff_filters_v1_2_7_unix.jar": "v1_2_7", + "/excalibur/releases/200702/excalibur-javadoc-r508111-15022007.tar.gz": "r508111-15022007", + "/geronimo/eclipse/updates/features/org.apache.geronimo.v20.feature_2.0.0.jar": "v20.feature_2.0.0", + "/geronimo/2.1.6/axis2-jaxws-1.3-G20090406.jar": "1.3-G20090406", + "/cassandra/debian/pool/main/c/cassandra/cassandra_0.4.0~beta1-1.diff.gz": "0.4.0~beta1-1", + "/ha-api-3.1.6.jar": "3.1.6", + "ha-api-3.1.6.jar": "3.1.6", + } # FIXME: generate a test function for each case for path in data: expected = data[path] - if not expected.lower().startswith('v'): - expected = 'v ' + expected + if not expected.lower().startswith("v"): + expected = "v " + expected assert version.hint(path) == expected