diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..5ace4600 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index b7e1e3cf..e54e686d 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -15,26 +15,39 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6] + python-version: ["3.12"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install tools in CI virtualenv run: | python -m pip install --upgrade pip pip install flake8 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install pdm + - name: Create in-project virtualenv and install dependencies + run: | + pdm python install ${{ matrix.python-version }} + # "When you run pdm install the first time on a new PDM-managed project, whose Python interpreter is not decided yet, + # PDM will create a virtualenv in <project_root>/.venv, and install dependencies into it." + # https://pdm-project.org/en/latest/usage/venv/ + pdm install + - name: Install coverage tool in in-project virtualenv + run: | + pdm run python -m ensurepip + # coverage must run in the same venv as the code being tested. + pdm run python -m pip install coverage - name: Generate coverage report run: | - pip install coverage - coverage run --source=. -m runtests - coverage xml + pdm use --venv in-project + source .venv/bin/activate + python -m coverage run --source=. 
-m runtests + python -m coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2b32bebf..f6f59be3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions # -# This version is customized to use the local flake8rc and test with unpythonic.setup.fixtures. +# This version is customized to install with pdm, use the local flake8rc, and test with unpythonic.test.fixtures. name: Python package @@ -17,25 +17,40 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "pypy3"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "pypy-3.11"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install tools in CI venv run: | python -m pip install --upgrade pip pip install flake8 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install pdm - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --config=flake8rc --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--config=flake8rc --count --exit-zero --max-complexity=100 --max-line-length=127 --statistics + - name: Determine Python version string for PDM + run: | + echo "TARGET_PYTHON_VERSION_FOR_PDM=${{ matrix.python-version }}" | tr - @ >> "$GITHUB_ENV" + # We need this hack at all because CI expects e.g. "pypy-3.10", whereas PDM expects "pypy@3.10". + # We send the result into an environment variable so that the next step can use it. + # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable + - name: Create in-project virtualenv and install dependencies + run: | + pdm python install "$TARGET_PYTHON_VERSION_FOR_PDM" + # "When you run pdm install the first time on a new PDM-managed project, whose Python interpreter is not decided yet, + # PDM will create a virtualenv in <project_root>/.venv, and install dependencies into it." + # https://pdm-project.org/en/latest/usage/venv/ + pdm install - name: Test with unpythonic.test.fixtures run: | + pdm use --venv in-project + source .venv/bin/activate python runtests.py diff --git a/.gitignore b/.gitignore index 2a6cbf88..24af6940 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,15 @@ +00_stuff +__pycache__ *~ *.pyc *.c build dist +MANIFEST +pdm.lock +.pdm-python .spyproject +.venv *.egg-info +*.mypy_cache +.python-version diff --git a/AUTHORS.md b/AUTHORS.md index 584a1c65..1853b38a 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,6 +2,7 @@ - Juha Jeronen (@Technologicat) - original author - @aisha-w - documentation improvements +- @Technologicat with Claude (Anthropic) as AI pair programmer - CI modernization, Python 3.13–3.14 and mcpyrate 4.0.0 adaptation (2.0.0) **Design inspiration from the internet**: diff --git a/CHANGELOG.md b/CHANGELOG.md index 46a8a7e3..4eb8cbf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,38 +1,372 @@ -**0.15.0** (in progress; updated 23 April 2021) - *The very latest future obsolete* edition: +# Changelog -This 
edition concentrates on upgrading our dependencies, namely the macro expander, and the Python language itself, to ensure `unpythonic` keeps working for the next few years. This unfortunately introduces some breaking changes; see below. While at it, we have also taken the opportunity to make also any previously scheduled breaking changes. +**2.0.0** (March 2026, in progress) — *"Six impossible things before breakfast"* edition: -**Minimum Python version is now 3.6**. For future plans, see our [Python language version support status](https://github.com/Technologicat/unpythonic/issues/1). +**IMPORTANT**: + +- **Python version support**: 3.10–3.14 (dropped 3.8, 3.9; added 3.13, 3.14). PyPy 3.11. + - If you need `unpythonic` for Python 3.8 or 3.9, use version 1.0.0. +- **Requires mcpyrate >= 4.0.0**. + - mcpyrate 4.0.0 dropped the `Str`, `Num`, `NameConstant` AST compatibility shims and the `getconstant` helper. **New**: -- `with namedlambda` now understands the walrus operator, too. In `f := lambda ...: ...`, the lambda will get the name `f`. (Python 3.8 and later.) -- Robustness: several auxiliary syntactic constructs such as `local[]`/`delete[]` (for `do[]`), and `call_cc[]` (for `with continuations`) now detect *at macro expansion time* if they appear outside any valid lexical context, and raise `SyntaxError` (with a descriptive message) if so. That is, the error is now raised *at compile time*. Previously these constructs could only raise an error at run time, and not all of them could detect the error even then. -- `unpythonic.dispatch.generic_for`: add methods to a generic function defined elsewhere. -- Python 3.8 and 3.9 support added. +- **Python 3.13 and 3.14 support**. +- `autoreturn` macro now handles `match`/`case` statements. Each case branch has its own tail position. +- New scope analyzer tests for `match`/`case` patterns and `try`/`except*`. +- New `unpythonic.test.runner` module: reusable test runner with module discovery, version-suffix gating (e.g. 
`test_foo_3_11.py` skipped on Python < 3.11), and integration with the test framework's warning system. Other projects using `unpythonic.test.fixtures` can import it directly. +- New `emit_warning()` function in `unpythonic.test.fixtures` for signaling test warnings from infrastructure code (outside `test[]`/`warn[]` macros). Used by the test runner for version-suffix skips, which show in the testset warning count. +- Missing optional dependencies (sympy, mpmath) in tests emit `warn[]` instead of `error[]`, correctly reflecting that these are expected skips, not failures. +- Runtime type checker (`unpythonic.typecheck`): new supported typing features — `NoReturn`, `Never` (3.11+), `Literal`, `Type`, `ClassVar`, `Final`, `DefaultDict`, `OrderedDict`, `Counter`, `ChainMap`, `IO`/`TextIO`/`BinaryIO` (mapped to `io` module ABCs), `Pattern[T]`/`Match[T]` (string type checked when parametric), `ContextManager`, `AsyncContextManager`, `Awaitable`, `Coroutine`, `AsyncIterable`, `AsyncIterator`, `Generator`, `AsyncGenerator`. +- Runtime type checker: `TypedDict` support — structural checking of required/optional keys and value types. +- Runtime type checker: `Protocol` support — `@runtime_checkable` Protocols work via `isinstance`; non-runtime-checkable Protocols raise `TypeError` with an actionable message. +- Runtime type checker: parametric forms of abstract ABCs — `Iterable[T]`, `Collection[T]`, `Reversible[T]` perform best-effort element checking (elements checked when value is `Sized`; ABC-only for opaque iterators). `Iterator[T]` and `Container[T]` accept parametric form with type arg silently ignored. + +**Fixed**: + +- Runtime type checker (`unpythonic.typecheck`): fixed compatibility with Python 3.14, where `typing.Union` is no longer a `_GenericAlias`. Now uses `typing.get_origin` (available since 3.8) instead of a local copy. +- Runtime type checker: fixed `TypeVar` detection to use `isinstance(T, typing.TypeVar)` instead of a fragile `repr`-based heuristic. 
+- Runtime type checker: `typing.Reversible` check now uses `isinstance` instead of a `hasattr("__reversed__")` workaround from the Python 3.5 era. +- Runtime type checker: removed redundant `safeissubclass` fallbacks for generic types — `typing.get_origin` handles both bare and parameterized generics on 3.10+. +- Scope analyzer: fixed `MatchCapturesCollector` bug where class references (e.g. `Point` in `case Point(x, y):`) were incorrectly collected as captured variable names. Match captures are `MatchAs`/`MatchStar` nodes with bare strings, not `Name` nodes. +- Macro layer: updated all `hasattr(tree, "ctx")` checks to use `getattr` with defaults, for correct behavior on Python 3.13+ where AST fields always exist with default values. +- Macro layer: updated `arguments()` constructor calls to always include `posonlyargs=[]`, avoiding a `DeprecationWarning` on Python 3.13 (will become an error in 3.15). +- MS Windows: `unpythonic.net.util` failed to load, due to missing `termios` module (which is *nix only) being loaded by `unpythonic.net.__init__` when it imports `unpythonic.net.ptyproxy`. + - Fixed by catching `ModuleNotFoundError`, disabling `ptyproxy` on MS Windows systems. + +**Deprecated**: + +- Parenthesis syntax for macro arguments (e.g. `let((x, 1), (y, 2))`). Use bracket syntax instead: `let[[x, 1], [y, 2]]`. The parenthesis syntax is kept for backward compatibility but may be removed in a future version. +- Runtime type checker: `typing.Text` (deprecated since Python 3.11) and `typing.ByteString` (deprecated since Python 3.12) support is now marked for removal when the floor bumps to Python 3.12. + + +--- + +**1.0.0** (21 February 2026) — *"Same supercharger, new badge"* edition: + +Re-release of 0.15.5 as 1.0.0. No code changes. The library has been stable and in light maintenance mode for years; the version number now reflects this de facto status quo. 
+ + +--- + +**0.15.5** (16 April 2025) - hotfix: + +**Changed**: + +- Internal: Upgrade build system to `pdm`. + - This is important for the road ahead, since the old `setuptools` build system has been deprecated. + - The GitHub CI scripts for `unpythonic` now use PDM to manage the testing venv and dependencies, too. Now the tests should run the same way as they would on a local system. + +- Bump `mcpyrate` to the hotfix version 3.6.4. + - The only difference is (besides `mcpyrate` itself also upgrading its build system to `pdm` internally) that the text colorizer now works correctly also for `input` with `readline`. + + +--- + +**0.15.4** (27 September 2024) - hotfix: + +**Fixed**: + +- Bump `mcpyrate` to the hotfix version 3.6.3. + - This is only to make sure no one accidentally installs the broken version, `mcpyrate` 3.6.2, which had a bug in interactive console mode that wasn't caught by CI. + + +--- + +**0.15.3** (27 September 2024) - *New tree snakes* edition: + +**IMPORTANT**: + +- Minimum Python language version is now 3.8. + - We support 3.8, 3.9, 3.10, 3.11, 3.12, and PyPy3 (language versions 3.8, 3.9, and 3.10). + - Python 3.6 and 3.7 support dropped, as these language versions have officially reached end-of-life. If you need `unpythonic` for Python 3.6 or 3.7, use version 0.15.2. +- Minimum version for optional macro expander `mcpyrate` is now 3.6.2, because the `astcompat` utility module was moved there. + + +**New**: + +- **Python 3.12 support**. + - As in, all tests pass, so there are no regressions. Some undiscovered interactions with new language features (`type` statement) may still be broken, although the most obvious cases are already implemented. +- **Python 3.11 support**. + - As in, all tests pass, so there are no regressions. Some undiscovered interactions with new language features (`try`/`except*` construct) may still be broken, although the most obvious cases are already implemented. 
+- Walrus syntax `name := value` is now supported, and preferred, for all env-assignments. Old syntax `name << value` still works, and will remain working at least until v0.16.0, whenever that is. + - Note that language support for using an assignment expression inside a subscript *without parenthesizing it* was [added in Python 3.10](https://docs.python.org/3/whatsnew/3.10.html#other-language-changes). + - If you still use Python 3.8 or 3.9, with the new `:=` syntax you must put parentheses around each `let` binding, because syntactically, the bindings subform looks like a subscript. + - All documentation is written in Python 3.10 syntax; all unit tests are written in Python 3.8 syntax. + + +**Changed**: + +- Utility module `unpythonic.syntax.astcompat`, used by the macro layer, moved to `mcpyrate.astcompat`. This module handles version differences in the `ast` module in various versions of Python. + + +**Fixed**: + +- `ETAEstimator` edge case: at any point after all tasks have been marked completed, return a constant zero estimate for the remaining time. +- Fix borkage in `mathseq` when running with SymPy 1.13 (SymPy is only used in tests). Bump SymPy version to 1.13. +- Fix bug in scopeanalyzer: `get_names_in_store_context` now collects also names bound in `match`/`case` constructs (pattern matching, Python 3.10). + + +--- + +**0.15.2** (19 September 2024) + +This time, just a small but important fix. + +**Fixed**: + +- `unpythonic.env.env` is now pickleable. Save your fancy bunches into `.pickle` files and load them back! + +**Future plans**: + +Contrary to appearances, this project is not dead. But it already does most of what I personally need it to do, so it is pretty much in maintenance mode. And it has not required much maintenance over the past two years. + +We still plan to officially support Python 3.11+ later, as well as to update all constructs with assignment semantics to use the more appropriate `:=` operator, when/if I find the time to do so. 
The syntax uses `<<` for historical reasons - these constructs were originally implemented in 2018, on Python 3.4, back when `:=` did not exist. + +The most likely upgrade timeframe is when I personally switch to Python 3.11+, and something breaks. That is also when I'll likely next upgrade the sister project `mcpyrate`. + + +--- + +**0.15.1** (28 January 2022) - *New Year's edition*: + +**New**: + +- **Python 3.10 support**. Running on Python 3.10 requires `mcpyrate` 3.6.0. +- New module `unpythonic.timeutil`, with utilities for converting a number of seconds into human-understood formats (`seconds_to_human`, `format_human_time`), and a simple running-average `ETAEstimator` that takes advantage of these. As usual, these are available at the top level of `unpythonic`. +- Add function `unpythonic.syntax.get_cc`, the less antisocial little sister of `call_cc` from an alternate timeline, to make programming with continuations slightly more convenient. (Alternate timelines happen a lot when one uses multi-shot continuations.) The two work together. See docstring. +- Tag continuation closures (generated by the `with continuations` macro), for introspection. + - To detect at run time whether a given object is a continuation function, use the function `unpythonic.syntax.iscontinuation`. + - This is purely an introspection feature; `unpythonic` itself does not use this information. For why you might want to query this, see `get_cc`, particularly the [examples in unit tests](unpythonic/syntax/tests/test_conts.py). + - The information is stored as an attribute on the function object; keep this in mind if you intend to wrap the continuation function with another function. (Strictly, this is the correct behavior, since a custom wrapper is not a continuation function generated by the `with continuations` macro.) + +**Fixed**: + +- The test framework `unpythonic.test.fixtures` is now correctly installed when installing `unpythonic`. 
See [#81](https://github.com/Technologicat/unpythonic/issues/81). +- The subpackage for live REPL functionality, `unpythonic.net`, is now correctly installed when installing `unpythonic`. +- Fix a broken import that prevented the REPL server `unpythonic.net.server` from starting. This was broken by the move of `async_raise` into `unpythonic.excutil` in 0.15.0. +- `unpythonic.syntax.prefix`: Fix wrong macro name in error message of `unpythonic.syntax.prefix.u`. Document in the docstring that the magic operators `q`, `u`, and `kw` (of the `prefix` macro) cannot be renamed by as-importing. +- Preserve the source location info of the dialect-import statement in the example dialects in [`unpythonic.dialects`](unpythonic/dialects/). In the output, the lines of expanded source code that originate in a particular dialect template are marked as coming from the unexpanded source line that contains the corresponding dialect-import. + - If you want to see the line numbers before and after dialect expansion, use the `StepExpansion` dialect from `mcpyrate.debug`. + - This fix requires `mcpyrate` 3.6.0 or later. The code will run also on earlier versions of `mcpyrate`; then, just like before, it will look as if all lines that originate in any dialect template came from the beginning of the user source code. + + +--- + + +**0.15.0** (22 June 2021) - *"We say 'howdy' around these parts"* edition: + +Beside introducing **dialects** (a.k.a. whole-module code transforms), this edition concentrates on upgrading our dependencies, namely the macro expander, and the Python language itself, to ensure `unpythonic` keeps working for the next few years. This introduces some breaking changes, so we have also taken the opportunity to apply any such that were previously scheduled. + +We have sneaked in some upgrades for other subsystems, too. Particularly `curry`, the multiple dispatch system (`@generic`), and the integration between these two have been improved significantly. 
+ +**IMPORTANT**: + +- Minimum Python language version is now 3.6. + - We support 3.6, 3.7, 3.8, 3.9 and PyPy3 (language versions 3.6 and 3.7). + - For future plans, see our [Python language version support status](https://github.com/Technologicat/unpythonic/issues/1). +- The optional macro expander is now [`mcpyrate`](https://github.com/Technologicat/mcpyrate). + +If you still need `unpythonic` for Python 3.4 or 3.5, use version 0.14.3, which is the final version of `unpythonic` that supports those language versions. + +The same applies if you need the macro parts of `unpythonic` (i.e. import anything from `unpythonic.syntax`) in your own project that uses MacroPy. Version 0.14.3 of `unpythonic` works up to Python 3.7. + + +**New**: + +- **Dialects!** New module `unpythonic.dialects`, providing [some example dialects](doc/dialects.md) that demonstrate what can be done with a [dialects system](https://github.com/Technologicat/mcpyrate/blob/master/doc/dialects.md) (i.e. full-module code transformer) together with a kitchen-sink language extension macro package such as `unpythonic`. + - These dialects have been moved from the now-obsolete [`pydialect`](https://github.com/Technologicat/pydialect) project and ported to use [`mcpyrate`](https://github.com/Technologicat/mcpyrate). + +- **Improved robustness**: several auxiliary syntactic constructs now detect *at macro expansion time* if they appear outside any valid lexical context, and raise `SyntaxError` (with a descriptive message) if so. + - The full list is: + - `call_cc[]`, for `with continuations` + - `it`, for `aif[]` + - `local[]`/`delete[]`, for `do[]` + - `q`/`u`/`kw`, for `with prefix` + - `where`, for `let[body, where(k0=v0, ...)]` (also for `letseq`, `letrec`, `let_syntax`, `abbrev`) + - `with expr`/`with block`, for `with let_syntax`/`with abbrev` + - Previously these constructs could only raise an error at run time, and not all of them could detect the error even then. 
+ +- **Syntactic consistency**: allow env-assignment notation and brackets to declare bindings in the `let` family of macros. The preferred syntaxes for the `let` macro are now: + + ```python + let[x << 42, y << 9001][...] # lispy expr + let[[x << 42, y << 9001] in ...] # haskelly let-in + let[..., where[x << 42, y << 9001]] # haskelly let-where + ``` + If there is just one binding, these become: + ```python + let[x << 42][...] + let[[x << 42] in ...] + let[..., where[x << 42]] + ``` + Similarly for `letseq`, `letrec`, and the decorator versions; and for the expr forms of `let_syntax`, `abbrev`. The reason for preferring this notation is that it is consistent with both `unpythonic`'s env-assignments (`let` bindings live in an `env`) and the use of brackets to denote macro invocations. + + To ease backwards compatibility, we still accept the syntax used up to v0.14.3, too. + + Also, from symmetry and usability viewpoints, if a mix of brackets and parentheses are used, it hardly makes sense to require some specific mix - so this has been extended so that the choice of delimiter doesn't matter. All the following are also accepted, with the meaning exactly the same as above: + ```python + let[[x, 42], [y, 9001]][...] # best visual consistency + let[(x, 42), (y, 9001)][...] + let([x, 42], [y, 9001])[...] + let((x, 42), (y, 9001))[...] # like up to v0.14.3 + let[[[x, 42], [y, 9001]] in ...] # best visual consistency + let[[(x, 42), (y, 9001)] in ...] + let[([x, 42], [y, 9001]) in ...] + let[((x, 42), (y, 9001)) in ...] # like up to v0.14.3 + let[(x << 42, y << 9001) in ...] + let[..., where[[x, 42], [y, 9001]]] # best visual consistency + let[..., where[(x, 42), (y, 9001)]] + let[..., where([x, 42], [y, 9001])] + let[..., where((x, 42), (y, 9001))] # like up to v0.14.3 + let[..., where(x << 42, y << 9001)] + ``` + For a single binding, these are also accepted: + ```python + let[x, 42][...] + let(x, 42)[...] # like up to v0.14.3 + let[[x, 42] in ...] + let[(x, 42) in ...] 
# like up to v0.14.3 + let[(x << 42) in ...] + let[..., where[x, 42]] + let[..., where(x, 42)] # like up to v0.14.3 + let[..., where(x << 42)] + ``` + These alternate syntaxes will be supported at least as long as we accept parentheses to pass macro arguments; but in new code, please use the preferred syntaxes. + +- **Miscellaneous.** + - `with namedlambda` now understands the walrus operator, too. In the construct `f := lambda ...: ...`, the lambda will get the name `f`. (Python 3.8 and later.) + - `with namedlambda` now auto-names lambdas that don't have a name candidate using their source location info, if present. This makes it easy to see in a stack trace where some particular lambda was defined. + - Multiple-dispatch system `unpythonic.dispatch`: + - Use consistent terminology: + - The function that supports multiple call signatures is a *generic function*. + - Its individual implementations are *multimethods*. + - Add decorator `@augment`: add a multimethod to a generic function defined elsewhere. + - Add function `isgeneric` to detect whether a callable has been declared `@generic`. + - Add function `methods`: display a list of multimethods of a generic function. + - It is now possible to dispatch on a homogeneous type of contents collected by a `**kwargs` parameter. + - `curry` now supports `@generic` functions. **This feature is experimental. Semantics may still change.** + - The utilities `arities`, `required_kwargs`, and `optional_kwargs` now support `@generic` functions. **This feature is experimental. Semantics may still change.** + - `curry` now errors out immediately on argument type mismatch. + - Add `partial`, a type-checking wrapper for `functools.partial`, that errors out immediately on argument type mismatch. + - Add `unpythonic.excutil.reraise_in` (expr form), `unpythonic.excutil.reraise` (block form): conveniently remap library exception types to application exception types. 
Idea from [Alexis King (2016): Four months with Haskell](https://lexi-lambda.github.io/blog/2016/06/12/four-months-with-haskell/). + - Add variants of the above for the conditions-and-restarts system: `unpythonic.conditions.resignal_in`, `unpythonic.conditions.resignal`. The new signal is sent using the same error-handling protocol as the original signal, so that e.g. an `error` remains an `error` even if re-signaling changes its type. + - Add `resolve_bindings_partial`, useful for analyzing partial application. + - Add `triangular`, to generate the triangular numbers (1, 3, 6, 10, ...). + - Add `partition_int_triangular` to answer a timeless question concerning stackable plushies. + - Add `partition_int_custom` to answer unanticipated similar questions. + - All documentation files now have a quick navigation section to skip to another part of the docs. (For all except the README, it's at the top.) + - Python 3.8 and 3.9 support added. + **Non-breaking changes**: -- The modules `unpythonic.dispatch` and `unpythonic.typecheck`, which provide the `@generic` and `@typed` decorators and the `isoftype` function, are no longer considered experimental. From this release on, they receive the same semantic versioning guarantees as the rest of `unpythonic`. +- **Changes to how some macros expand.** + - Some macros, notably `letseq`, `do0`, and `lazyrec`, now expand into hygienic macro captures of other macros. The `continuations` macro also outputs a hygienically captured `aif` when transforming an `or` expression that occurs in tail position. + - This allows `mcpyrate.debug.step_expansion` to show the intermediate result, as well as brings the implementation closer to the natural explanation of how these macros are defined. (Zen of Python: if the implementation is easy to explain, it *might* be a good idea.) + - The implicit do (extra bracket syntax) also expands as a hygienically captured `do`, but e.g. 
in `let[]` it will then expand immediately (due to `let`'s inside-out expansion order) before control returns to the macro stepper. If you want to see the implicit `do[]` invocation, use the `"detailed"` mode of the stepper, which shows individual macro invocations even when expanding inside-out: `step_expansion["detailed"][...]`, `with step_expansion["detailed"]:`. + + - The `do[]` and `do0[]` macros now expand outside-in. The main differences from a user perspective are: + - Any source code captures (such as those performed by `test[]`) show the expanded output of `do` and `do0`, because that's what they receive. (For tests, you may want to use the macro `with expand_testing_macros_first`, which see.) + - `mcpyrate.debug.step_expansion` is able to show the intermediate result after the `do` or `do0` has expanded, but before anything else has been done to the tree. + +- **Miscellaneous.** + - Resolve issue [#61](https://github.com/Technologicat/unpythonic/issues/61): `curry` now supports kwargs properly. + - We now analyze parameter bindings like Python itself does, so it should no longer matter whether arguments are passed by position or by name. + - Positional passthrough works as before. Named passthrough added. + - Any remaining arguments (that cannot be accepted by the initial call) are passed through to a callable intermediate result (if any), and then outward on the curry context stack as a `Values`. Since `curry` in this role is essentially a function-composition utility, the receiving curried function instance unpacks the `Values` into args and kwargs. + - If any extra arguments (positional or named) remain when the top-level curry context exits, then by default, `TypeError` is raised. To override, use `with dyn.let(curry_context=["whatever"])`, just like before. Then you'll get a `Values` object. 
+ - The generator instances created by the gfuncs returned by `gmemoize`, `imemoize`, and `fimemoize`, now support the `__len__` and `__getitem__` methods to access the already-yielded, memoized part. Asking for the `len` returns the current length of the memo. For subscripting, both a single `int` index and a slice are accepted. Note that memoized generators do **not** support all of the [`collections.abc.Sequence`](https://docs.python.org/3/library/collections.abc.html) API, because e.g. `__contains__` and `__reversed__` are missing, on purpose. + - `fup`/`fupdate`/`ShadowedSequence` can now walk the start of a memoized infinite replacement backwards. (Use `imemoize` on the original iterable, instantiate the generator, and use that generator instance as the replacement.) + - When using the `autoreturn` macro, if the item in tail position is a function definition or class definition, return the thing that was defined. + - The `nb` macro now works together with `autoreturn`. + - `unpythonic.conditions.signal`, when the signal goes unhandled, now returns the canonized input `condition`, with a nice traceback attached. This feature is intended for implementing custom error protocols on top of `signal`; `error` already uses it to produce a nice-looking error report. + - The internal exception types `unpythonic.conditions.InvokeRestart` and `unpythonic.ec.Escape` now inherit from `BaseException`, so that they are not inadvertently caught by `except Exception` handlers. + - The modules `unpythonic.dispatch` and `unpythonic.typecheck`, which provide the `@generic` and `@typed` decorators and the `isoftype` function, are no longer considered experimental. From this release on, they receive the same semantic versioning guarantees as the rest of `unpythonic`. + - CI: Automated tests now run on Python 3.6, 3.7, 3.8, 3.9, and PyPy3 (language versions 3.6, 3.7). + - CI: Test coverage improved to 94%. + - Full update pass for the user manual written in Markdown. 
+ - Things added or changed in 0.14.2 and later are still mentioned as such, and have not necessarily been folded into the main text. But everything should be at least up to date now. + **Breaking changes**: -- Migrate to the [`mcpyrate`](https://github.com/Technologicat/mcpyrate) macro expander; MacroPy support dropped. - - This facilitates future development of the macro parts of `unpythonic`. - - Macro arguments are now passed using brackets `macroname[args]` instead of parentheses. - - Parentheses are still available as alternative syntax, because up to Python 3.8, decorators cannot have subscripts (so e.g. `@dlet[(x, 42)]` is a syntax error, but `@dlet((x, 42))` is fine). This has been fixed in Python 3.9. - - If you already only need to run on Python 3.9 and later, please use brackets. We currently plan to eventually drop support for parentheses to pass macro arguments, when Python 3.9 becomes the minimum supported language version. - - As a result of the new macro expander, macro test coverage should now be reported correctly. -- The lazy evaluation tools `lazy`, `Lazy`, and the quick lambda `f` (underscore notation for Python) are now provided by `unpythonic` as `unpythonic.syntax.lazy`, `unpythonic.lazyutil.Lazy`, and `unpythonic.syntax.f`, because they used to be provided by `macropy`, and `mcpyrate` does not provide them. - - Any imports of these in user code should be modified to point to the new locations. - - The underscore `_` is no longer a macro on its own. The `f` macro treats the underscore magically, as before, but anywhere else it is available to be used as a regular variable. - - `f[]` now respects nesting: an invocation of `f[]` will not descend into another nested `f[]`. - - The `with quicklambda` macro is still provided, and used just as before. Now it causes any `f[]` invocations lexically inside the block to expand before any other macros in that block do. 
- - Since in `mcpyrate`, macros can be as-imported, you can rename `f` at import time to have any name you want. The `quicklambda` block macro respects the as-import. Now you **must** import also the macro `f` when you import the macro `quicklambda`, because `quicklambda` internally queries the expander to determine the name(s) the macro `f` is currently bound to. -- Rename the `curry` macro to `autocurry`, to prevent name shadowing of the `curry` function. The new name is also more descriptive. -- The internal utility class `unpythonic.syntax.util.ASTMarker` has been renamed to `UnpythonicExpandedMacroMarker` to explicitly have a class name different from `mcpyrate.markers.ASTMarker`, because these represent semantically different things. -- Rename contribution guidelines to `CONTRIBUTING.md`, which is the modern standard name. -- Python 3.4 and 3.5 support dropped, as these language versions have reached end-of-life. +- **New macro expander `mcpyrate`; MacroPy support dropped**. + - **API differences.** + - Macro arguments are now passed using brackets, `macroname[args][...]`, `with macroname[args]`, `@macroname[args]`, instead of parentheses. + - Parentheses are still available as alternative syntax, because up to Python 3.8, decorators cannot have subscripts (so e.g. `@dlet[(x, 42)]` is a syntax error, but `@dlet((x, 42))` is fine). This has been fixed in Python 3.9. + - If you already only run on Python 3.9 and later, please use brackets; that is the preferred syntax. We currently plan to eventually drop support for parentheses to pass macro arguments, when Python 3.9 becomes the minimum supported language version for `unpythonic`. + - If you write your own macros, note `mcpyrate` is not drop-in compatible with MacroPy or `mcpy`. See [its documentation](https://github.com/Technologicat/mcpyrate#documentation) for details. 
+ - **Behavior differences.** + - `mcpyrate` should report test coverage for macro-using code correctly; no need for `# pragma: no cover` in block macro invocations or in quasiquoted code. + +- **Previously scheduled API changes**. + - As promised, names deprecated during 0.14.x have been removed. Old name on the left, new name on the right: + - `m` → `imathify` (consistency with the rest of `unpythonic`) + - `mg` → `gmathify` (consistency with the rest of `unpythonic`) + - `setescape` → `catch` (Lisp family standard name) + - `escape` → `throw` (Lisp family standard name) + - `getvalue`, `runpipe` → `exitpipe` (combined into one) + - **CAUTION**: `exitpipe` already existed in v0.14.3, but beginning with v0.15.0, it is now an `unpythonic.symbol.sym` (like a Lisp symbol). This is not compatible with existing, pickled `exitpipe` instances; it used to be an instance of the class `Getvalue`, which has been removed. (There's not much reason to pickle an `exitpipe` instance, but we're mentioning this for the sake of completeness.) + - Drop support for deprecated argument format for `raisef`. Now the usage is `raisef(exc)` or `raisef(exc, cause=...)`. These correspond exactly to `raise exc` and `raise exc from ...`, respectively. + +- **Other backward-incompatible API changes.** + - Multiple-return-value handling changed. Resolves issue [#32](https://github.com/Technologicat/unpythonic/issues/32). + - Multiple return values are now denoted as `Values`, available from the top-level namespace of `unpythonic`. + - The `Values` constructor accepts both positional and named arguments. Passing in named arguments creates **named return values**. This completes the symmetry between argument passing and returns. + - Most of the time, it's still fine to return a tuple and destructure that; but in contexts where it is important to distinguish between a single `tuple` return value and multiple return values, it is preferable to use `Values`. 
+ - In any utilities that deal with function composition, if your intent is multiple-return-values, **it is now mandatory to return a `Values`** instead of a `tuple`: + - `curry` + - `pipe` family + - `compose` family + - `unfold` + - `iterate` + - All multiple-return-values in code using the `with continuations` macro. (The continuations system essentially composes continuation functions.) + - The lazy evaluation tools `lazy`, `Lazy`, and the quick lambda `f` (underscore notation for Python) are now provided by `unpythonic` as `unpythonic.syntax.lazy`, `unpythonic.lazyutil.Lazy`, and `unpythonic.syntax.fn` (note name change!), because they used to be provided by `macropy`, and `mcpyrate` does not provide them. + - **API differences.** + - The quick lambda is now named `fn[]` instead of `f[]` (as in MacroPy). This was changed because `f` is often used as a function name in code examples, local temporaries, and similar. Also, `fn[]` is a less ambiguous abbreviation for a syntactic construct that means *function*, while remaining shorter than the equivalent `lambda`. Compare `fn[_ * 2]` and `lambda x: x * 2`, or `fn[_ * _]` and `lambda x, y: x * y`. + - Note that in `mcpyrate`, macros can be as-imported, so this change affects just the *default* name of `fn[]`. But that is exactly what is important: have a sensible default name, to remove the need to as-import so often. + - The macros `lazy` and `fn` can be imported from the syntax interface module, `unpythonic.syntax`, and the class `Lazy` is available at the top level of `unpythonic`. + - Unlike `macropy`'s `Lazy`, our `Lazy` does not define `__call__`; instead, it defines the method `force`, which has the same effect (it computes if necessary, and then returns the value of the promise). 
You can also use the function `unpythonic.force`, which has the extra advantage that it passes through a non-promise input unchanged (so you don't need to care whether `x` is a promise before calling `force(x)`; this is sometimes useful). + - When you import the macro `quicklambda`, you **must** also import the macro `fn`. + - The underscore `_` is no longer a macro on its own. The `fn` macro treats the underscore magically, as before, but anywhere else it is available to be used as a regular variable. + - **Behavior differences.** + - `fn[]` now respects nesting: an invocation of `fn[]` will not descend into another nested `fn[]`. + - The `with quicklambda` macro is still provided, and used just as before. Now it causes any `fn[]` invocations lexically inside the block to expand before any other macros in that block do. + - Since in `mcpyrate`, macros can be as-imported, you can rename `fn` at import time to have any name you want. The `quicklambda` block macro respects the as-import, by internally querying the expander to determine the name(s) the macro `fn` is currently bound to. + - For the benefit of code using the `with lazify` macro, laziness is now better respected by the `compose` family, `andf` and `orf`. The utilities themselves are marked lazy, and arguments will be forced only when a lazy function in the chain actually uses them, or when an eager (not lazy) function is encountered in the chain. + - Rename the `curry` macro to `autocurry`, to prevent name shadowing of the `curry` function. The new name is also more descriptive. + - Move the functions `force1` and `force` from `unpythonic.syntax` to `unpythonic`. Make the `Lazy` class (promise implementation) public. (They actually come from `unpythonic.lazyutil`.) + - Change parameter ordering of `unpythonic.it.window` to make it curry-friendly. Usage is now `window(n, iterable)`. 
+ - This was an oversight when this function was added; most other functions in `unpythonic.it` have been curry-friendly from the beginning. + - Change output format of `resolve_bindings` to return an `inspect.BoundArguments` instead of the previous `OrderedDict` that had a custom format. Change the input format of `tuplify_bindings` to match. + - Change parameter name from `l` to `length` in the functions `in_slice` and `index_in_slice` (in the `unpythonic.collections` module). + - These are mostly used internally, but technically a part of the public API. + - This change fixes a `flake8` [E741](https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes) warning, and the new name for the parameter is more descriptive. + +- **Miscellaneous.** + - Robustness: the `with continuations` macro now raises `SyntaxError` if async constructs (`async def` or `await`) appear lexically inside the block, because interaction of `with continuations` with Python's async subsystem has never been implemented. See [issue #4](https://github.com/Technologicat/unpythonic/issues/4). + - The functions `raisef`, `tryf`, `equip_with_traceback`, and `async_raise` now live in `unpythonic.excutil`. They are still available in the top-level namespace of `unpythonic`, as usual. + - The functions `call` and `callwith` now live in `unpythonic.funutil`. They are still available in the top-level namespace of `unpythonic`, as usual. + - The functions `almosteq`, `fixpoint`, `partition_int`, and `ulp` now live in `unpythonic.numutil`. They are still available in the top-level namespace of `unpythonic`, as usual. + - Remove the internal utility class `unpythonic.syntax.util.ASTMarker`. We now have `mcpyrate.markers.ASTMarker`, which is designed for data-driven communication between macros that work together. As a bonus, no markers are left in the AST at run time. + - Rename contribution guidelines to `CONTRIBUTING.md`, which is the modern standard name. 
Old name was `HACKING.md`, which was correct, but nowadays obscure. + - Python 3.4 and 3.5 support dropped, as these language versions have officially reached end-of-life. + + +**Fixed**: + +- Make `unpythonic.misc.callsite_filename` ignore our call helpers. This allows the testing framework to report the source code filename correctly when testing code using macros that make use of these helpers (e.g. `autocurry`, `lazify`). + +- In `aif`, `it` is now only valid in the `then` and `otherwise` parts, as it should always have been. + +- Fix docstring of `test`: multiple `the[]` marks were already supported in 0.14.3, as the macro documentation already said, but the docstring claimed otherwise. + +- Fix bug in `with namedlambda`. Due to incorrect function arguments in the analyzer, already named lambdas were not detected correctly. + +- Fix bug: `fup`/`fupdate`/`ShadowedSequence` now actually accept an infinite-length iterable as a replacement sequence (under the obvious usage limitations), as the documentation has always claimed. + +- Fix bug: `memoize` is now thread-safe, even when the same memoized function instance is called concurrently from multiple threads. Exactly one thread will compute the result. If `f` is recursive, the thread that acquired the lock is the one that is allowed to recurse into the memoized `f`. --- diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..6db9bfe2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,96 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## What is unpythonic + +A Python library providing language extensions and utilities inspired by Lisp, Haskell, and functional programming. Three-tier architecture: + +1. **Pure Python layer** (`unpythonic/`): ~45 modules of functional utilities (curry, memoize, fold, TCO, conditions/restarts, dynamic variables, linked lists, etc.). No macro dependency. +2. 
**Macro layer** (`unpythonic/syntax/`): Syntactic macros via `mcpyrate` providing cleaner syntax for let-bindings, autocurry, lazify, TCO, continuations, etc. +3. **Dialect layer** (`unpythonic/dialects/`): Full language variants (Lispython, Listhell, Pytkell) built on the macro layer. + +## API stability + +Released as 2.0.0 in March 2026 (floor bump + mcpyrate 4.0.0 dependency). The public API (everything in `__all__`) should remain backward-compatible. Prefer non-breaking solutions when possible. + +## Build and development + +Uses PDM with `pdm-backend`. Python 3.10–3.14, also PyPy 3.11. + +```bash +# Set up development environment +pdm install # creates .venv/ and installs deps +pdm use --venv in-project +source .venv/bin/activate +``` + +The project venv is managed by PDM (`pdm venv create`, `pdm use --venv in-project`). To switch Python versions, remove the old venv and create a new one: + +```bash +pdm venv remove in-project +pdm config venv.in_project true +pdm venv create 3.14 # or whichever version +pdm use --venv in-project +pdm install +``` + +**Critical**: Never compile `.py` files in this project using `py_compile`, `python -m compileall`, `--compile`, or any other mechanism that bypasses the macro expander. Stale `.pyc` files compiled without macro support will break macro imports (symptom: `ImportError: cannot import name 'macros' from 'mcpyrate.quotes'`). If this happens, clean the caches with `macropython -c unpythonic` and re-run. + +## Running tests + +Custom test framework (`unpythonic.test.fixtures`, not pytest). Tests use macros (`test[]`, `test_raises[]`) and conditions/restarts for reporting. The test runner does not need the `macropython` wrapper—it activates macros via `import mcpyrate.activate`. Note: test *framework* is at `unpythonic/test/` (singular); actual *tests* are in `tests/` (plural) subdirectories. 
+ +```bash +# Run all tests (from repo root, with venv activated) +python runtests.py + +# Run a single test module directly +python -c "import mcpyrate.activate; from unpythonic.tests.test_fun import runtests; runtests()" + +# Run macro tests similarly +python -c "import mcpyrate.activate; from unpythonic.syntax.tests.test_letdo import runtests; runtests()" +``` + +Test suites discovered by `runtests.py`: +- `unpythonic/tests/test_*.py` — pure Python features +- `unpythonic/net/tests/test_*.py` — REPL server/client +- `unpythonic/syntax/tests/test_*.py` — macro features +- `unpythonic/dialects/tests/test_*.py` — dialect features + +Each test module exports a `runtests()` function. Tests are grouped with `testset()` context managers. + +**Reading test results**: The framework reports Pass/Fail/Error/Total per testset. "Error" means an unexpected exception inside a `test[]` expression — this includes intentional skip-with-message patterns (e.g. "SymPy not installed"), so a few errors from optional-dependency tests are normal. Look at the actual error messages, not just the count. Nested testsets show hierarchy with indentation and asterisk depth (`**`, `****`, `******`, etc.). + +## Linting + +```bash +# As in CI — hard errors (syntax errors, undefined names) +flake8 . --config=flake8rc --select=E9,F63,F7,F82 --show-source + +# Soft warnings +flake8 . --config=flake8rc --exit-zero --max-line-length=127 +``` + +## Code structure and conventions + +- **Regular code** in `unpythonic/`, **macros** in `unpythonic/syntax/`, **REPL networking** in `unpythonic/net/`, **dialects** in `unpythonic/dialects/`. +- **Tests** are in `tests/` (plural) subdirectories under the code they test. The testing *framework* lives at `unpythonic/test/` (singular). +- Each module declares `__all__` explicitly for public API. The top-level `__init__.py` re-exports via star imports. +- **Import style**: Use `from ... import ...` (not `import ...`). 
The from-import syntax is mandatory for macro imports and used consistently throughout. Don't rename unpythonic features with `as`—macro code depends on original bare names. +- **No star imports** in user code (only in the top-level `__init__.py` for re-export). +- **Curry-friendly signatures**: Parameters that change least often go on the left. Use `def f(func, thing0, *things)` (not `def f(func, *things)`) when at least one `thing` is required, so `curry` knows when to trigger. +- **Macros are the nuclear option**: Only make a macro when a regular function can't do the job. Prefer a pure-Python core with a thin macro layer for UX. +- **Macro `**kw` passing**: Use `dyn` (dynamic variables) to pass `mcpyrate` `**kw` arguments through to syntax transformers, rather than threading them through parameter lists. +- **Variable names**: Descriptive but compact. Prefer `theconstant` over `node` when the type matters, `thebody` over `b` when scope is more than a few lines. Avoid generic names like `tmp`, `data`, `x` unless scope is trivially small. In test code using the `the[]` macro, avoid `the`-prefixed names — `the[theconstant]` isn't English. Use e.g. `constant_node` instead. +- **Line width** ~110 characters. Docstrings in reStructuredText. +- **Module size target**: ~100–300 SLOC, rough max ~700 lines. Some modules are longer when appropriate (e.g. `syntax/tailtools.py` at ~1600 lines). Never split just because the line count was exceeded. +- **Dependencies**: Avoid external dependencies. `mcpyrate` is the only allowed external dep and must remain strictly optional for the pure-Python layer. + +## Key cross-cutting concerns + +- `curry` has cross-cutting behavior — grep for it when investigating interactions. +- `@generic` (multiple dispatch) similarly has cross-cutting concerns. +- The `lazify` macro: also grep for `passthrough_lazy_args` and `maybe_force_args`. +- The `continuations` macro builds on `tco` — read `tco` first when studying continuations. 
+- `unpythonic.syntax.scopeanalyzer` implements lexical scope analysis for macros that interact with Python's scoping rules (notably `let`). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0517d9c5..e791c09d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,26 @@ +**Navigation** + +- [README](README.md) +- [Pure-Python feature set](doc/features.md) +- [Syntactic macro feature set](doc/macros.md) +- [Examples of creating dialects using `mcpyrate`](doc/dialects.md) +- [REPL server](doc/repl.md) +- [Troubleshooting](doc/troubleshooting.md) +- [Design notes](doc/design-notes.md) +- [Essays](doc/essays.md) +- [Additional reading](doc/readings.md) +- **Contribution guidelines** + + +**Table of Contents** + +- [Hacking unpythonic, a.k.a. contribution guidelines](#hacking-unpythonic-aka-contribution-guidelines) + - [Most importantly](#most-importantly) + - [Technical overview](#technical-overview) + - [Style guide](#style-guide) + + + # Hacking unpythonic, a.k.a. contribution guidelines **Rule #1**: Code and/or documentation contributions are welcome! @@ -40,7 +63,8 @@ - For example: - Not only a summarizing `minmax` utility, but `running_minmax` as well. The former is then just a one-liner expressed in terms of the latter. - `foldl` accepts multiple iterables, has a switch to terminate either on the shortest or on the longest input, and takes its arguments in a curry-friendly order. It also *requires* at least one iterable, so that `curry` knows to not trigger the call until at least one iterable has been provided. - - `curry` changes Python's reduction semantics to be more similar to Haskell's, to pass extra arguments through on the right, and keep calling if an intermediate result is a function, and there are still such passed-through arguments remaining. This extends what can be expressed concisely, [for example](http://www.cse.chalmers.se/~rjmh/Papers/whyfp.html) a classic lispy `map` is `curry(lambda f: curry(foldr, composerc(cons, f), nil))`. 
Feed that a function and an iterable, and get a linked list with the mapped results. Note the arity mismatch; `f` is 1-to-1, but `cons` is 2-to-1. + - `curry` changes Python's reduction semantics to be more similar to Haskell's, to pass extra arguments through, and keep calling if an intermediate result is a function, and there are still such passed-through arguments remaining. This extends what can be expressed concisely, [for example](http://www.cse.chalmers.se/~rjmh/Papers/whyfp.html) a classic lispy `map` is `curry(lambda f: curry(foldr, composerc(cons, f), nil))`. Feed that a function and an iterable, and get a linked list with the mapped results. Note the arity mismatch; `f` is 1-to-1, but `cons` is 2-to-1. + - `curry` also supports our `@generic` functions, and named return values... - **Make features work together** when it makes sense. Aim at composability. Try to make features orthogonal when reasonably possible, so that making them work together requires no extra effort. When not possible, purposefully minimizing friction in interaction between features makes for a coherent, easily understandable language extension. - **Be concise but readable**, like in mathematics. @@ -67,7 +91,7 @@ - *Having no docstring is better than having a placeholder docstring.* - If a function is not documented, make that fact explicit, to help [static analyzers](https://pypi.org/project/pyflakes/) flag it as needing documentation. - To help discoverability, the full documentation `doc/features.md` (or `doc/macros.md`, as appropriate) should contain at least a mention of each public feature. Examples are nice, too. - - Features that have non-obvious uses (e.g. `@call`), as well as those that cannot be assumed to be familiar to Python developers (e.g. Common Lisp style *conditions and restarts*) should get a more detailed explanation. + - Features that have non-obvious uses (e.g. 
`@call`), as well as those that cannot be assumed to be familiar to developers mostly working in Python (e.g. Common Lisp style *conditions and restarts*) should get a more detailed explanation. ## Technical overview @@ -82,19 +106,19 @@ We use a custom testing framework, which lives in the modules `unpythonic.test.f In retrospect, given that the main aim was compact testing syntax for macro-enabled Python code (without installing another import hook, doing which would disable the macro expander), it might have made more sense to make the testing macros compile to [pytest](https://docs.pytest.org/en/latest/). But hey, it's short, may have applications in teaching... and now we can easily write custom test runners, since the testing framework is just a `mcpyrate` library. It's essentially a *no-framework* (cf. "NoSQL"), which provides the essentials and lets the user define the rest. -(The whole framework is about 1.3k SLOC, counting docstrings, comments and blanks; under 600 SLOC if counting only active code lines. Add another 800 SLOC (all) / 200 SLOC (active code lines) for the condition system.) +(The whole framework is about 1.8k SLOC, counting docstrings, comments and blanks; under 700 SLOC if counting only active code lines. Add another 800 SLOC (all) / 200 SLOC (active code lines) for the condition system.) Since `unpythonic` is a relatively loose collection of language extensions and utilities, that's about it for the 30 000 ft (9 144 m) view. To study a particular feature, just start from the entry point that piques your interest, and follow the definitions recursively. Use an IDE or Emacs's `anaconda-mode` ~for convenience~ to stay sane. Look at the automated tests; those double as usage examples, sometimes containing finer points that didn't make it to prose documentation. -`curry` has some [cross-cutting concerns](https://en.wikipedia.org/wiki/Cross-cutting_concern), but nothing that a grep wouldn't find. 
+`curry` has some [cross-cutting concerns](https://en.wikipedia.org/wiki/Cross-cutting_concern), but nothing that a grep wouldn't find. Same goes for the multiple-dispatch system (particularly `@generic`). The `lazify` and `continuations` macros are the most complex (and perhaps fearsome?) parts. As for the lazifier, grep also for `passthrough_lazy_args` and `maybe_force_args`. As for continuations, read the `tco` macro first, and keep in mind how that works when reading `continuations`. The `continuations` macro is essentially what [academics call](https://cs.brown.edu/~sk/Publications/Papers/Published/pmmwplck-python-full-monty/paper.pdf) *"a standard [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) transformation"*, plus some technical details due to various bits of impedance mismatch. `unpythonic.syntax.scopeanalyzer` is a unfortunate artifact that is needed to implement macros that interact with Python's scoping rules, notably `let`. Fortunately, [the language reference explicitly documents](https://docs.python.org/3/reference/executionmodel.html#naming-and-binding) what is needed for a lexical scope analysis for Python. So we have just implemented that (better, as an AST analysis, rather than scanning the surface syntax text). -As of the first half of 2021, the main target platforms are **CPython 3.8** and **PyPy3 3.7** (since as of April 2021, PyPy3 does not have 3.8 yet). The code should run on 3.6 or any later Python. We have [a GitHub workflow](https://github.com/Technologicat/unpythonic/actions?query=workflow%3A%22Python+package%22) that runs the test suite on CPython 3.6 through 3.9, and on PyPy3. +As of v2.0.0, the main target platforms are **CPython 3.10** through **3.14**, and **PyPy3** (language version 3.11). We have [a GitHub workflow](https://github.com/Technologicat/unpythonic/actions?query=workflow%3A%22Python+package%22) that runs the test suite on these platforms. 
## Style guide @@ -156,6 +180,16 @@ As of the first half of 2021, the main target platforms are **CPython 3.8** and - When implementing something, if you run into an empty niche, add the missing utility, and implement your higher-level functionality in terms of it. - This keeps code at each level of abstraction short, and exposes parts that can later be combined in new ways. +- **Compile-time or run-time?** + - For anyone new to making programming languages: there's a reason the terms static/lexical/compile-time and dynamic/run-time are grouped together. + - At compile time (macros), you have access to the source code (or AST), including its lexical structure. (I.e. what is defined inside what, in the source code text.) + - You also have access to the macro bindings of the current expander, because [*for the macros, it's run time*](https://github.com/Technologicat/mcpyrate/blob/master/doc/troubleshooting.md#macro-expansion-time-where-exactly). + - A block macro (`with mac:`) takes effect **for the lexical content of that block**. + - At run time (regular code), you have access to run-time bindings of names (e.g. whether `curry` refers to `unpythonic.fun.curry` or something else), and the call stack. + - Keep in mind that in Python, knowing what a name at the top level of a module (i.e. a "global variable") points to *is only possible at run time*. Although it's uncommon, not to mention bad practice in most cases, *any code anywhere* may change the top-level bindings in *any* module (via `sys.modules`). + - A run-time context manager (`with mgr:`) takes effect **for the dynamic extent of that block**. + - Try to take advantage of whichever is the most appropriate for what you're doing. + - **Follow [PEP8](https://www.python.org/dev/peps/pep-0008/) style**, *including* the official recommendation to violate PEP8 when the guidelines do not apply. Specific to `unpythonic`: - Conserve vertical space when reasonable. 
Even on modern laptops, a display can only fit ~50 lines at a time. - `x = x or default` for initializing `x` inside the function body of `def f(x=None)` (when it makes no sense to publish the actual default value) is concise and very readable. @@ -200,16 +234,11 @@ As of the first half of 2021, the main target platforms are **CPython 3.8** and - **Macros.** - *Macros are the nuclear option of software engineering.* - Only make a macro when a regular function can't do what is needed. - - Sometimes a regular code core with a thin macro layer on top, to improve the user experience, is the appropriate solution for [minimizing magic](https://macropy3.readthedocs.io/en/latest/discussion.html#minimize-macro-magic). See `do`, `let` for examples. - - `unpythonic/syntax/__init__.py` is very long (> 2000 lines), because: - - For technical reasons, as of MacroPy 1.1.0b2, it's not possible to re-export macros defined in another module. (As of `unpythonic` 0.15, this is no longer relevant, since we use `mcpyrate`, which **can** re-export macros. So `unpythonic.syntax` may be revised in a future version of `unpythonic`.) - - Therefore, all macro entry points must reside in `unpythonic/syntax/__init__.py`, so that user code can `from unpythonic.syntax import macros, something`, without caring about how the `unpythonic.syntax` package is internally organized. - - The docstring must be placed on the macro entry point, so that the REPL will find it. This forces all macro docstrings into that one module. (That's less magic than injecting them dynamically when `unpythonic` boots up.) + - Sometimes a regular code core with a thin macro layer on top, to improve the user experience, is the appropriate solution for [minimizing magic](https://macropy3.readthedocs.io/en/latest/discussion.html#minimize-macro-magic). See `do`, `let`, `autocurry`, `forall` for examples. 
- A macro entry point can be just a thin wrapper around the relevant [*syntax transformer*](http://www.greghendershott.com/fear-of-macros/): a regular function, which takes and returns an AST. - - You can have an expr, block and decorator macro with the same name, in the same module, by making the macro interface into a dispatcher. See the `syntax` kw in `mcpyrate`. - - If you do this, the docstring should be placed in whichever of those is defined last, because that one will be the definition left standing at run time (hence used for docstring lookup by the REPL). - - Syntax transformers can and should be sensibly organized into modules, just like any other regular (non-macro) code. - - But they don't need docstrings, since the macro entry point already has the docstring. + - You can have an expr, block and decorator macro with the same name, in the same module, by making the macro interface into a dispatcher. See the `syntax` kwarg in `mcpyrate`. + - Macros and syntax transformers should be sensibly organized into modules, just like any other regular (non-macro) code. + - The docstring should usually be placed on the macro entry point, so the syntax transformer typically does not need one. - If your syntax transformer (or another one it internally uses) needs `mcpyrate` `**kw` arguments: - Declare the relevant `**kw`s as parameters for the macro entry point, therefore requesting `mcpyrate` to provide them. Stuff them into `dyn` using `with dyn.let(...)`, and call your syntax transformer, which can then get the `**kw`s from `dyn`. See the existing macros for examples. - Using `dyn` keeps the syntax transformer call signatures clean, while limiting the dynamic extent of what is effectively a global assignment. If we used only function parameters, some of the high-level syntax transformers would have to declare `expander` just to pass it through, possibly through several layers, until it reaches the low-level syntax transformer that actually needs it. 
Avoiding such a parameter definition cascade is exactly the use case `dyn` was designed for. diff --git a/README.md b/README.md index 7d6b2c34..57609ac0 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,11 @@ In the spirit of [toolz](https://github.com/pytoolz/toolz), we provide missing features for Python, mainly from the list processing tradition, but with some Haskellisms mixed in. We extend the language with a set of [syntactic macros](https://en.wikipedia.org/wiki/Macro_(computer_science)#Syntactic_macros). We also provide an in-process, background [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) server for live inspection and hot-patching. The emphasis is on **clear, pythonic syntax**, **making features work together**, and **obsessive correctness**. -![100% Python](https://img.shields.io/github/languages/top/Technologicat/unpythonic) ![supported language versions](https://img.shields.io/pypi/pyversions/unpythonic) ![supported implementations](https://img.shields.io/pypi/implementation/unpythonic) ![CI status](https://img.shields.io/github/workflow/status/Technologicat/unpythonic/Python%20package) [![codecov](https://codecov.io/gh/Technologicat/unpythonic/branch/master/graph/badge.svg)](https://codecov.io/gh/Technologicat/unpythonic) +![100% Python](https://img.shields.io/github/languages/top/Technologicat/unpythonic) ![supported language versions](https://img.shields.io/pypi/pyversions/unpythonic) ![supported implementations](https://img.shields.io/pypi/implementation/unpythonic) ![CI status](https://img.shields.io/github/actions/workflow/status/Technologicat/unpythonic/python-package.yml?branch=master) [![codecov](https://codecov.io/gh/Technologicat/unpythonic/branch/master/graph/badge.svg)](https://codecov.io/gh/Technologicat/unpythonic) ![version on PyPI](https://img.shields.io/pypi/v/unpythonic) ![PyPI package format](https://img.shields.io/pypi/format/unpythonic) ![dependency 
status](https://img.shields.io/librariesio/github/Technologicat/unpythonic) -![license: BSD](https://img.shields.io/pypi/l/unpythonic) ![open issues](https://img.shields.io/github/issues/Technologicat/unpythonic) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](CONTRIBUTING.md) +![license: BSD](https://img.shields.io/pypi/l/unpythonic) ![open issues](https://img.shields.io/github/issues/Technologicat/unpythonic) [![PRs welcome](https://img.shields.io/badge/PRs-welcome-brightgreen)](http://makeapullrequest.com/) + +We use [semantic versioning](https://semver.org/). *Some hypertext features of this README, such as local links to detailed documentation, and expandable example highlights, are not supported when viewed on PyPI; [view on GitHub](https://github.com/Technologicat/unpythonic) to have those work properly.* @@ -13,31 +15,37 @@ In the spirit of [toolz](https://github.com/pytoolz/toolz), we provide missing f None required. - - [mcpyrate](https://github.com/Technologicat/mcpyrate) optional, to enable the syntactic macro layer, and an interactive macro REPL. + - [`mcpyrate`](https://github.com/Technologicat/mcpyrate) optional, to enable the syntactic macro layer, an interactive macro REPL, and some example dialects. -The officially supported language versions are **CPython 3.8** and **PyPy3 3.7**. +As of v2.0.0, `unpythonic` runs on CPython 3.10, 3.11, 3.12, 3.13, 3.14, and PyPy3 (language version 3.11); the [CI](https://en.wikipedia.org/wiki/Continuous_integration) process verifies the tests pass on those platforms. New Python versions are added and old ones are removed following the [Long-term support roadmap](https://github.com/Technologicat/unpythonic/issues/1). -The 0.15.x series should run on CPython 3.6, 3.7, 3.8 and 3.9, and PyPy3 7.3.4 (language version 3.7); the [CI](https://en.wikipedia.org/wiki/Continuous_integration) process verifies the tests pass on those platforms. 
### Documentation -[Pure-Python feature set](doc/features.md) -[Syntactic macro feature set](doc/macros.md) -[REPL server](doc/repl.md): interactively hot-patch your running Python program. -[Design notes](doc/design-notes.md): for more insight into the design choices of ``unpythonic``. +- **README**: you are here. +- [Pure-Python feature set](doc/features.md) +- [Syntactic macro feature set](doc/macros.md) +- [Examples of creating dialects using `mcpyrate`](doc/dialects.md): Python the way you want it. +- [REPL server](doc/repl.md): interactively hot-patch your running Python program. +- [Troubleshooting](doc/troubleshooting.md): possible solutions to possibly common issues. +- [Design notes](doc/design-notes.md): for more insight into the design choices of ``unpythonic``. +- [Essays](doc/essays.md): for writings on the philosophy of ``unpythonic``, things that inspired it, and related discoveries. +- [Additional reading](doc/readings.md): links to material relevant in the context of ``unpythonic``. +- [Contribution guidelines](CONTRIBUTING.md): for understanding the codebase, or if you're interested in making a code or documentation PR. The features of `unpythonic` are built out of, in increasing order of [magic](https://macropy3.readthedocs.io/en/latest/discussion.html#levels-of-magic): - Pure Python (e.g. batteries for `itertools`), - Macros driving a pure-Python core (`do`, `let`), - Pure macros (e.g. `continuations`, `lazify`, `dbg`). + - Whole-module transformations, a.k.a. dialects (e.g. `Lispy`). This depends on the purpose of each feature, as well as ease-of-use considerations. See the design notes for more information. ### Examples -Small, limited-space overview of the overall flavor. There's a lot more that doesn't fit here, especially in the pure-Python feature set. See the [full documentation](doc/features.md) and [unit tests](unpythonic/test/) for more examples. +Small, limited-space overview of the overall flavor. 
There is a lot more that does not fit here, especially in the pure-Python feature set. We give here simple examples that are **not** necessarily of the most general form supported by the constructs. See the [full documentation](doc/features.md) and [unit tests](unpythonic/tests/) for more examples. #### Unpythonic in 30 seconds: Pure Python @@ -131,21 +139,260 @@ Optionally, if you have [mcpyrate](https://github.com/Technologicat/mcpyrate), t [[docs](doc/features.md#batteries-for-itertools)] +Scan and fold accept multiple iterables, like in Racket. + ```python from operator import add -from unpythonic import scanl, foldl, unfold, take +from unpythonic import scanl, foldl, unfold, take, Values assert tuple(scanl(add, 0, range(1, 5))) == (0, 1, 3, 6, 10) def op(e1, e2, acc): return acc + e1 * e2 -assert foldl(op, 0, (1, 2), (3, 4)) == 11 # we accept multiple input sequences, like Racket +assert foldl(op, 0, (1, 2), (3, 4)) == 11 -def nextfibo(a, b): # *oldstates - return (a, b, a + b) # value, *newstates +def nextfibo(a, b): + return Values(a, a=b, b=a + b) assert tuple(take(10, unfold(nextfibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) ``` +
Industrial-strength curry. + +[[docs](doc/features.md#batteries-for-functools)] + +We bind arguments to parameters like Python itself does, so it does not matter whether arguments are passed by position or by name during currying. We support `@generic` multiple-dispatch functions. + +We also feature a Haskell-inspired passthrough system: any args and kwargs that are not accepted by the call signature will be passed through. This is useful when a curried function returns a new function, which is then the target for the passthrough. See the docs for details. + +```python +from unpythonic import curry, generic, foldr, composerc, cons, nil, ll + +@curry +def f(x, y): + return x, y + +assert f(1, 2) == (1, 2) +assert f(1)(2) == (1, 2) +assert f(1)(y=2) == (1, 2) +assert f(y=2)(x=1) == (1, 2) + +@curry +def add3(x, y, z): + return x + y + z + +# actually uses partial application so these work, too +assert add3(1)(2)(3) == 6 +assert add3(1, 2)(3) == 6 +assert add3(1)(2, 3) == 6 +assert add3(1, 2, 3) == 6 + +@curry +def lispyadd(*args): + return sum(args) +assert lispyadd() == 0 # no args is a valid arity here + +@generic +def g(x: int, y: int): + return "int" +@generic +def g(x: float, y: float): + return "float" +@generic +def g(s: str): + return "str" +g = curry(g) + +assert callable(g(1)) +assert g(1)(2) == "int" + +assert callable(g(1.0)) +assert g(1.0)(2.0) == "float" + +assert g("cat") == "str" +assert g(s="cat") == "str" + +# simple example of passthrough +mymap = lambda f: curry(foldr, composerc(cons, f), nil) +myadd = lambda a, b: a + b +assert curry(mymap, myadd, ll(1, 2, 3), ll(2, 4, 6)) == ll(3, 6, 9) +``` +
+
Multiple-dispatch generic functions, like in CLOS or Julia. + +[[docs](doc/features.md#generic-typed-isoftype-multiple-dispatch)] + +```python +from unpythonic import generic + +@generic +def my_range(stop: int): # create the generic function and the first multimethod + return my_range(0, 1, stop) +@generic +def my_range(start: int, stop: int): # further registrations add more multimethods + return my_range(start, 1, stop) +@generic +def my_range(start: int, step: int, stop: int): + return start, step, stop +``` + +This is a purely run-time implementation, so it does **not** give performance benefits, but it can make code more readable, and makes it modular to add support for new input types (or different call signatures) to an existing function later. + +[*Holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/) are also a possibility: + +```python +import typing +from unpythonic import generic, augment + +class FunninessTrait: + pass +class IsFunny(FunninessTrait): + pass +class IsNotFunny(FunninessTrait): + pass + +@generic +def funny(x: typing.Any): # default + raise NotImplementedError(f"`funny` trait not registered for anything matching {type(x)}") + +@augment(funny) +def funny(x: str): # noqa: F811 + return IsFunny() +@augment(funny) +def funny(x: int): # noqa: F811 + return IsNotFunny() + +@generic +def laugh(x: typing.Any): + return laugh(funny(x), x) + +@augment(laugh) +def laugh(traitvalue: IsFunny, x: typing.Any): + return f"Ha ha ha, {x} is funny!" +@augment(laugh) +def laugh(traitvalue: IsNotFunny, x: typing.Any): + return f"{x} is not funny." + +assert laugh("that") == "Ha ha ha, that is funny!" +assert laugh(42) == "42 is not funny." +``` +
+
Conditions: resumable, modular error handling, like in Common Lisp. + +[[docs](doc/features.md#handlers-restarts-conditions-and-restarts)] + +Contrived example: + +```python +from unpythonic import error, restarts, handlers, invoke, use_value, unbox + +class MyError(ValueError): + def __init__(self, value): # We want to act on the value, so save it. + self.value = value + +def lowlevel(lst): + _drop = object() # gensym/nonce + out = [] + for k in lst: + # Provide several different error recovery strategies. + with restarts(use_value=(lambda x: x), + halve=(lambda x: x // 2), + drop=(lambda: _drop)) as result: + if k > 9000: + error(MyError(k)) + # This is reached when no error occurs. + # `result` is a box, send k into it. + result << k + # Now the result box contains either k, + # or the return value of one of the restarts. + r = unbox(result) # get the value from the box + if r is not _drop: + out.append(r) + return out + +def highlevel(): + # Choose which error recovery strategy to use... + with handlers((MyError, lambda c: use_value(c.value))): + assert lowlevel([17, 10000, 23, 42]) == [17, 10000, 23, 42] + + # ...on a per-use-site basis... + with handlers((MyError, lambda c: invoke("halve", c.value))): + assert lowlevel([17, 10000, 23, 42]) == [17, 5000, 23, 42] + + # ...without changing the low-level code. + with handlers((MyError, lambda: invoke("drop"))): + assert lowlevel([17, 10000, 23, 42]) == [17, 23, 42] + +highlevel() +``` + +Conditions only shine in larger systems, with restarts set up at multiple levels of the call stack; this example is too small to demonstrate that. The single-level case here could be implemented as an error-handling mode parameter for the example's only low-level function. + +With multiple levels, it becomes apparent that this mode parameter must be threaded through the API at each level, unless it is stored as a dynamic variable (see [`unpythonic.dyn`](doc/features.md#dyn-dynamic-assignment)). 
But then, there can be several types of errors, and the error-handling mode parameters - one for each error type - have to be shepherded in an intricate manner. A stack is needed, so that an inner level may temporarily override the handler for a particular error type... + +The condition system is the clean, general solution to this problem. It automatically scopes handlers to their dynamic extent, and manages the handler stack automatically. In other words, it dynamically binds error-handling modes (for several types of errors, if desired) in a controlled, easily understood manner. The local programmability (i.e. the fact that a handler is not just a restart name, but an arbitrary function) is a bonus for additional flexibility. + +If this sounds a lot like an exception system, that's because conditions are the supercharged sister of exceptions. The condition model cleanly separates mechanism from policy, while otherwise remaining similar to the exception model. +
+
Lispy symbol type. + +[[docs](doc/features.md#sym-gensym-Singleton-symbols-and-singletons)] + +Roughly, a [symbol](https://stackoverflow.com/questions/8846628/what-exactly-is-a-symbol-in-lisp-scheme) is a guaranteed-[interned](https://en.wikipedia.org/wiki/String_interning) string. + +A [gensym](http://clhs.lisp.se/Body/f_gensym.htm) is a guaranteed-*unique* string, which is useful as a nonce value. It's similar to the pythonic idiom `nonce = object()`, but with a nice repr, and object-identity-preserving pickle support. + +```python +from unpythonic import sym # lispy symbol +sandwich = sym("sandwich") +hamburger = sym("sandwich") # symbol's identity is determined by its name, only +assert hamburger is sandwich + +assert str(sandwich) == "sandwich" # symbols have a nice str() +assert repr(sandwich) == 'sym("sandwich")' # and eval-able repr() +assert eval(repr(sandwich)) is sandwich + +from pickle import dumps, loads +pickled_sandwich = dumps(sandwich) +unpickled_sandwich = loads(pickled_sandwich) +assert unpickled_sandwich is sandwich # symbols survive a pickle roundtrip + +from unpythonic import gensym # gensym: make new uninterned symbol +tabby = gensym("cat") +scottishfold = gensym("cat") +assert tabby is not scottishfold + +pickled_tabby = dumps(tabby) +unpickled_tabby = loads(pickled_tabby) +assert unpickled_tabby is tabby # also gensyms survive a pickle roundtrip +``` +
+
Lispy data structures. + +[[docs for `box`](doc/features.md#box-a-mutable-single-item-container)] [[docs for `cons`](doc/features.md#cons-and-friends-pythonic-lispy-linked-lists)] [[docs for `frozendict`](doc/features.md#frozendict-an-immutable-dictionary)] + +```python +from unpythonic import box, unbox # mutable single-item container +cat = object() +cardboardbox = box(cat) +assert cardboardbox is not cat # the box is not the cat +assert unbox(cardboardbox) is cat # but the cat is inside the box +assert cat in cardboardbox # ...also syntactically +dog = object() +cardboardbox << dog # hey, it's my box! (replace contents) +assert unbox(cardboardbox) is dog + +from unpythonic import cons, nil, ll, llist # lispy linked lists +lst = cons(1, cons(2, cons(3, nil))) +assert ll(1, 2, 3) == lst # make linked list out of elements +assert llist([1, 2, 3]) == lst # convert iterable to linked list + +from unpythonic import frozendict # immutable dictionary +d1 = frozendict({'a': 1, 'b': 2}) +d2 = frozendict(d1, c=3, a=4) +assert d1 == frozendict({'a': 1, 'b': 2}) +assert d2 == frozendict({'a': 4, 'b': 2, 'c': 3}) +``` +
Allow a lambda to call itself. Name a lambda. [[docs for `withself`](doc/features.md#batteries-for-functools)] [[docs for `namelambda`](doc/features.md#namelambda-rename-a-function)] @@ -235,36 +482,16 @@ from itertools import repeat from unpythonic import fup t = (1, 2, 3, 4, 5) -s = fup(t)[0::2] << tuple(repeat(10, 3)) +s = fup(t)[0::2] << repeat(10) assert s == (10, 2, 10, 4, 10) assert t == (1, 2, 3, 4, 5) -``` -
-
Lispy data structures. - -[[docs for `box`](doc/features.md#box-a-mutable-single-item-container)] [[docs for `cons`](doc/features.md#cons-and-friends-pythonic-lispy-linked-lists)] [[docs for `frozendict`](doc/features.md#frozendict-an-immutable-dictionary)] - -```python -from unpythonic import box, unbox # mutable single-item container -cat = object() -cardboardbox = box(cat) -assert cardboardbox is not cat # the box is not the cat -assert unbox(cardboardbox) is cat # but the cat is inside the box -assert cat in cardboardbox # ...also syntactically -dog = object() -cardboardbox << dog # hey, it's my box! (replace contents) -assert unbox(cardboardbox) is dog - -from unpythonic import cons, nil, ll, llist # lispy linked lists -lst = cons(1, cons(2, cons(3, nil))) -assert ll(1, 2, 3) == lst # make linked list out of elements -assert llist([1, 2, 3]) == lst # convert iterable to linked list -from unpythonic import frozendict # immutable dictionary -d1 = frozendict({'a': 1, 'b': 2}) -d2 = frozendict(d1, c=3, a=4) -assert d1 == frozendict({'a': 1, 'b': 2}) -assert d2 == frozendict({'a': 4, 'b': 2, 'c': 3}) +from itertools import count +from unpythonic import imemoize +t = (1, 2, 3, 4, 5) +s = fup(t)[::-2] << imemoize(count(start=10))() +assert s == (12, 2, 11, 4, 10) +assert t == (1, 2, 3, 4, 5) ```
Live list slices. @@ -297,63 +524,6 @@ assert x == 85 ``` The point is usability: in a function composition using pipe syntax, data flows from left to right. -
-
Conditions: resumable, modular error handling, like in Common Lisp. - -[[docs](doc/features.md#handlers-restarts-conditions-and-restarts)] - -Contrived example: - -```python -from unpythonic import error, restarts, handlers, invoke, use_value, unbox - -class MyError(ValueError): - def __init__(self, value): # We want to act on the value, so save it. - self.value = value - -def lowlevel(lst): - _drop = object() # gensym/nonce - out = [] - for k in lst: - # Provide several different error recovery strategies. - with restarts(use_value=(lambda x: x), - halve=(lambda x: x // 2), - drop=(lambda: _drop)) as result: - if k > 9000: - error(MyError(k)) - # This is reached when no error occurs. - # `result` is a box, send k into it. - result << k - # Now the result box contains either k, - # or the return value of one of the restarts. - r = unbox(result) # get the value from the box - if r is not _drop: - out.append(r) - return out - -def highlevel(): - # Choose which error recovery strategy to use... - with handlers((MyError, lambda c: use_value(c.value))): - assert lowlevel([17, 10000, 23, 42]) == [17, 10000, 23, 42] - - # ...on a per-use-site basis... - with handlers((MyError, lambda c: invoke("halve", c.value))): - assert lowlevel([17, 10000, 23, 42]) == [17, 5000, 23, 42] - - # ...without changing the low-level code. - with handlers((MyError, lambda: invoke("drop"))): - assert lowlevel([17, 10000, 23, 42]) == [17, 23, 42] - -highlevel() -``` - -Conditions only shine in larger systems, with restarts set up at multiple levels of the call stack; this example is too small to demonstrate that. The single-level case here could be implemented as a error-handling mode parameter for the example's only low-level function. - -With multiple levels, it becomes apparent that this mode parameter must be threaded through the API at each level, unless it is stored as a dynamic variable (see [`unpythonic.dyn`](doc/features.md#dyn-dynamic-assignment)). 
But then, there can be several types of errors, and the error-handling mode parameters - one for each error type - have to be shepherded in an intricate manner. A stack is needed, so that an inner level may temporarily override the handler for a particular error type... - -The condition system is the clean, general solution to this problem. It automatically scopes handlers to their dynamic extent, and manages the handler stack automatically. In other words, it dynamically binds error-handling modes (for several types of errors, if desired) in a controlled, easily understood manner. The local programmability (i.e. the fact that a handler is not just a restart name, but an arbitrary function) is a bonus for additional flexibility. - -If this sounds a lot like an exception system, that's because conditions are the supercharged sister of exceptions. The condition model cleanly separates mechanism from policy, while otherwise remaining similar to the exception model.
@@ -387,7 +557,8 @@ with session("simple framework demo"): test[returns_normally(g(2, 3))] test[g(2, 3) == 6] # Use `the[]` (or several) in a `test[]` to declare what you want to inspect if the test fails. - test[counter() < the[counter()]] + # Implicit `the[]`: in comparison, the LHS; otherwise the whole expression. Used if no explicit `the[]`. + test[the[counter()] < the[counter()]] with testset("outer"): with testset("inner 1"): @@ -403,15 +574,21 @@ with session("simple framework demo"): try: import blargly except ImportError: - error["blargly not installed, cannot test integration with it."] + warn["blargly not installed, skipping integration tests."] else: ... # blargly integration tests go here + # Unconditional errors and failures can be emitted with `error[]` and `fail[]`. + # with testset("not implemented"): + # fail["not implemented yet!"] + with testset(postproc=terminate): test[2 * 2 == 5] # fails, terminating the nearest dynamically enclosing `with session` test[2 * 2 == 4] # not reached ``` +For running tests, `unpythonic.test.runner` provides a reusable test runner with module discovery and version-suffix gating. See [`doc/macros.md`](doc/macros.md#unpythonictestfixtures-a-test-framework-for-macro-enabled-python) for details, and [`runtests.py`](runtests.py) for a usage example. + We provide the low-level syntactic constructs `test[]`, `test_raises[]` and `test_signals[]`, with the usual meanings. The last one is for testing code that uses conditions and restarts; see `unpythonic.conditions`. The test macros also come in block variants, `with test`, `with test_raises`, `with test_signals`. 
@@ -425,13 +602,13 @@ As usual in test frameworks, the testing constructs behave somewhat like `assert ```python from unpythonic.syntax import macros, let, letseq, letrec -x = let[((a, 1), (b, 2)) in a + b] -y = letseq[((c, 1), # LET SEQuential, like Scheme's let* - (c, 2 * c), - (c, 2 * c)) in +x = let[[a := 1, b := 2] in a + b] +y = letseq[[c := 1, # LET SEQuential, like Scheme's let* + c := 2 * c, + c := 2 * c] in c] -z = letrec[((evenp, lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECursive, like in Scheme - (oddp, lambda x: (x != 0) and evenp(x - 1))) +z = letrec[[evenp := (lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECursive, like in Scheme + oddp := (lambda x: (x != 0) and evenp(x - 1))] in evenp(42)] ``` @@ -442,9 +619,10 @@ z = letrec[((evenp, lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECursiv ```python from unpythonic.syntax import macros, dlet -@dlet((x, 0)) # let-over-lambda for Python +# In Python 3.8, use `@dlet(x << 0)` instead; in Python 3.9, use `@dlet(x := 0)` +@dlet[x := 0] # let-over-lambda for Python def count(): - return x << x + 1 # `name << value` rebinds in the let env + return (x := x + 1) # `name := value` rebinds in the let env assert count() == 1 assert count() == 2 ``` @@ -456,8 +634,8 @@ assert count() == 2 ```python from unpythonic.syntax import macros, do, local, delete -x = do[local[a << 21], - local[b << 2 * a], +x = do[local[a := 21], + local[b := 2 * a], print(b), delete[b], # do[] local variables can be deleted, too 4 * a] @@ -552,41 +730,179 @@ with continuations: # enables also TCO automatically -## Installation +#### Unpythonic in 30 seconds: Language extensions with dialects -**PyPI** +The [dialects subsystem of `mcpyrate`](https://github.com/Technologicat/mcpyrate/blob/master/doc/dialects.md) makes Python into a language platform, à la [Racket](https://racket-lang.org/). We provide some example dialects based on `unpythonic`'s macro layer. See [documentation](doc/dialects.md). 
-``pip3 install unpythonic --user`` +
Lispython: automatic TCO and an implicit return statement. -or +[[docs](doc/dialects/lispython.md)] -``sudo pip3 install unpythonic`` +Also comes with automatically named, multi-expression lambdas. -**GitHub** +```python +from unpythonic.dialects import dialects, Lispython # noqa: F401 + +def factorial(n): + def f(k, acc): + if k == 1: + return acc + f(k - 1, k * acc) + f(n, acc=1) +assert factorial(4) == 24 +factorial(5000) # no crash + +square = lambda x: x**2 +assert square(3) == 9 +assert square.__name__ == "square" -Clone (or pull) from GitHub. Then, +# - brackets denote a multiple-expression lambda body +# (if you want to have one expression that is a literal list, +# double the brackets: `lambda x: [[5 * x]]`) +# - local[name := value] makes an expression-local variable +g = lambda x: [local[y := 2 * x], + y + 1] +assert g(10) == 21 +``` +
+
Pytkell: Automatic currying and implicitly lazy functions. -``python3 setup.py install --user`` +[[docs](doc/dialects/pytkell.md)] -or +```python +from unpythonic.dialects import dialects, Pytkell # noqa: F401 -``sudo python3 setup.py install`` +from operator import add, mul -**Uninstall** +def addfirst2(a, b, c): + return a + b +assert addfirst2(1)(2)(1 / 0) == 3 -Uninstallation must be invoked in a folder which has no subfolder called ``unpythonic``, so that ``pip`` recognizes it as a package name (instead of a filename). Then, +assert tuple(scanl(add, 0, (1, 2, 3))) == (0, 1, 3, 6) +assert tuple(scanr(add, 0, (1, 2, 3))) == (0, 3, 5, 6) -``pip3 uninstall unpythonic`` +my_sum = foldl(add, 0) +my_prod = foldl(mul, 1) +my_map = lambda f: foldr(compose(cons, f), nil) +assert my_sum(range(1, 5)) == 10 +assert my_prod(range(1, 5)) == 24 +double = lambda x: 2 * x +assert my_map(double, (1, 2, 3)) == ll(2, 4, 6) +``` +
+
Listhell: Prefix syntax for function calls, and automatic currying. -or +[[docs](doc/dialects/listhell.md)] + +```python +from unpythonic.dialects import dialects, Listhell # noqa: F401 -``sudo pip3 uninstall unpythonic`` +from operator import add, mul +from unpythonic import foldl, foldr, cons, nil, ll + +(print, "hello from Listhell") + +my_sum = (foldl, add, 0) +my_prod = (foldl, mul, 1) +my_map = lambda f: (foldr, (compose, cons, f), nil) +assert (my_sum, (range, 1, 5)) == 10 +assert (my_prod, (range, 1, 5)) == 24 +double = lambda x: 2 * x +assert (my_map, double, (q, 1, 2, 3)) == (ll, 2, 4, 6) +``` +
+ +## Install & uninstall + +### From PyPI + +```bash +pip install unpythonic +``` + +### From source + +Clone the repo from GitHub. Then, navigate to it in a terminal, and: + +```bash +pip install . --no-compile +``` + +If you intend to use the macro layer of `unpythonic`, the `--no-compile` flag is important. It prevents an **incorrect** precompilation, without macro support, that `pip install` would otherwise do at its `bdist_wheel` step. + +For most Python projects such precompilation is just fine - it's just macro-enabled projects that shouldn't be precompiled with standard tools. + +If `--no-compile` is NOT used, the precompiled bytecode cache may cause errors such as `ImportError: cannot import name 'macros' from 'mcpyrate.quotes'`, when you try to e.g. `from unpythonic.syntax import macros, let`. In-tree, it might work, but against an installed copy, it will fail. It has happened that my CI setup did not detect this kind of failure. + +This is a common issue when using macro expanders in Python. + +### Development mode (for developing `unpythonic` itself) + +Starting with v0.15.5, `unpythonic` uses [PDM](https://pdm-project.org/en/latest/) to manage its dependencies. This allows easy installation of a development copy into an isolated venv (virtual environment), allowing you to break things without breaking anything else on your system (including apps and libraries that use an installed copy of `unpythonic`). + +#### Install PDM in your Python environment + +To develop `unpythonic`, if your Python environment does not have PDM, you will need to install it first: + +```bash +python -m pip install pdm +``` + +Don't worry; it won't break `pip`, `poetry`, or other similar tools. + +We will also need a Python for PDM venvs. This Python is independent of the Python that PDM itself runs on. It is the version of Python you would like to use for developing `unpythonic`. 
+ +For example, we can make Python 3.10 available with the command: + +```bash +pdm python install 3.10 +``` + +Specifying just a version number defaults to CPython (the usual Python implementation). If you want PyPy instead, you can use e.g. `pypy@3.10`. + +#### Install the isolated venv + +Now, we will auto-create the development venv, and install `unpythonic`'s dependencies into it. In a terminal that sees your Python environment, navigate to the `unpythonic` folder, and issue the command: + +```bash +pdm install +``` + +This creates the development venv into the `.venv` hidden subfolder of the `unpythonic` folder. + +If you are a seasoned pythonista, note that there is no `requirements.txt`; the dependency list lives in `pyproject.toml`. + +#### Upgrade dependencies (later) + +To upgrade dependencies to latest available versions compatible with the specifications in `pyproject.toml`: + +```bash +pdm update +``` + +#### Develop + +To activate the development venv, in a terminal that sees your Python environment, navigate to the `unpythonic` folder, and issue the command: + +```bash +$(pdm venv activate) +``` + +Note the Bash command substitution syntax `$(...)`: the command `pdm venv activate` just prints the actual internal activation command, and the shell then executes that printed command. + +### Uninstall + +```bash +pip uninstall unpythonic +``` ## Support Not working as advertised? Missing a feature? Documentation needs improvement? +In case of a problem, see [Troubleshooting](doc/troubleshooting.md) first. Then: + **[Issue reports](https://github.com/Technologicat/unpythonic/issues) and [pull requests](https://github.com/Technologicat/unpythonic/pulls) are welcome.** [Contribution guidelines](CONTRIBUTING.md). While `unpythonic` is intended as a serious tool for improving productivity as well as for teaching, right now my work priorities mean that it's developed and maintained on whatever time I can spare for it. Thus getting a response may take a while, depending on which project I happen to be working on. 
@@ -608,4 +924,4 @@ Thanks to [TUT](http://www.tut.fi/en/home) for letting me teach [RAK-19006 in sp Links to blog posts, online articles and papers on topics relevant in the context of `unpythonic` have been collected to [a separate document](doc/readings.md). -If you like both FP and numerics, we have [some examples](unpythonic/test/test_fpnumerics.py) based on various internet sources. +If you like both FP and numerics, we have [some examples](unpythonic/tests/test_fpnumerics.py) based on various internet sources. diff --git a/TODO_DEFERRED.md b/TODO_DEFERRED.md new file mode 100644 index 00000000..2f5d4681 --- /dev/null +++ b/TODO_DEFERRED.md @@ -0,0 +1,6 @@ +# Deferred Issues + +Next unused item code: D8 + +- **D5**: `dispatch.py` — moved to GitHub issue #99. Dispatch-layer improvements for parametric ABCs (warn/error on indistinguishable multimethods). Typecheck-layer part resolved. + diff --git a/countlines.py b/countlines.py index 187a16c4..ab757165 100644 --- a/countlines.py +++ b/countlines.py @@ -1,60 +1,73 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -"""Estimate project size in lines of code. +"""Estimate project size in lines of code.""" -Ignores blank lines, docstrings, and whole-line comments.""" +# TODO: add sorting options: name, code count, SLOC count, code ratio. import os import re from operator import itemgetter def listpy(path): - return list(sorted(fn for fn in os.listdir(path) if fn.endswith(".py"))) + return list(sorted(filename for filename in os.listdir(path) if filename.endswith(".py"))) -def loc(code, blanks, docstrings, comments): # blanks et al.: include this item? 
+def count_sloc(code, *, blanks, docstrings, comments): + """blanks et al.: include this item?""" if not docstrings: # TODO: make sure it's a docstring (and not some other """...""" string) code = re.sub(r'""".*?"""', r'', code, flags=(re.MULTILINE + re.DOTALL)) + code = re.sub(r"'''.*?'''", r'', code, flags=(re.MULTILINE + re.DOTALL)) lines = code.split("\n") if not blanks: lines = [line for line in lines if line.strip()] if not comments: - # TODO: removes only whole-line comments. - lines = [line for line in lines if not line.strip().startswith("#")] + lines = [line for line in lines if not line.strip().startswith("#")] # ignore whole-line comments return len(lines) -def analyze(items, blanks=False, docstrings=False, comments=False): - grandtotal = 0 - for name, p in items: - path = os.path.join(*p) - files = listpy(path) - ns = [] - for fn in files: - with open(os.path.join(path, fn), "rt", encoding="utf-8") as f: +def report(paths): + print(f"Code size for {os.getcwd()}") + def format_name(s, width=25): + return s.ljust(width) + def format_number(n, width=5): + return str(n).rjust(width) + def format_path(s): # ./subdir/something + def label(s): + if s == ".": + return "top level" + return s[2:] + return format_name(label(s)) + codes_grandtotal = 0 + slocs_grandtotal = 0 + for path in paths: + filenames = listpy(path) + results = [] + for filename in filenames: + with open(os.path.join(path, filename), "rt", encoding="utf-8") as f: content = f.read() - ns.append(loc(content, blanks, docstrings, comments)) - # report - print(f"{name}:") - for fn, n in sorted(zip(files, ns), key=itemgetter(1)): - print(f" {fn} {n}") - grouptotal = sum(ns) - print(f" total for {name} {grouptotal}") - grandtotal += grouptotal - print(f"grand total {grandtotal}") + code = count_sloc(content, blanks=False, docstrings=False, comments=False) + sloc = count_sloc(content, blanks=True, docstrings=True, comments=True) + results.append((code, sloc)) + + if results: + codes, slocs = 
zip(*results) + codes = sum(codes) + slocs = sum(slocs) + print(f"\n {format_path(path)} {format_number(codes)} / {format_number(slocs)} {int(codes / slocs * 100):d}% code") + for filename, (code, sloc) in sorted(zip(filenames, results), key=itemgetter(1)): + print(f" {format_name(filename)} {format_number(code)} / {format_number(sloc)} {int(code / sloc * 100):d}% code") + codes_grandtotal += codes + slocs_grandtotal += slocs + print(f"\n{format_name('Total')} {format_number(codes_grandtotal)} / {format_number(slocs_grandtotal)} {int(codes_grandtotal / slocs_grandtotal * 100):d}% code") def main(): - items = (("top level", ["."]), - ("regular code", ["unpythonic"]), - ("regular code tests", ["unpythonic", "tests"]), - ("testing framework (not counting macros)", ["unpythonic", "test"]), - ("REPL/networking code", ["unpythonic", "net"]), - ("REPL/networking tests", ["unpythonic", "net", "tests"]), - ("macros", ["unpythonic", "syntax"]), - ("macro tests", ["unpythonic", "syntax", "tests"])) - print("Raw (with blanks, docstrings and comments)") - analyze(items, blanks=True, docstrings=True, comments=True) - print("\nFiltered (non-blank code lines only)") - analyze(items) + blacklist = [".git", "build", "dist", "__pycache__", "00_stuff"] + paths = [] + for root, dirs, files in os.walk("."): + paths.append(root) + for x in blacklist: + if x in dirs: + dirs.remove(x) + report(sorted(paths)) if __name__ == '__main__': main() diff --git a/doc/design-notes.md b/doc/design-notes.md index 130c410a..bef7cc19 100644 --- a/doc/design-notes.md +++ b/doc/design-notes.md @@ -1,47 +1,64 @@ -# Design Notes +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- **Design notes** +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution 
guidelines](../CONTRIBUTING.md) + + +**Table of Contents** - [Design Philosophy](#design-philosophy) -- [Macros do not Compose](#macros-do-not-compose) -- [Language Discontinuities](#language-discontinuities) -- [What Belongs in Python?](#what-belongs-in-python) -- [Killer features of Common Lisp](#killer-features-of-common-lisp) -- [Common Lisp, Python, and productivity](#common-lisp-python-and-productivity) -- [Python is not a Lisp](#python-is-not-a-lisp) -- [On ``let`` and Python](#on-let-and-python) -- [Assignment Syntax](#assignment-syntax) -- [TCO Syntax and Speed](#tco-syntax-and-speed) -- [No Monads?](#no-monads) -- [No Types?](#no-types) -- [Detailed Notes on Macros](#detailed-notes-on-macros) -- [Miscellaneous notes](#miscellaneous-notes) - -## Design Philosophy + - [Macros do not Compose](#macros-do-not-compose) + - [Language Discontinuities](#language-discontinuities) + - [`unpythonic` and the Killer Features of Common Lisp](#unpythonic-and-the-killer-features-of-common-lisp) + - [Python is not a Lisp](#python-is-not-a-lisp) + - [On `let` and Python](#on-let-and-python) + - [Assignment syntax](#assignment-syntax) + - [TCO syntax and speed](#tco-syntax-and-speed) + - [No Monads?](#no-monads) + - [No Types?](#no-types) + - [Detailed Notes on Macros](#detailed-notes-on-macros) + - [Miscellaneous notes](#miscellaneous-notes) + + + +# Design Philosophy The main design considerations of `unpythonic` are simplicity, robustness, and minimal dependencies. Some complexity is tolerated, if it is essential to make features interact better, or to provide a better user experience. The whole library is pure Python. No foreign extensions are required. We also try to avoid depending on anything beyond "the Python standard", to help `unpythonic` run on any conforming Python implementation. (Provided its AST representation is sufficiently similar to CPython's, to allow the macros to work.) 
-As of this writing (0.14.2), we test on CPython 3.6, and consider it as the primary target platform. However, if anything fails to work on another 3.6-compliant Python 3 such as [PyPy3](https://doc.pypy.org/en/latest/index.html) ([version 2.3.1 or later](http://pypy.org/compat.html)), issue reports and pull requests are welcome. +The library is split into **three layers**, providing **four kinds of features**: -The library is split into **two layers**, providing **three kinds of features**: - - - Pure Python (e.g. batteries for `itertools`), - - Macros driving a pure-Python core (e.g. `do`, `let`), - - Pure macros (e.g. `continuations`, `lazify`, `dbg`). + - `unpythonic`, `unpythonic.net` + - Pure Python (e.g. batteries for `itertools`), + - `unpythonic.syntax` + - Macros driving a pure-Python core (e.g. `do`, `let`), + - Pure macros (e.g. `continuations`, `lazify`, `dbg`). + - `unpythonic.dialects` + - Whole-module transformations, a.k.a. dialects. We believe syntactic macros are [*the nuclear option of software engineering*](https://www.factual.com/blog/thinking-in-clojure-for-java-programmers-part-2/). Accordingly, we aim to [minimize macro magic](https://macropy3.readthedocs.io/en/latest/discussion.html#minimize-macro-magic). If a feature can be implemented - *with a level of usability on par with pythonic standards* - without resorting to macros, then it belongs in the pure-Python layer. (The one exception is when building the feature as a macro is the *simpler* solution. Consider `unpythonic.amb.forall` (overly complicated, to avoid macros) vs. `unpythonic.syntax.forall` (a clean macro-based design of the same feature) as an example. Keep in mind [ZoP](https://www.python.org/dev/peps/pep-0020/) §17 and §18.) -When that is not possible, we implement the actual feature as a pure-Python core, not meant for direct use, and provide a macro layer on top. 
The purpose of the macro layer is then to improve usability, by eliminating the [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet) from the user interface of the pure-Python core. Examples are *automatic* currying, *automatic* tail-call optimization, and (beside a much leaner syntax) lexical scoping for the ``let`` and ``do`` constructs. We believe a well-designed macro layer can bring a difference in user experience similar to that between programming in [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) (or to be fair, in Fortran or in Java) versus in Python. +When that is not possible, we implement the actual feature as a pure-Python core, not meant for direct use, and provide a macro layer on top. The purpose of the macro layer is then to improve usability, by eliminating the [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet) from the user interface of the pure-Python core. Examples are *automatic* currying, *automatic* tail-call optimization, and (beside a much leaner syntax) lexical scoping for the `let` and `do` constructs. We believe a well-designed macro layer can bring a difference in user experience similar to that between programming in [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) (or to be fair, in Fortran or in Java) versus in Python. Finally, when the whole purpose of the feature is to automatically transform a piece of code into a particular style (`continuations`, `lazify`, `autoreturn`), or when run-time access to the original [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) is essential to the purpose (`dbg`), then the feature belongs squarely in the macro layer, with no pure-Python core underneath. When to implement your own feature as a syntactic macro, see the discussion in Chapter 8 of [Paul Graham: On Lisp](http://paulgraham.com/onlisp.html). MacroPy's documentation also provides [some advice on the topic](https://macropy3.readthedocs.io/en/latest/discussion.html). 
-### Macros do not Compose + +## Macros do not Compose Making macros work together is nontrivial, essentially because *macros don't compose*. [As pointed out by John Shutt](https://fexpr.blogspot.com/2013/12/abstractive-power.html), in a multilayered language extension implemented with macros, the second layer of macros needs to understand all of the first layer. The issue is that the macro abstraction leaks the details of its expansion. Contrast with functions, which operate on values: the process that was used to arrive at a value doesn't matter. It's always possible for a function to take this value and transform it into another value, which can then be used as input for the next layer of functions. That's composability at its finest. -The need for interaction between macros may arise already in what *feels* like a single layer of abstraction; for example, it's not only that the block macros must understand ``let[]``, but some of them must understand other block macros. This is because what feels like one layer of abstraction is actually implemented as a number of separate macros, which run in a specific order. Thus, from the viewpoint of actually applying the macros, if the resulting software is to work correctly, the mere act of allowing combos between the block macros already makes them into a multilayer system. The compartmentalization of conceptually separate features into separate macros facilitates understanding and maintainability, but fails to reach the ideal of modularity. +The need for interaction between macros may arise already in what *feels* like a single layer of abstraction; for example, it's not only that the block macros must understand `let[]`, but some of them must understand other block macros. This is because what feels like one layer of abstraction is actually implemented as a number of separate macros, which run in a specific order. 
Thus, from the viewpoint of actually applying the macros, if the resulting software is to work correctly, the mere act of allowing combos between the block macros already makes them into a multilayer system. The compartmentalization of conceptually separate features into separate macros facilitates understanding and maintainability, but fails to reach the ideal of modularity. Therefore, any particular combination of macros that has not been specifically tested might not work. That said, if some particular combo doesn't work and *is not at least documented as such*, that's an error; please raise an issue. The unit tests should cover the combos that on the surface seem the most useful, but there's no guarantee that they cover everything that actually is useful somewhere. @@ -49,29 +66,19 @@ Some aspects in the design of `unpythonic` could be simplified by expanding macr The lack of composability is a problem mainly when using macros to create a language extension, because the features of the extended language often interact. Macros can also be used in a much more everyday way, where composability is mostly a non-issue - to abstract and name common patterns that just happen to be of a nature that cannot be extracted as a regular function. See [Peter Seibel: Practical Common Lisp, chapter 3](http://www.gigamonkeys.com/book/practical-a-simple-database.html) for an example. -### Language Discontinuities + +## Language Discontinuities The very act of extending a language creates points of discontinuity between the extended language and the original. This can become a particularly bad source of extra complexity, if the extension can be enabled locally for a piece of code - as is the case with block macros. Then the design of the extended language must consider how to treat interactions between pieces of code that use the extension and those that don't. Then exponentiate those design considerations by the number of extensions that can be enabled independently. 
This issue is simply absent when designing a new language from scratch. For an example, look at what the rest of `unpythonic` has to do to make `lazify` behave as the user expects! Grep the codebase for `lazyutil`; especially the `passthrough_lazy_args` decorator, and its sister, the utility `maybe_force_args`. The decorator is essentially just an annotation for the `lazify` transformer, that marks a function as *not necessarily needing* evaluation of its arguments. Such functions often represent language-level constructs, such as `let` or `curry`, that essentially just *pass through* user data to other user-provided code, without *accessing* that data. The annotation is honored by the compiler when programming in the lazy (call-by-need) extended language, and otherwise it does nothing. Another pain point is the need of a second trampoline implementation (that only differs in one minor detail) just to make `lazify` interact correctly with TCO (while not losing an order of magnitude of performance in the trampoline used with standard Python). -For another example, it's likely that e.g. `continuations` still doesn't integrate completely seamlessly - and I'm not sure if that is possible even in principle. Calling a traditional function from a [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) function is no problem; the traditional function uses no continuations, and (barring exceptions) will always return normally. The other way around can be a problem. Also, having TCO implemented as a trampoline system on top of the base language (instead of being already provided under the hood, like in Scheme) makes the `continuations` transformer more complex than absolutely necessary. +For another example, it is likely that e.g. `continuations` still does not integrate completely seamlessly - and I am not sure if that is possible even in principle. 
Calling a traditional function from a [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) function is no problem; the traditional function uses no continuations, and (barring exceptions) will always return normally. The other way around can be a problem. Also, having TCO implemented as a trampoline system on top of the base language (instead of being already provided under the hood, like in Scheme) makes the `continuations` transformer more complex than absolutely necessary. For a third example, consider *decorated lambdas*. This is an `unpythonic` extension - essentially, a compiler feature implemented (by calling some common utility code) by each of the transformers of the pure-macro features - that understands a lambda enclosed in a nested sequence of single-argument function calls *as a decorated function definition*. This is painful, because the Python AST has no place to store the decorator list for a lambda; Python sees it just as a nested sequence of function calls, terminating in a lambda. This has to be papered over by the transformers. We also introduce a related complication, the decorator registry (see `regutil`), so that we can automatically sort decorator invocations - so that pure-macro features know at which index to inject a particular decorator (so it works properly) when they need to do that. Needing such a registry is already a complication, but the *decorated lambda* machinery feels the pain more acutely. -### What Belongs in Python? - -If you feel [my hovercraft is full of eels](http://stupidpythonideas.blogspot.com/2015/05/spam-spam-spam-gouda-spam-and-tulips.html), it is because they come with the territory. - -Some have expressed the opinion [the statement-vs-expression dichotomy is a feature](http://stupidpythonideas.blogspot.com/2015/01/statements-and-expressions.html). 
The BDFL himself has famously stated that TCO has no place in Python [[1]](http://neopythonic.blogspot.com/2009/04/tail-recursion-elimination.html) [[2]](http://neopythonic.blogspot.fi/2009/04/final-words-on-tail-calls.html), and less famously that multi-expression lambdas or continuations have no place in Python [[3]](https://www.artima.com/weblogs/viewpost.jsp?thread=147358). Several potentially interesting PEPs have been deferred [[1]](https://www.python.org/dev/peps/pep-3150/) [[2]](https://www.python.org/dev/peps/pep-0403/) or rejected [[3]](https://www.python.org/dev/peps/pep-0511/) [[4]](https://www.python.org/dev/peps/pep-0463/) [[5]](https://www.python.org/dev/peps/pep-0472/). - -Of course, if I agreed, I wouldn't be doing this, or [`mcpyrate`](https://github.com/Technologicat/mcpyrate) either. - -On a point raised [here](https://www.artima.com/weblogs/viewpost.jsp?thread=147358) with respect to indentation-sensitive vs. indentation-insensitive parser modes, having seen [SRFI-110: Sweet-expressions (t-expressions)](https://srfi.schemers.org/srfi-110/srfi-110.html), I think Python is confusing matters by linking the mode to statements vs. expressions. A workable solution is to make *everything* support both modes (or even preprocess the source code text to use only one of the modes), which *uniformly* makes parentheses an alternative syntax for grouping. -It would be nice to be able to use indentation to structure expressions to improve their readability, like one can do in Racket with [sweet](https://docs.racket-lang.org/sweet/), but I suppose ``lambda x: [expr0, expr1, ...]`` will have to do for a multiple-expression lambda. 
Unless I decide at some point to make a source filter for [`mcpyrate`](https://github.com/Technologicat/mcpyrate) to auto-convert between indentation and parentheses; but for Python this is somewhat difficult to do, because statements **must** use indentation whereas expressions **must** use parentheses, and this must be done before we can invoke the standard parser to produce an AST. (And I don't want to maintain a [Pyparsing](https://github.com/pyparsing/pyparsing) grammar to parse a modified version of Python.) - -### Killer features of Common Lisp +## `unpythonic` and the Killer Features of Common Lisp In my opinion, Common Lisp has three legendary killer features: @@ -101,45 +108,36 @@ But for those of us that [don't like parentheses](https://srfi.schemers.org/srfi - PyPy (the JIT-enabled Python interpreter) itself is not the full story; the [RPython](https://rpython.readthedocs.io/en/latest/) toolchain from the PyPy project can *automatically produce a JIT for an interpreter for any new dynamic language implemented in the RPython language* (which is essentially a restricted dialect of Python 2.7). Now **that's** higher-order magic if anything is. - For the use case of numerics specifically, instead of Python, [Julia](https://docs.julialang.org/en/v1/manual/methods/) may be a better fit for writing high-level, yet performant code. It's a spiritual heir of Common Lisp, Fortran, *and Python*. Compilation to efficient machine code, with the help of gradual typing and automatic type inference, is a design goal. -### Common Lisp, Python, and productivity - -The various essays by Paul Graham, especially [Revenge of the Nerds (2002)](http://paulgraham.com/icad.html), have given the initial impulse to many programmers for studying Lisp. The essays are well written and have provided a lot of exposure for Lisp. So how does the programming world look in that light now, 20 years later? 
- -The base abstraction level of programming languages, even those in popular use, has increased. The trend was visible already then, and was indeed noted in the essays. The focus on low-level languages such as C++ has decreased. Java is still popular, but high-level FP languages that compile to JVM bytecode (Kotlin, Scala, Clojure) are rising. - -Python has become highly popular, and is now also closer to Lisp than it was 20 years ago, especially after `MacroPy` introduced syntactic macros to Python (in 2013, [according to the git log](https://github.com/lihaoyi/macropy/commits/python2/macropy/__init__.py)). Python wasn't bad as a Lisp replacement even back in 2000 - see Peter Norvig's essay [Python for Lisp Programmers](https://norvig.com/python-lisp.html). Some more historical background, specifically on lexically scoped closures (and the initial lack thereof), can be found in [PEP 3104](https://www.python.org/dev/peps/pep-3104/), [PEP 227](https://www.python.org/dev/peps/pep-0227/), and [Historical problems with closures in JavaScript and Python](http://giocc.com/problems-with-closures-in-javascript-and-python.html). - -In 2020, does it still make sense to learn [the legendary](https://xkcd.com/297/) Common Lisp? - -To know exactly what it has to offer, yes. As baroque as some parts are, there are a lot of great ideas there. [Conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) are one. [CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) is another. (Nowadays [Julia](https://docs.julialang.org/en/v1/manual/methods/) has CLOS-style [multiple-dispatch generic functions](https://docs.julialang.org/en/v1/manual/methods/).) More widely, in the ecosystem, Swank is one. Having more perspectives at one's disposal makes one a better programmer. -But as a practical tool? Is CL hands-down better than Python? Maybe no. Python has already delivered on 90% of the productivity promise of Lisp. 
Both languages cut down significantly on [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet). Python has a huge library ecosystem. [`mcpyrate`](https://github.com/Technologicat/mcpyrate) and `unpythonic` are trying to push the language-level features a further 5%. (A full 100% is likely impossible when extending an existing language; if nothing else, there will be seams.) +## Python is not a Lisp -As for productivity, [it may be](https://medium.com/smalltalk-talk/lisp-smalltalk-and-the-power-of-symmetry-8bd96aaa0c0c) that a form of code-data equivalence (symmetry!), not macros specifically, is what makes Lisp powerful. If so, there may be other ways to reach that equivalence. For example Smalltalk, like Lisp, *runs in the same context it's written in*. All Smalltalk data are programs. Smalltalk [may be making a comeback](https://hackernoon.com/how-to-evangelize-a-programming-language-0p7p3y02), in the form of [Pharo](https://pharo.org/). +The point behind providing `let` and `begin` (and the `let[]` and `do[]` [macros](macros.md)) is to make Python lambdas slightly more useful - which was really the starting point for the whole `unpythonic` experiment. -Haskell aims at code-data equivalence from a third angle (memoized pure functions are in essence infinite lookup tables), but I haven't used it in practice, so I don't have the experience to say whether this is enough to make it feel powerful in the same way. +The oft-quoted single-expression limitation of the Python `lambda` is ultimately a herring, as this library demonstrates. The real problem is the statement/expression dichotomy. In Python, the looping constructs (`for`, `while`), the full power of `if`, and `return` are statements, so they cannot be used in lambdas. (This observation has been earlier made by others, too; see e.g. the [Wikipedia page on anonymous functions](https://en.wikipedia.org/wiki/Anonymous_function#Python).) 
We can work around some of this: -Image-based programming (live programming) is a common factor between Pharo and Common Lisp + Swank. This is another productivity booster that much of the programming world isn't that familiar with. It eliminates not only the edit/compile/restart cycle, but the edit/restart cycle as well, making the workflow a concurrent *edit/run* instead (without restarting the whole app at each change). Julia has [Revise.jl](https://github.com/timholy/Revise.jl) for something similar. - -### Python is not a Lisp - -The point behind providing `let` and `begin` (and the ``let[]`` and ``do[]`` [macros](macros.md)) is to make Python lambdas slightly more useful - which was really the starting point for the whole `unpythonic` experiment. - -The oft-quoted single-expression limitation of the Python ``lambda`` is ultimately a herring, as this library demonstrates. The real problem is the statement/expression dichotomy. In Python, the looping constructs (`for`, `while`), the full power of `if`, and `return` are statements, so they cannot be used in lambdas. (This observation has been earlier made by others, too; see e.g. the [Wikipedia page on anonymous functions](https://en.wikipedia.org/wiki/Anonymous_function#Python).) We can work around some of this: - - - The expr macro ``cond[]`` gives us a general ``if``/``elif``/``else`` expression. - - Without it, the expression form of `if` (that Python already has) could be used, but readability suffers if nested, since it has no ``elif``. Actually, [`and` and `or` are sufficient for full generality](https://www.ibm.com/developerworks/library/l-prog/), but readability suffers even more. - - So we use macros to define a ``cond`` expression, essentially duplicating a feature the language already almost has. See [our macros](macros.md). - - Functional looping (with TCO, to boot) is possible. See the constructs in ``unpythonic.fploop``. - - ``unpythonic.ec.call_ec`` gives us ``return`` (the ec). 
- - ``unpythonic.misc.raisef`` gives us ``raise``, and ``unpythonic.misc.tryf`` gives us ``try``/``except``/``else``/``finally``. - - A lambda can be named (``unpythonic.misc.namelambda``, with some practical limitations on the fully qualified name of nested lambdas). - - Even an anonymous function can recurse with some help (``unpythonic.fun.withself``). - - Context management (``with``) is currently **not** available for lambdas, even in ``unpythonic``. + - The expr macro `do[]` gives us sequencing, i.e. it lets us use, in any expression position, multiple expressions that run in the specified order. + - The expr macro `cond[]` gives us a general `if`/`elif`/`else` expression. + - Without it, the expression form of `if` (that Python already has) could be used, but readability suffers if nested, since it has no `elif`. Actually, [`and` and `or` are sufficient for full generality](https://www.ibm.com/developerworks/library/l-prog/), but readability suffers even more. + - So we use macros to define a `cond` expression, essentially duplicating a feature the language already almost has. See [our macros](macros.md). + - Functional looping (with TCO) gives us equivalents of `for` and `while`. See the constructs in `unpythonic.fploop`, particularly `looped` and `breakably_looped`. + - `unpythonic.ec.call_ec` gives us `return` (the ec). + - `unpythonic.misc.raisef` gives us `raise`, and `unpythonic.misc.tryf` gives us `try`/`except`/`else`/`finally`. + - A lambda can be named; see `unpythonic.misc.namelambda`. + - There are some practical limitations on the fully qualified name of nested lambdas. + - Note this does not bind the name to an identifier at the use site, so the name cannot be used to recurse. The point is that the name is available for inspection, and it will show in tracebacks. + - A lambda can recurse using `unpythonic.fun.withself`. You will get a `self` argument that points to the lambda itself, and is passed implicitly, like `self` usually is in Python. 
+ - A lambda can define a class using the three-argument form of the builtin `type` function. For an example, see [Peter Corbett (2005): Statementless Python](https://gist.github.com/brool/1679908), a complete minimal Lisp interpreter implemented as a single Python expression. + - A lambda can import a module using the builtin `__import__`, or better, `importlib.import_module`. + - A lambda can assert by using an if-expression and then `raisef` to actually raise the `AssertionError`. + - Or use the `test[]` macro, which also shows the source code for the asserted expression if the assertion fails. + - Technically, `test[]` will `signal` the `TestFailure` (part of the public API of `unpythonic.test.fixtures`), not raise it, but essentially, `test[]` is a more convenient assert that optionally hooks into a testing framework. The error signal, if unhandled, will automatically chain into raising a `ControlError` exception, which is often just fine. + - Context management (`with`) is currently **not** available for lambdas, even in `unpythonic`. + - Aside from the `async` stuff, this is the last hold-out preventing full generality, so we will likely add an expression form of `with` in a future version. This is tracked in [issue #76](https://github.com/Technologicat/unpythonic/issues/76). Still, ultimately one must keep in mind that Python is not a Lisp. Not all of Python's standard library is expression-friendly; some standard functions and methods lack return values - even though a call is an expression! For example, `set.add(x)` returns `None`, whereas in an expression context, returning `x` would be much more useful, even though it does have a side effect. -### On ``let`` and Python + +## On `let` and Python Why no `let*`, as a function? In Python, name lookup always occurs at runtime. 
Python gives us no compile-time guarantees that no binding refers to a later one - in [Racket](http://racket-lang.org/), this guarantee is the main difference between `let*` and `letrec`. @@ -149,17 +147,18 @@ In contrast, in a `let*` form, attempting such a definition is *a compile-time e Our `letrec` behaves like `let*` in that if `valexpr` is not a function, it may only refer to bindings above it. But this is only enforced at run time, and we allow mutually recursive function definitions, hence `letrec`. -Note the function versions of our `let` constructs, in the pure-Python API, are **not** properly lexically scoped; in case of nested ``let`` expressions, one must be explicit about which environment the names come from. +Note the function versions of our `let` constructs, in the pure-Python API, are **not** properly lexically scoped; in case of nested `let` expressions, one must be explicit about which environment the names come from. -The [macro versions](macros.md) of the `let` constructs **are** lexically scoped. The macros also provide a ``letseq[]`` that, similarly to Racket's ``let*``, gives a compile-time guarantee that no binding refers to a later one. +The [macro versions](macros.md) of the `let` constructs **are** lexically scoped. The macros also provide a `letseq[]` that, similarly to Racket's `let*`, gives a compile-time guarantee that no binding refers to a later one. Inspiration: [[1]](https://nvbn.github.io/2014/09/25/let-statement-in-python/) [[2]](https://stackoverflow.com/questions/12219465/is-there-a-python-equivalent-of-the-haskell-let) [[3]](http://sigusr2.net/more-about-let-in-python.html). -### Assignment syntax + +## Assignment syntax Why the clunky `e.set("foo", newval)` or `e << ("foo", newval)`, which do not directly mention `e.foo`? This is mainly because in Python, the language itself is not customizable. If we could define a new operator `e.foo newval` to transform to `e.set("foo", newval)`, this would be easily solved. 
-Our [macros](macros.md) essentially do exactly this, but by borrowing the ``<<`` operator to provide the syntax ``foo << newval``, because even with macros, it is not possible to define new [BinOp](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#BinOp)s in Python. That **is** possible essentially as a *reader macro* (as it's known in the Lisp world), to transform custom BinOps into some syntactically valid Python code before proceeding with the rest of the import machinery, but it seems as of this writing, no one has done this. +Our [macros](macros.md) essentially do exactly this, but by borrowing the `<<` operator to provide the syntax `foo << newval`, because even with macros, it is not possible to define new [BinOp](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#BinOp)s in Python. That **is** possible essentially as a *reader macro* (as it's known in the Lisp world), to transform custom BinOps into some syntactically valid Python code before proceeding with the rest of the import machinery, but it seems as of this writing, no one has done this. If you want a framework to play around with reader macros in Python, see [`mcpyrate`](https://github.com/Technologicat/mcpyrate). You'll still have to write a parser, where [Pyparsing](https://github.com/pyparsing/pyparsing) may help; but supporting something as complex as a customized version of the surface syntax of Python is still a lot of work, and may quickly go out of date. (You'll want to look at the official [full grammar specification](https://docs.python.org/3/reference/grammar.html), as well as the source code linked therein.) @@ -174,39 +173,42 @@ If we later choose go this route nevertheless, `<<` is a better choice for the s The current solution for the assignment syntax issue is to use macros, to have both clean syntax at the use site and a relatively hackfree implementation. 
-### TCO syntax and speed -Benefits and costs of ``return jump(...)``: +## TCO syntax and speed - - Explicitly a tail call due to ``return``. - - The trampoline can be very simple and (relatively speaking) fast. Just a dumb ``jump`` record, a ``while`` loop, and regular function calls and returns. - - The cost is that ``jump`` cannot detect whether the user forgot the ``return``, leaving a possibility for bugs in the client code (causing an FP loop to immediately exit, returning ``None``). Unit tests of client code become very important. +Benefits and costs of `return jump(...)`: + + - Explicitly a tail call due to `return`. + - The trampoline can be very simple and (relatively speaking) fast. Just a dumb `jump` record, a `while` loop, and regular function calls and returns. + - The cost is that `jump` cannot detect whether the user forgot the `return`, leaving a possibility for bugs in the client code (causing an FP loop to immediately exit, returning `None`). Unit tests of client code become very important. - This is somewhat mitigated by the check in `__del__`, but it can only print a warning, not stop the incorrect program from proceeding. - - We could mandate that trampolined functions must not return ``None``, but: - - Uniformity is lost between regular and trampolined functions, if only one kind may return ``None``. + - We could mandate that trampolined functions must not return `None`, but: + - Uniformity is lost between regular and trampolined functions, if only one kind may return `None`. - This breaks the *don't care about return value* use case, which is rather common when using side effects. - - Failing to terminate at the intended point may well fall through into what was intended as another branch of the client code, which may correctly have a ``return``. So this would not even solve the problem. 
+ - Failing to terminate at the intended point may well fall through into what was intended as another branch of the client code, which may correctly have a `return`. So this would not even solve the problem. -The other simple-ish solution is to use exceptions, making the jump wrest control from the caller. Then ``jump(...)`` becomes a verb, but this approach is 2-5x slower, when measured with a do-nothing loop. (See the old default TCO implementation in v0.9.2.) +The other simple-ish solution is to use exceptions, making the jump wrest control from the caller. Then `jump(...)` becomes a verb, but this approach is 2-5x slower, when measured with a do-nothing loop. (See the old default TCO implementation in v0.9.2.) -Our [macros](macros.md) provide an easy-to use solution. Just wrap the relevant section of code in a ``with tco:``, to automatically apply TCO to code that looks exactly like standard Python. With the macro, function definitions (also lambdas) and returns are automatically converted. It also knows enough not to add a ``@trampolined`` if you have already declared a ``def`` as ``@looped`` (or any of the other TCO-enabling decorators in ``unpythonic.fploop``, or ``unpythonic.fix.fixtco``). +Our [macros](macros.md) provide an easy-to-use solution. Just wrap the relevant section of code in a `with tco:`, to automatically apply TCO to code that looks exactly like standard Python. With the macro, function definitions (also lambdas) and returns are automatically converted. It also knows enough not to add a `@trampolined` if you have already declared a `def` as `@looped` (or any of the other TCO-enabling decorators in `unpythonic.fploop`, or `unpythonic.fix.fixtco`). For other libraries bringing TCO to Python, see: - [tco](https://github.com/baruchel/tco) by Thomas Baruchel, based on exceptions. - - [ActiveState recipe 474088](https://github.com/ActiveState/code/tree/master/recipes/Python/474088_Tail_Call_Optimization_Decorator), based on ``inspect``.
- - ``recur.tco`` in [fn.py](https://github.com/fnpy/fn.py), the original source of the approach used here. - - [MacroPy](https://github.com/azazel75/macropy) uses an approach similar to ``fn.py``. + - [ActiveState recipe 474088](https://github.com/ActiveState/code/tree/master/recipes/Python/474088_Tail_Call_Optimization_Decorator), based on `inspect`. + - `recur.tco` in [fn.py](https://github.com/fnpy/fn.py), the original source of the approach used here. + - [MacroPy](https://github.com/azazel75/macropy) uses an approach similar to `fn.py`. + -### No Monads? +## No Monads? -(Beside List inside ``forall``.) +(Beside List inside `forall`.) Admittedly unpythonic, but Haskell feature, not Lisp. Besides, already done elsewhere, see [OSlash](https://github.com/dbrattli/OSlash) if you need them. If you want to roll your own monads for whatever reason, there's [this silly hack](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/monads.py) that wasn't packaged into this; or just read Stephan Boyer's quick introduction [[part 1]](https://www.stephanboyer.com/post/9/monads-part-1-a-design-pattern) [[part 2]](https://www.stephanboyer.com/post/10/monads-part-2-impure-computations) [[super quick intro]](https://www.stephanboyer.com/post/83/super-quick-intro-to-monads) and figure it out, it's easy. (Until you get to `State` and `Reader`, where [this](http://brandon.si/code/the-state-monad-a-tutorial-for-the-confused/) and maybe [this](https://gaiustech.wordpress.com/2010/09/06/on-monads/) can be helpful.) -### No Types? + +## No Types? The `unpythonic` project will likely remain untyped indefinitely, since I don't want to enter that particular marshland with things like `curry` and `with continuations`. It may be possible to gradually type some carefully selected parts - but that's currently not on [the roadmap](https://github.com/Technologicat/unpythonic/milestones). I'm not against it, if someone wants to contribute. 
@@ -235,57 +237,63 @@ More on type systems: - In physics, units as used for dimension analysis are essentially a form of static typing. - This has been discussed on LtU, see e.g. [[1]](http://lambda-the-ultimate.org/node/33) [[2]](http://lambda-the-ultimate.org/classic/message11877.html). -### Detailed Notes on Macros - - ``continuations`` and ``tco`` are mutually exclusive, since ``continuations`` already implies TCO. - - However, the ``tco`` macro skips any ``with continuations`` blocks inside it, **for the specific reason** of allowing modules written in the [Lispython dialect](https://github.com/Technologicat/pydialect) (which implies TCO for the whole module) to use ``with continuations``. +## Detailed Notes on Macros - - ``prefix``, ``autoreturn``, ``quicklambda`` and ``multilambda`` are first-pass macros (expand from outside in), because they change the semantics: - - ``prefix`` transforms things-that-look-like-tuples into function calls, - - ``autoreturn`` adds ``return`` statements where there weren't any, - - ``quicklambda`` transforms things-that-look-like-list-lookups into ``lambda`` function definitions, - - ``multilambda`` transforms things-that-look-like-lists (in the body of a ``lambda``) into sequences of multiple expressions, using ``do[]``. - - Hence, a lexically outer block of one of these types *will expand first*, before any macros inside it are expanded, in contrast to the default *from inside out* expansion order. + - `continuations` and `tco` are mutually exclusive, since `continuations` already implies TCO. + - However, the `tco` macro skips any `with continuations` blocks inside it, **for the specific reason** of allowing modules written in the [Lispython dialect](https://github.com/Technologicat/pydialect) (which implies TCO for the whole module) to use `with continuations`. 
+ + - `prefix`, `autoreturn`, `quicklambda` and `multilambda` expand outside-in, because they change the semantics: + - `prefix` transforms things-that-look-like-tuples into function calls, + - `autoreturn` adds `return` statements where there weren't any, + - `quicklambda` transforms things-that-look-like-list-lookups into `lambda` function definitions, + - `multilambda` transforms things-that-look-like-lists (in the body of a `lambda`) into sequences of multiple expressions, using `do[]`. + - Hence, a lexically outer block of one of these types *will expand first*, before any macros inside it are expanded. - This yields clean, standard-ish Python for the rest of the macros, which then don't need to worry about their input meaning something completely different from what it looks like. - - An already expanded ``do[]`` (including that inserted by `multilambda`) is accounted for by all ``unpythonic.syntax`` macros when handling expressions. + - An already expanded `do[]` (including that inserted by `multilambda`) is accounted for by all `unpythonic.syntax` macros when handling expressions. - For simplicity, this is **the only** type of sequencing understood by the macros. - - E.g. the more rudimentary ``unpythonic.seq.begin`` is not treated as a sequencing operation. This matters especially in ``tco``, where it is critically important to correctly detect a tail position in a return-value expression or (multi-)lambda body. + - E.g. the more rudimentary `unpythonic.seq.begin` is not treated as a sequencing operation. This matters especially in `tco`, where it is critically important to correctly detect a tail position in a return-value expression or (multi-)lambda body. - *Sequencing* is here meant in the Racket/Haskell sense of *running sub-operations in a specified order*, unrelated to Python's *sequences*. 
- - The TCO transformation knows about TCO-enabling decorators provided by ``unpythonic``, and adds the ``@trampolined`` decorator to a function definition only when it is not already TCO'd. - - This applies also to lambdas; they are decorated by directly wrapping them with a call: ``trampolined(lambda ...: ...)``. - - This allows ``with tco`` to work together with the functions in ``unpythonic.fploop``, which imply TCO. + - The TCO transformation knows about TCO-enabling decorators provided by `unpythonic`, and adds the `@trampolined` decorator to a function definition only when it is not already TCO'd. + - This applies also to lambdas; they are decorated by directly wrapping them with a call: `trampolined(lambda ...: ...)`. + - This allows `with tco` to work together with the functions in `unpythonic.fploop`, which imply TCO. - - Macros that transform lambdas (notably ``continuations`` and ``tco``): - - Perform a first pass to take note of all lambdas that appear in the code *before the expansion of any inner macros*. Then in the second pass, *after the expansion of all inner macros*, only the recorded lambdas are transformed. + - Macros that transform lambdas (notably `continuations` and `tco`): + - Perform an outside-in pass to take note of all lambdas that appear in the code *before the expansion of any inner macros*. Then in an inside-out pass, *after the expansion of all inner macros*, only the recorded lambdas are transformed. - This mechanism distinguishes between explicit lambdas in the client code, and internal implicit lambdas automatically inserted by a macro. The latter are a technical detail that should not undergo the same transformations as user-written explicit lambdas. - - The identification is based on the ``id`` of the AST node instance. Hence, if you plan to write your own macros that work together with those in ``unpythonic.syntax``, avoid going overboard with FP. 
Modifying the tree in-place, preserving the original AST node instances as far as sensible, is just fine. - - For the interested reader, grep the source code for ``userlambdas``. - - Support a limited form of *decorated lambdas*, i.e. trees of the form ``f(g(h(lambda ...: ...)))``. + - The identification is based on the `id` of the AST node instance. Hence, if you plan to write your own macros that work together with those in `unpythonic.syntax`, avoid going overboard with FP. Modifying the tree in-place, preserving the original AST node instances as far as sensible, is just fine. + - For the interested reader, grep the source code for `userlambdas`. + - Support a limited form of *decorated lambdas*, i.e. trees of the form `f(g(h(lambda ...: ...)))`. - The macros will reorder a chain of lambda decorators (i.e. nested calls) to use the correct ordering, when only known decorators are used on a literal lambda. - - This allows some combos such as ``tco``, ``unpythonic.fploop.looped``, ``curry``. - - Only decorators provided by ``unpythonic`` are recognized, and only some of them are supported. For details, see ``unpythonic.regutil``. - - If you need to combo ``unpythonic.fploop.looped`` and ``unpythonic.ec.call_ec``, use ``unpythonic.fploop.breakably_looped``, which does exactly that. - - The problem with a direct combo is that the required ordering is the trampoline (inside ``looped``) outermost, then ``call_ec``, and then the actual loop, but because an escape continuation is only valid for the dynamic extent of the ``call_ec``, the whole loop must be run inside the dynamic extent of the ``call_ec``. - - ``unpythonic.fploop.breakably_looped`` internally inserts the ``call_ec`` at the right step, and gives you the ec as ``brk``. - - For the interested reader, look at ``unpythonic.syntax.util``. + - This allows some combos such as `tco`, `unpythonic.fploop.looped`, `autocurry`. 
+ - Only decorators provided by `unpythonic` are recognized, and only some of them are supported. For details, see `unpythonic.regutil`. + - If you need to combo `unpythonic.fploop.looped` and `unpythonic.ec.call_ec`, use `unpythonic.fploop.breakably_looped`, which does exactly that. + - The problem with a direct combo is that the required ordering is the trampoline (inside `looped`) outermost, then `call_ec`, and then the actual loop, but because an escape continuation is only valid for the dynamic extent of the `call_ec`, the whole loop must be run inside the dynamic extent of the `call_ec`. + - `unpythonic.fploop.breakably_looped` internally inserts the `call_ec` at the right step, and gives you the ec as `brk`. + - For the interested reader, look at `unpythonic.syntax.util`. + + - `namedlambda` is a two-pass macro. In the outside-in pass, it names lambdas inside `let[]` expressions before they are expanded away. The inside-out pass of `namedlambda` must run after `autocurry` to analyze and transform the auto-curried code produced by `with autocurry`. + + - `autoref` does not need its output to be curried (hence it runs after `autocurry`, to gain some performance), but needs to run before `lazify`, so that both branches of each transformed reference get the implicit forcing. Its transformation is orthogonal to what `namedlambda` does, so it does not matter in which exact order these two run. - - ``namedlambda`` is a two-pass macro. In the first pass (outside-in), it names lambdas inside ``let[]`` expressions before they are expanded away. The second pass (inside-out) of ``namedlambda`` must run after ``autocurry`` to analyze and transform the auto-curried code produced by ``with autocurry``. In most cases, placing ``namedlambda`` in a separate outer ``with`` block runs both operations in the correct order. + - `lazify` is a rather invasive rewrite that needs to see the output from most of the other macros.
- - ``autoref`` does not need in its output to be curried (hence after ``curry`` to gain some performance), but needs to run before ``lazify``, so that both branches of each transformed reference get the implicit forcing. Its transformation is orthogonal to what ``namedlambda`` does, so it does not matter in which exact order these two run. + - `envify` needs to see the output of `lazify` in order to shunt function args into an unpythonic `env` without triggering the implicit forcing. - - ``lazify`` is a rather invasive rewrite that needs to see the output from most of the other macros. + - `nb` needs to determine whether an expression should be printed. + - It needs to see invocations of testing macros, because those are akin to asserts - while they are technically implemented as expr macros, they expand into function calls into test asserter functions that have no meaningful return value. Thus, just in case the user has requested testing macros to expand first, `nb` needs to expand before anything that may edit function calls, such as `tco` and `autocurry`. + - It needs to see bare expressions (technically, in the AST, *expression statements*, i.e. `ast.Expr` nodes). Thus `nb` should expand before `autoreturn`, to treat also expressions that appear in tail position. + - `nb` performs the printing using a passthrough helper function, so that the value that was printed is available as the return value of the print helper, so that `return theprint(value)` works, for co-operation with `autoreturn`. - - ``envify`` needs to see the output of ``lazify`` in order to shunt function args into an unpythonic ``env`` without triggering the implicit forcing. + - With MacroPy, it used to be so that some of the block macros could be comboed as multiple context managers in the same `with` statement (expansion order is then *left-to-right*), whereas some (notably `autocurry` and `namedlambda`) required their own `with` statement.
In `mcpyrate`, block macros can be comboed in the same `with` statement (and expansion order is *left-to-right*). + - See the relevant [issue report](https://github.com/azazel75/macropy/issues/21) and [PR](https://github.com/azazel75/macropy/pull/22). + - When in doubt, you can use a separate `with` statement for each block macro that applies to the same section of code, and nest the blocks. In `mcpyrate`, this is almost equivalent to having the macros invoked in a single `with` statement, in the same order. + - Load the macro expansion debug utility `from mcpyrate.debug import macros, step_expansion`, and put a `with step_expansion:` around your use site. Then add your macro invocations one by one, and make sure the expansion looks like what you intended. (And of course, while testing, try to keep the input as simple as possible.) - - Some of the block macros can be comboed as multiple context managers in the same ``with`` statement (expansion order is then *left-to-right*), whereas some (notably ``autocurry`` and ``namedlambda``) require their own ``with`` statement. - - This is a [known issue in MacroPy](https://github.com/azazel75/macropy/issues/21). I have made a [fix](https://github.com/azazel75/macropy/pull/22), but still need to make proper test cases to get it merged. - - If something goes wrong in the expansion of one block macro in a ``with`` statement that specifies several block macros, surprises may occur. - - When in doubt, use a separate ``with`` statement for each block macro that applies to the same section of code, and nest the blocks. - - Load the macro expansion debug utility `from mcpyrate.debug import macros, step_expansion`, and put a ``with step_expansion:`` around your use site. Then add your macro invocations one by one, and make sure the expansion looks like what you intended. 
-### Miscellaneous notes +## Miscellaneous notes - [Nick Coghlan (2011): Traps for the unwary in Python's import system](http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html). diff --git a/doc/dialects.md b/doc/dialects.md new file mode 100644 index 00000000..4a753df7 --- /dev/null +++ b/doc/dialects.md @@ -0,0 +1,41 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- **Examples of creating dialects using `mcpyrate`** + - [Lispython](dialects/lispython.md) + - [Listhell](dialects/listhell.md) + - [Pytkell](dialects/pytkell.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + + +**Table of Contents** + +- [Examples of creating dialects using `mcpyrate`](#examples-of-creating-dialects-using-mcpyrate) + + + + +# Examples of creating dialects using `mcpyrate` + +The [dialects subsystem of `mcpyrate`](https://github.com/Technologicat/mcpyrate/blob/master/doc/dialects.md) makes Python into a language platform, à la [Racket](https://racket-lang.org/). +It provides the plumbing that allows to create, in Python, dialects that compile into Python +at macro expansion time. It is geared toward creating languages that extend Python +and look almost like Python, but extend or modify its syntax and/or semantics. +Hence *dialects*. 
+ +As examples of what can be done with a dialects system together with a kitchen-sink language extension macro package such as `unpythonic`, we currently provide the following dialects: + + - [**Lispython**: The love child of Python and Scheme](dialects/lispython.md) + - [**Listhell**: It's not Lisp, it's not Python, it's not Haskell](dialects/listhell.md) + - [**Pytkell**: Because it's good to have a kell](dialects/pytkell.md) + +All three dialects support `unpythonic`'s `continuations` block macro, to add `call/cc` to the language; but it is not enabled automatically. + +Mostly, these dialects are intended as a cross between teaching material and a (fully functional!) practical joke, but Lispython may occasionally come in handy. diff --git a/doc/dialects/lis.png b/doc/dialects/lis.png new file mode 100644 index 00000000..600b5d99 Binary files /dev/null and b/doc/dialects/lis.png differ diff --git a/doc/dialects/lis.svg b/doc/dialects/lis.svg new file mode 100644 index 00000000..f661eee4 --- /dev/null +++ b/doc/dialects/lis.svg @@ -0,0 +1,903 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + λ + λ + λ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + λ + λ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/dialects/lispython.md b/doc/dialects/lispython.md new file mode 100644 index 00000000..d3ac6f39 --- /dev/null +++ b/doc/dialects/lispython.md @@ -0,0 +1,268 @@ +**Navigation** + +- [README](../../README.md) +- [Pure-Python feature set](../features.md) +- [Syntactic macro feature set](../macros.md) +- [Examples of creating dialects using `mcpyrate`](../dialects.md) + - **Lispython** + - [Listhell](listhell.md) + - [Pytkell](pytkell.md) +- [REPL server](../repl.md) +- [Troubleshooting](../troubleshooting.md) +- [Design 
notes](../design-notes.md) +- [Essays](../essays.md) +- [Additional reading](../readings.md) +- [Contribution guidelines](../../CONTRIBUTING.md) + + +**Table of Contents** + +- [Lispython: The love child of Python and Scheme](#lispython-the-love-child-of-python-and-scheme) + - [Features](#features) + - [The `Lispy` variant](#the-lispy-variant) + - [The `Lispython` variant](#the-lispython-variant) + - [What Lispython is](#what-lispython-is) + - [Comboability](#comboability) + - [Lispython and continuations (call/cc)](#lispython-and-continuations-callcc) + - [Why extend Python?](#why-extend-python) + - [PG's accumulator-generator puzzle](#pgs-accumulator-generator-puzzle) + - [CAUTION](#caution) + - [Etymology?](#etymology) + + + +# Lispython: The love child of Python and Scheme + +Python with automatic tail-call optimization, an implicit return statement, and automatically named, multi-expression lambdas. + +Powered by [`mcpyrate`](https://github.com/Technologicat/mcpyrate/) and `unpythonic`. 
+ +```python +from unpythonic.dialects import dialects, Lispython # noqa: F401 + +def factorial(n): + def f(k, acc): + if k == 1: + return acc # `return` is available to cause an early return + f(k - 1, k * acc) + f(n, acc=1) +assert factorial(4) == 24 +factorial(5000) # no crash + +t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), + oddp << (lambda x: (x != 0) and evenp(x - 1))] in + evenp(10000)] +assert t is True + +square = lambda x: x**2 +assert square(3) == 9 +assert square.__name__ == "square" + +# - local[name << value] makes an expression-local variable +g = lambda x: [local[y << 2 * x], + y + 1] +assert g(10) == 21 + +c = cons(1, 2) +assert tuple(c) == (1, 2) +assert car(c) == 1 +assert cdr(c) == 2 +assert ll(1, 2, 3) == llist((1, 2, 3)) +``` + +## Features + +In terms of `unpythonic.syntax`, we implicitly enable `autoreturn`, `tco`, `multilambda`, `namedlambda`, and `quicklambda` for the whole module: + + - In tail position, the `return` keyword can be omitted, like in Lisps. + - In a `def`, the last statement at the top level of the `def` is in tail position. + - If the tail position contains an expression, a `return` will be automatically injected, with that expression as the return value. + - It is still legal to use `return` whenever you would in Python; this just makes the `return` keyword non-mandatory in places where a Lisp would not require it. + - To be technically correct, Schemers and Racketeers should read this as, *"in places where a Lisp would not require explicitly invoking an escape continuation"*. + - Automatic tail-call optimization (TCO) for both `def` and `lambda`. + - In a `def`, the last statement at the top level of the `def` is in tail position. + - Tail positions *inside an expression* that itself appears in tail position are: + - Both the `body` and `orelse` branches of an if-expression. (Exactly one of them runs, hence both are in tail position.) + - The lexically last item of an `and`/`or` chain. 
+ - Note the analysis is performed at compile time, whence it does **not** care about the short-circuit behavior that occurs at run time. + - The last item of a `do[]`. + - The last item of an implicit `do[]` in a `let[]` where the body uses the extra bracket syntax. (All `let` constructs provided by `unpythonic.syntax` are supported.) + - For the gritty details, see the syntax transformer `_transform_retexpr` in [`unpythonic.syntax.tailtools`](../../unpythonic/syntax/tailtools.py). + - Multiple-expression lambdas, using bracket syntax, for example `lambda x: [expr0, ...]`. + - Brackets denote a multiple-expression lambda body. Technically, the brackets create a `do[]` environment. + - If you want your lambda to have one expression that is a literal list, double the brackets: `lambda x: [[5 * x]]`. + - Lambdas are automatically named whenever the machinery can figure out a name from the surrounding context. + - When not, source location is auto-injected into the name. + +The multi-expression lambda syntax uses `do[]`, so it also allows lambdas to manage local variables using `local[name << value]` and `delete[name]`. See the documentation of `do[]` for details. + +If you need more stuff, `unpythonic` is effectively the standard library of Lispython, on top of what Python itself already provides. + +There are **two variants** of the dialect, `Lispython` and `Lispy`. + + +### The `Lispy` variant + +In the `Lispy` variant, that's it - the dialect changes the semantics only. Nothing is imported implicitly, except the macros injected by the dialect template (to perform the whole-module semantic changes at macro expansion time). + +This is the pythonic variant of Lispython, keeping in line with *explicit is better than implicit*. The rule is: *if a name appears in user code, it must be defined explicitly*, as is usual in Python. 
+ +Note this implies that you must **explicitly import** the `local[]` macro if you want to declare local variables in a multiple-expression lambda, and the `fn[]` macro if you want to take advantage of the implicit `quicklambda`. Both are available in `unpythonic.syntax`, as usual. (Note that you can rename the `fn[]` macro with an as-import, and the implicit `quicklambda` will still work.) + +The point of the implicit `quicklambda` is that all invocations of `fn[]`, if there are any, will expand early, so that other macros that expect lambdas to be in standard Python notation will get exactly that. This includes other macros invoked by the dialect definition, namely `multilambda`, `namedlambda`, and `tco`. + +The main point of `Lispy`, compared to plain Python, is automatic TCO. The ability to omit `return` is a minor convenience, and the other three features only improve the usability of lambdas. + + +### The `Lispython` variant + +In the `Lispython` variant, we implicitly import some macros and functions to serve as dialect builtins, keeping in line with expectations for a ~language in the~ *somewhat distant relative of the* Lisp family: + + - `cons`, `car`, `cdr`, `ll`, `llist`, `nil`, `prod`. + - All `let[]` and `do[]` constructs from `unpythonic.syntax`. + - The underscore: e.g. `fn[_ * 3]` becomes `lambda x: x * 3`, and `fn[_ * _]` becomes `lambda x, y: x * y`. + - `dyn`, for dynamic assignment. + - `Values`, for returning multiple values and/or named return values. (This ties in to `unpythonic`'s function composition subsystem, e.g. `curry`, `unfold`, `iterate`, the `pipe` family, the `compose` family, and the `with continuations` macro.) + +For detailed documentation of the language features, see [`unpythonic.syntax`](../macros.md), especially the macros `tco`, `autoreturn`, `multilambda`, `namedlambda`, `quicklambda`, `let` and `do`. 
+ +The dialect builtin `let[]` constructs are `let`, `letseq`, `letrec`, the decorator versions `dlet`, `dletseq`, `dletrec`, the block versions (decorator, call immediately, replace def'd name with result) `blet`, `bletseq`, `bletrec`, and the code-splicing variants `let_syntax` and `abbrev`. Bindings may be made using any syntax variant supported by `unpythonic.syntax`. + +The dialect builtin `do[]` constructs are `do` and `do0`. + + +## What Lispython is + +Lispython is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.lispython`](../../unpythonic/dialects/lispython.py). Usage examples can be found in the unit tests, [for `Lispy`](../../unpythonic/dialects/tests/test_lispy.py) and [for `Lispython`](../../unpythonic/dialects/tests/test_lispython.py). + +Lispython essentially makes Python feel slightly more lispy, in parts where that makes sense. + +It's also a minimal example of how to make an AST-transforming dialect. + +We take the approach of a relatively thin layer of macros (and underlying functions that implement the actual functionality), minimizing magic as far as reasonably possible. + +Performance is only a secondary concern; performance-critical parts fare better at the other end of [the wide spectrum](https://en.wikipedia.org/wiki/Wide-spectrum_language), with [Cython](http://cython.org/). Lispython is for [the remaining 80%](https://en.wikipedia.org/wiki/Pareto_principle), where the bottleneck is human developer time. + + +## Comboability + +The aforementioned block macros are enabled implicitly for the whole module; this is the essence of the Lispython dialect. Other block macros can still be invoked manually in the user code. + +Of the other block macros in `unpythonic.syntax`, code written in Lispython supports only `continuations`. `autoref` should also be harmless enough (will expand too early, but shouldn't matter). 
+ +`prefix`, `autocurry`, `lazify` and `envify` are **not compatible** with the ordering of block macros implicit in the Lispython dialect. + +`prefix` is an outside-in macro that should expand first, so it should be placed in a lexically outer position with respect to the ones Lispython invokes implicitly; but nothing can be more outer than the dialect template. + +The other three are inside-out macros that should expand later, so, similarly, they too should be placed in a lexically outer position. + +Basically, any block macro that can be invoked *lexically inside* a `with tco` block will work, the rest will not. + +If you need e.g. a lazy Lispython, the way to do that is to make a copy of the dialect module, change the dialect template to import the `lazify` macro, and then include a `with lazify` in the appropriate position, outside the `with namedlambda` block. Other customizations can be made similarly. + + +## Lispython and continuations (call/cc) + +Just use `with continuations` from `unpythonic.syntax` where needed. See its documentation for usage. + +Lispython works with `with continuations`, because: + + - Nesting `with continuations` within a `with tco` block is allowed, for the specific reason of supporting continuations in Lispython. + + The dialect's implicit `with tco` will just skip the `with continuations` block (`continuations` implies TCO). + + - `autoreturn`, `quicklambda` and `multilambda` are outside-in macros, so although they will be in a lexically outer position with respect to the manually invoked `with continuations` in the user code, this is correct (because being on the outside, they run before `continuations`, as they should). + + - The same applies to the outside-in pass of `namedlambda`. Its inside-out pass, on the other hand, must come after `continuations`, which it does, since the dialect's implicit `with namedlambda` is in a lexically outer position with respect to the `with continuations`.
+ +Be aware, though, that the combination of the `autoreturn` implicit in the dialect and `with continuations` might have usability issues, because `continuations` handles tail calls specially (the target of a tail-call in a `continuations` block must be continuation-enabled; see the documentation of `continuations`), and `autoreturn` makes it visually slightly less clear which positions are in fact tail calls (since no explicit `return`). Also, the top level of a `with continuations` block may not use `return` - while Lispython's implicit `autoreturn` happily auto-injects a `return` to whatever is the last statement in any particular function. + + +## Why extend Python? + +[Racket](https://racket-lang.org/) is an excellent Lisp, especially with [sweet](https://docs.racket-lang.org/sweet/), sweet expressions [[1]](https://sourceforge.net/projects/readable/) [[2]](https://srfi.schemers.org/srfi-110/srfi-110.html) [[3]](https://srfi.schemers.org/srfi-105/srfi-105.html), not to mention extremely pythonic. The word is *rackety*; the syntax of the language comes with an air of Zen minimalism (as perhaps expected of a descendant of Scheme), but the focus on *batteries included* and understandability are remarkably similar to the pythonic ideal. Racket even has an IDE (DrRacket) and an equivalent of PyPI, and the documentation is simply stellar. + +Python, on the other hand, has a slight edge in usability to the end-user programmer, and importantly, a huge ecosystem of libraries, second to `None`. Python is where science happens (unless you're in CS). Python is an almost-Lisp that has delivered on [the productivity promise](http://paulgraham.com/icad.html) of Lisp. Python also gets many things right, such as well developed support for lazy sequences, and decorators. + +In certain other respects, Python the base language leaves something to be desired, if you have been exposed to Racket (or Haskell, but that's a different story). 
Writing macros is harder due to the irregular syntax, but thankfully macro expanders already exist, and any set of macros only needs to be created once. + +Practicality beats purity ([ZoP §9](https://www.python.org/dev/peps/pep-0020/)): hence, fix the minor annoyances that would otherwise quickly add up, and reap the benefits of both worlds. If Python is software glue, Lispython is an additive that makes it flow better. + + +## PG's accumulator-generator puzzle + +The puzzle was posted by Paul Graham in 2002, in the essay [Revenge of the Nerds](http://paulgraham.com/icad.html). It asks to implement, in the shortest code possible, an accumulator-generator. The desired behavior is: + +```python +f = foo(10) +assert f(1) == 11 +assert f(1) == 12 +assert f(5) == 17 +``` + +(The original name of the function is literally `foo`; we have chosen to keep the name, although [nowadays one should do better than that](https://docs.racket-lang.org/style/reference-style.html#%28part._examples-style%29).) + +Even Lispython can do no better than this let-over-lambda (here using the haskelly let-in syntax to establish let-bindings): + +```python +foo = lambda n0: let[[n << n0] in + (lambda i: n << n + i)] +``` + +This still sets up a separate place for the accumulator (that is, separate from the argument of the outer function). The pure Python 3 solution avoids that, but needs many lines: + +```python +def foo(n): + def accumulate(i): + nonlocal n + n += i + return n + return accumulate +``` + +The Python 3.8+ solution, using the new walrus operator, is one line shorter: + +```python +def foo(n): + def accumulate(i): + nonlocal n + return (n := n + i) + return accumulate +``` + +This is rather clean, but still needs the `nonlocal` declaration, which is a statement. + +If we abbreviate `accumulate` as a lambda, it needs a `let` environment to write in, to use `unpythonic`'s expression-assignment (`name << value`). 
+ +But see `envify` in `unpythonic.syntax`, which shallow-copies function arguments into an `env` implicitly: + +```python +from unpythonic.syntax import macros, envify + +with envify: + def foo(n): + return lambda i: n << n + i +``` + +or as a one-liner: + +```python +with envify: + foo = lambda n: lambda i: n << n + i +``` + +`envify` is not part of the Lispython dialect definition, because this particular, perhaps rarely used, feature is not really worth a global performance hit whenever a function is entered. + +Note that `envify` is **not** compatible with Lispython, because it would need to appear in a lexically outer position compared to macros already invoked by the dialect template. If you need an envified Lispython, copy `unpythonic/dialects/lispython.py` and modify the template therein. [The xmas tree combo](../macros.md#the-xmas-tree-combo) says `envify` should come lexically after `multilambda`, but before `namedlambda`. + + +## CAUTION + +No instrumentation exists (or is even planned) for the Lispython layer; you'll have to use regular Python tooling to profile, debug, and such. The Lispython layer should be thin enough for this not to be a major problem in practice. + + +## Etymology? + +*Lispython* is obviously made of two parts: Python, and... 
+ +![mascot](lis.png) diff --git a/doc/dialects/listhell.md b/doc/dialects/listhell.md new file mode 100644 index 00000000..20e29cda --- /dev/null +++ b/doc/dialects/listhell.md @@ -0,0 +1,89 @@ +**Navigation** + +- [README](../../README.md) +- [Pure-Python feature set](../features.md) +- [Syntactic macro feature set](../macros.md) +- [Examples of creating dialects using `mcpyrate`](../dialects.md) + - [Lispython](lispython.md) + - **Listhell** + - [Pytkell](pytkell.md) +- [REPL server](../repl.md) +- [Troubleshooting](../troubleshooting.md) +- [Design notes](../design-notes.md) +- [Essays](../essays.md) +- [Additional reading](../readings.md) +- [Contribution guidelines](../../CONTRIBUTING.md) + + +**Table of Contents** + +- [Listhell: It's not Lisp, it's not Python, it's not Haskell](#listhell-its-not-lisp-its-not-python-its-not-haskell) + - [Features](#features) + - [What Listhell is](#what-listhell-is) + - [Comboability](#comboability) + - [Notes](#notes) + - [CAUTION](#caution) + - [Etymology?](#etymology) + + + +# Listhell: It's not Lisp, it's not Python, it's not Haskell + +Python with prefix syntax for function calls, and automatic currying. + +Powered by [`mcpyrate`](https://github.com/Technologicat/mcpyrate/) and `unpythonic`. + +```python +from unpythonic.dialects import dialects, Listhell # noqa: F401 + +from unpythonic import foldr, cons, nil, ll + +(print, "hello from Listhell") + +double = lambda x: 2 * x +my_map = lambda f: (foldr, (compose, cons, f), nil) +assert (my_map, double, (q, 1, 2, 3)) == (ll, 2, 4, 6) +``` + +## Features + +In terms of `unpythonic.syntax`, we implicitly enable `prefix` and `autocurry` for the whole module. 
+ +The following are dialect builtins: + + - `apply`, aliased to `unpythonic.fun.apply` + - `compose`, aliased to unpythonic's currying right-compose `composerc` + - `q`, `u`, `kw` for the prefix syntax (note these are not `mcpyrate`'s + `q` and `u`, but those from `unpythonic.syntax`, specifically for `prefix`) + +For detailed documentation of the language features, see [`unpythonic.syntax`](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). + +If you need more stuff, `unpythonic` is effectively the standard library of Listhell, on top of what Python itself already provides. + + +## What Listhell is + +Listhell is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.listhell`](../../unpythonic/dialects/listhell.py). Usage examples can be found in [the unit tests](../../unpythonic/dialects/tests/test_listhell.py). + +Listhell is essentially a demonstration of how Python could look, if it had Lisp's prefix syntax for function calls and Haskell's automatic currying. + +It's also a minimal example of how to make an AST-transforming dialect. + + +## Comboability + +Only outside-in macros that should expand after `autocurry` (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before `autocurry` (there are two, namely `tco` and `continuations`) can be used in programs written in the Listhell dialect. + + +## Notes + +If you like the idea and want autocurry for a Lisp, try +[spicy](https://github.com/Technologicat/spicy) for [Racket](https://racket-lang.org/). + +## CAUTION + +Not intended for serious use. + +## Etymology? + +Prefix syntax of **Lis**p, speed of Py**th**on, and readability of Hask**ell**, all in one. 
diff --git a/doc/dialects/pytkell.md b/doc/dialects/pytkell.md new file mode 100644 index 00000000..7025b3bf --- /dev/null +++ b/doc/dialects/pytkell.md @@ -0,0 +1,125 @@ +**Navigation** + +- [README](../../README.md) +- [Pure-Python feature set](../features.md) +- [Syntactic macro feature set](../macros.md) +- [Examples of creating dialects using `mcpyrate`](../dialects.md) + - [Lispython](lispython.md) + - [Listhell](listhell.md) + - **Pytkell** +- [REPL server](../repl.md) +- [Troubleshooting](../troubleshooting.md) +- [Design notes](../design-notes.md) +- [Essays](../essays.md) +- [Additional reading](../readings.md) +- [Contribution guidelines](../../CONTRIBUTING.md) + + +**Table of Contents** + +- [Pytkell: Because it's good to have a kell](#pytkell-because-its-good-to-have-a-kell) + - [Features](#features) + - [What Pytkell is](#what-pytkell-is) + - [Comboability](#comboability) + - [CAUTION](#caution) + - [Etymology?](#etymology) + + + +# Pytkell: Because it's good to have a kell + +Python with automatic currying and implicitly lazy functions. + +Powered by [`mcpyrate`](https://github.com/Technologicat/mcpyrate/) and `unpythonic`. 
+ +```python +from unpythonic.dialects import dialects, Pytkell # noqa: F401 + +from operator import add, mul + +def addfirst2(a, b, c): + return a + b +assert addfirst2(1)(2)(1 / 0) == 3 + +assert tuple(scanl(add, 0, (1, 2, 3))) == (0, 1, 3, 6) +assert tuple(scanr(add, 0, (1, 2, 3))) == (0, 3, 5, 6) + +my_sum = foldl(add, 0) +my_prod = foldl(mul, 1) +my_map = lambda f: foldr(compose(cons, f), nil) +assert my_sum(range(1, 5)) == 10 +assert my_prod(range(1, 5)) == 24 +assert tuple(my_map((lambda x: 2 * x), (1, 2, 3))) == (2, 4, 6) + +pt = forall[z << range(1, 21), # hypotenuse + x << range(1, z + 1), # shorter leg + y << range(x, z + 1), # longer leg + insist(x * x + y * y == z * z), + (x, y, z)] +assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), + (8, 15, 17), (9, 12, 15), (12, 16, 20)) + +factorials = scanl(mul, 1, s(1, 2, ...)) # 0!, 1!, 2!, ... +assert last(take(6, factorials)) == 120 + +x = let[[a << 21] in 2 * a] +assert x == 42 +x = let[2 * a, where[a << 21]] +assert x == 42 +``` + +## Features + +In terms of `unpythonic.syntax`, we implicitly enable `autocurry` and `lazify` for the whole module. 
+ +We also import some macros and functions to serve as dialect builtins: + + - All `let[]` and `do[]` constructs from `unpythonic.syntax` + - `lazy[]` and `lazyrec[]` for manual lazification of atoms and data structure literals, respectively + - If-elseif-else expression `cond[]` + - Nondeterministic evaluation `forall[]` (do-notation in the List monad) + - Function composition, `compose` (like Haskell's `.` operator), aliased to `unpythonic`'s currying right-compose `composerc` + - Linked list utilities `cons`, `car`, `cdr`, `ll`, `llist`, `nil` + - Folds and scans `foldl`, `foldr`, `scanl`, `scanr` + - Memoization `memoize`, `gmemoize`, `imemoize`, `fimemoize` + - Functional updates `fup` and `fupdate` + - Immutable dict `frozendict` + - Mathematical sequences `s`, `imathify`, `gmathify` + - Iterable utilities `islice` (`unpythonic`'s version), `take`, `drop`, `split_at`, `first`, `second`, `nth`, `last` + - Function arglist reordering utilities `flip`, `rotate` + +For detailed documentation of the language features, see [`unpythonic.syntax`](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). + +The builtin `let[]` constructs are `let`, `letseq`, `letrec`, the decorator versions `dlet`, `dletseq`, `dletrec`, the block versions (decorator, call immediately, replace `def`'d name with result) `blet`, `bletseq`, `bletrec`. Bindings may be made using any syntax variant supported by `unpythonic.syntax`. + +The builtin `do[]` constructs are `do` and `do0`. + +If you need more stuff, `unpythonic` is effectively the standard library of Pytkell, on top of what Python itself already provides. + + +## What Pytkell is + +Pytkell is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.pytkell`](../../unpythonic/dialects/pytkell.py). Usage examples can be found in [the unit tests](../../unpythonic/dialects/tests/test_pytkell.py). 
+ +Pytkell essentially makes Python feel slightly more haskelly. + +It's also a minimal example of how to make an AST-transforming dialect. + + +## Comboability + +**Not** comboable with most of the block macros in `unpythonic.syntax`, because `autocurry` and `lazify` appear in the dialect template, hence at the lexically outermost position. + +Only outside-in macros that should expand after `lazify` has recorded its userlambdas (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before `autocurry` (there are two, namely `tco` and `continuations`) can be used in programs written in the Pytkell dialect. + + +## CAUTION + +No instrumentation exists (or is even planned) for the Pytkell layer; you'll have to use regular Python tooling to profile, debug, and such. + +This layer is not quite as thin as Lispython's, but the dialect is not intended for serious use, either. + + +## Etymology? + +The other obvious contraction, *Pyskell*, sounds like a serious programming language - or possibly the name of a fantasy airship - whereas *Pytkell* is obviously something quickly thrown together for system testing. diff --git a/doc/essays.md b/doc/essays.md new file mode 100644 index 00000000..245519ca --- /dev/null +++ b/doc/essays.md @@ -0,0 +1,181 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- **Essays** +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + +For now, essays are listed in chronological order, most recent last. 
+ + +**Table of Contents** + +- [What Belongs in Python?](#what-belongs-in-python) +- [Common Lisp, Python, and productivity](#common-lisp-python-and-productivity) +- [`hoon`: The C of Functional Programming](#hoon-the-c-of-functional-programming) + + + + +# What Belongs in Python? + +*Originally written in 2020; updated 9 June 2021; small update 16 November 2022.* + +You may feel that [my hovercraft is full of eels](http://stupidpythonideas.blogspot.com/2015/05/spam-spam-spam-gouda-spam-and-tulips.html). It is because they come with the territory. + +Some have expressed the opinion [the statement-vs-expression dichotomy is a feature](http://stupidpythonideas.blogspot.com/2015/01/statements-and-expressions.html). The BDFL himself has famously stated that TCO has no place in Python [[1]](http://neopythonic.blogspot.com/2009/04/tail-recursion-elimination.html) [[2]](http://neopythonic.blogspot.fi/2009/04/final-words-on-tail-calls.html), and less famously that multi-expression lambdas or continuations have no place in Python [[3]](https://www.artima.com/weblogs/viewpost.jsp?thread=147358). Several potentially interesting PEPs have been deferred [[1]](https://www.python.org/dev/peps/pep-3150/) [[2]](https://www.python.org/dev/peps/pep-0403/) or rejected [[3]](https://www.python.org/dev/peps/pep-0511/) [[4]](https://www.python.org/dev/peps/pep-0463/) [[5]](https://www.python.org/dev/peps/pep-0472/). + +In general, I like Python. My hat is off to the devs. It is no mean feat to create a high-level language that focuses on readability and approachability, keep it alive for 30 years and counting, and have a large part of the programming community adopt it. But regarding the particular points above, if I agreed, I would not have built `unpythonic`, or [`mcpyrate`](https://github.com/Technologicat/mcpyrate) either. + +I think that with macros, Python can be so much more than just a beginner's language. Language-level extensibility is just the logical endpoint of that. 
I do not share the sentiment of the Python community against metaprogramming, or toward some language-level features. For me, macros (and full-module transforms a.k.a. dialects) are just another tool for creating abstractions, at yet another level. We can already extract procedures, methods, and classes. Why limit that ability - namely, the ability to create abstractions - to what an [eager](https://en.wikipedia.org/wiki/Evaluation_strategy#Strict_evaluation) language can express at run time? + +If the point is to keep code understandable, I respect the goal; but that is a matter of education. It is perfectly possible to write unreadable code without macros, and in Python, no less. Just use a complex class hierarchy so that the programmer reading the code must hunt through everything to find each method definition; write big functions without abstracting the steps of the overall algorithm; keep lots of mutable state, and store it in top-level variables; and maybe top that off with an overuse of dependency injection. No one will be able to figure out how the program works, at least not in any reasonable amount of time. + +It is also perfectly possible to write readable code with macros. Just keep in mind that macros are a different kind of abstraction, and use them where that kind of abstraction lends itself to building a clean solution. I am willing to admit the technical objection that *macros do not compose*; but that does not make them useless. + +Of the particular points above, in my opinion TCO should at least be an option. I like that *by default*, Python will complain about a call stack overflow rather than hang, when entering an accidentally infinite mutual recursion. I do occasionally make such mistakes when developing complex algorithms - especially when quickly sketching out new ideas. But sometimes, it would be nice to enable TCO selectively. If you ask for it, you know what to expect. This is precisely why `unpythonic.syntax` has `with tco`. 
I am not very happy with a custom TCO layer on top of a language core that eschews the whole idea, because TCO support in the core (like Scheme and Racket have) would simplify the implementation of certain other language extensions; but then again, [this is exactly what Clojure did](https://clojuredocs.org/clojure.core/trampoline), in similar technical circumstances. + +As for a multi-expression `lambda`, on the surface it sounds like a good idea. But really the issue is that in Python, the `lambda` construct itself is broken. It is essentially a duplicate of `def`, but lacking some features. As of Python 3.8, the latest addition of insult to injury is the lack of support for type annotations. A more uniform solution would be to make `def` into an expression. Much of the time, anonymous functions are not a good idea, because names help understanding and debugging - especially when all you have is a traceback. But defining closures inline **is** a great idea - and sometimes, the most readily understandable presentation order for an algorithm requires doing that in an expression position. The convenience is similar to being able to nest `def` statements, an ability Python already has. + +The macros in `unpythonic.syntax` inject many lambdas, because that makes them much simpler to implement than if we had to always lift a `def` statement into the nearest enclosing statement context. Another case in point is [`pampy`](https://github.com/santinic/pampy). The code to perform a pattern match would read a lot nicer if one could define also slightly more complex actions inline (see [Racket's pattern matcher](https://docs.racket-lang.org/reference/match.html) for a comparison). It is unlikely that the action functions will be needed elsewhere, and it is just silly to define a bunch of functions *before* the call to `match`.
If this is not a job for either something like `let-where` (to invert the presentation order locally) or a multi-expression lambda (to define the actions inline), I do not know what is. + +While on the topic of usability, why are lambdas strictly anonymous? In cases where it is useful to be able to omit a name, because sometimes many small helper functions may be needed and [naming is hard](https://martinfowler.com/bliki/TwoHardThings.html), why not include the source location information in the auto-generated name, instead of just `"<lambda>"`? (As of v0.15.0, the `with namedlambda` macro does this.) + +On a point raised [here by the BDFL](https://www.artima.com/weblogs/viewpost.jsp?thread=147358), with respect to indentation-sensitive vs. indentation-insensitive parser modes; having seen [SRFI-110: Sweet-expressions (t-expressions)](https://srfi.schemers.org/srfi-110/srfi-110.html), I think Python is confusing matters by linking the parser mode to statements vs. expressions. A workable solution is to make *everything* support both modes (or even preprocess the source code text to use only one of the modes), which *uniformly* makes parentheses an alternative syntax for grouping. + +It would be nice to be able to use indentation to structure expressions to improve their readability, like one can do in Racket with [sweet](https://docs.racket-lang.org/sweet/), but I suppose `lambda x: [expr0, expr1, ...]` will have to do for a multi-expression lambda. Unless I decide at some point to make a source filter for [`mcpyrate`](https://github.com/Technologicat/mcpyrate) to auto-convert between indentation and parentheses; but for Python this is somewhat difficult to do, because statements **must** use indentation whereas expressions **must** use parentheses, and this must be done before we can invoke the standard parser to produce an AST. (And I do not want to maintain a [Pyparsing](https://github.com/pyparsing/pyparsing) grammar to parse a modified version of Python.)
+ +As for true multi-shot continuations, `unpythonic.syntax` has `with continuations` for that, but I am not sure if I will ever use it in production code. Most of the time, it seems to me full continuations are a solution looking for a problem. (A very elegant solution, even if the usability of the `call/cc` interface leaves much to be desired. The solution to *that* issue is `let/cc`, which in `unpythonic`, becomes `k = call_cc[get_cc()]`.) For everyday use, one-shot continuations (a.k.a. resumable functions, a.k.a. generators in Python) are often all that is needed to simplify certain patterns, especially those involving backtracking. I am a big fan of the idea that, for example, you can make your [anagram-making algorithm](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/anagram.py) only yield valid anagrams, with the backtracking state (to eliminate dead-ends) implicitly stored in the paused generator! However, having multi-shot continuations is great for teaching the concept of continuations in a programming course, when teaching in Python. + +Finally, there is the issue of implicitly encouraging subtly incompatible Python-like languages (see the rejected [PEP 511](https://www.python.org/dev/peps/pep-0511/)). It is pretty much the point of language-level extensibility, to allow users to do that if they want. I would not worry about it. Racket is *designed* for extensibility, and its community seems to be doing just fine - they even *encourage* the creation of new languages to solve problems. On the other hand, Racket demands some sophistication on the part of its user, and it is not very popular in the programming community at large. + +What I can say is, `unpythonic` is not meant for the average Python project, either. If used intelligently, it can make code shorter, yet readable. 
For a lone developer who needs to achieve as much as possible in the fewest lines reasonably possible, it seems to me that language extension - and in general, as Alexis King put it, [climbing the infinite ladder of abstraction](https://lexi-lambda.github.io/blog/2016/08/11/climbing-the-infinite-ladder-of-abstraction/) - is the way to go. In a large project with a high developer turnover, the situation is different. + +For general programming in the early 2020s, Python still has the ecosystem advantage, so it does not make sense to move to anything else, at least yet. So, let us empower what we have. Even if we have to build something that could be considered *unpythonic*. + + +# Common Lisp, Python, and productivity + +*Originally written in 2020; updated 9 June 2021; small update 16 November 2022.* + +The various essays Paul Graham wrote near the turn of the millennium, especially [Revenge of the Nerds (2002)](http://paulgraham.com/icad.html), have given the initial impulse to many programmers for studying Lisp. The essays are well written and have provided a lot of exposure for the Lisp family of languages. So how does the programming world look in that light now, 20 years later? + +The base abstraction level of programming languages, even those in popular use, has increased. The trend was visible already then, and was indeed noted in the essays. The focus on low-level languages such as C++ has decreased. Java is still popular, but high-level FP languages that compile to JVM bytecode (Kotlin, Scala, Clojure) are rising. + +Python has become highly popular, and is now also closer to Lisp than it was 20 years ago, especially after `MacroPy` introduced syntactic macros to Python (in 2013, [according to the git log](https://github.com/lihaoyi/macropy/commits/python2/macropy/__init__.py)). Python was not bad as a Lisp replacement even back in 2000 - see Peter Norvig's essay [Python for Lisp Programmers](https://norvig.com/python-lisp.html). 
Some more historical background, specifically on lexically scoped closures (and the initial lack thereof), can be found in [PEP 3104](https://www.python.org/dev/peps/pep-3104/), [PEP 227](https://www.python.org/dev/peps/pep-0227/), and [Historical problems with closures in JavaScript and Python](http://giocc.com/problems-with-closures-in-javascript-and-python.html). + +In 2020, does it still make sense to learn [the legendary](https://xkcd.com/297/) Common Lisp? + +As a practical tool? Is CL hands-down better than Python? Maybe no. Python has already delivered on 90% of the productivity promise of Lisp. Both languages cut down significantly on [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet). Python has a huge library ecosystem. [`mcpyrate`](https://github.com/Technologicat/mcpyrate) and `unpythonic` are trying to push the language-level features a further 5%. (A full 100% is likely impossible when extending an existing language; if nothing else, there will be seams.) + +As for productivity, [it may be](https://medium.com/smalltalk-talk/lisp-smalltalk-and-the-power-of-symmetry-8bd96aaa0c0c) that a form of code-data equivalence (symmetry!), not macros specifically, is what makes Lisp powerful. If so, there may be other ways to reach that equivalence. For example Smalltalk, like Lisp, *runs in the same context it's written in*. All Smalltalk data are programs. Smalltalk [may be making a comeback](https://hackernoon.com/how-to-evangelize-a-programming-language-0p7p3y02), in the form of [Pharo](https://pharo.org/). + +Haskell aims at code-data equivalence from a third angle (memoized pure functions are in essence infinite lookup tables), but I have not used it in practice, so I do not have the experience to say whether this is enough to make it feel powerful in a similar way. + +Image-based programming (live programming) is a common factor between Pharo and Common Lisp + Swank. 
This is another productivity booster that much of the programming world is not that familiar with. It eliminates not only the edit/compile/restart cycle, but the edit/restart cycle as well, making the workflow a concurrent *edit/run* instead - without restarting the whole app at each change. Julia has [Revise.jl](https://github.com/timholy/Revise.jl) for something similar. In web applications, [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) is a small step in a somewhat similar direction (as long as one can restart the server app easily, to make it use the latest definitions). Notebooks (such as [Jupyter](https://jupyter.org/)) provide the edit/run paradigm for scientific scripts. + +But to know exactly what Common Lisp has to offer, **yes**, it does make sense to learn it. As baroque as some parts are, there are a lot of great ideas there. [Conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) are one. [CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) is another. (Nowadays [Julia](https://docs.julialang.org/en/v1/manual/methods/) has CLOS-style [multiple-dispatch generic functions](https://docs.julialang.org/en/v1/manual/methods/).) More widely, in the ecosystem, Swank is one. + +Having more perspectives at one's disposal makes one a better programmer - and that is what ultimately counts. As [Alan Perlis said in 1982](https://en.wikiquote.org/wiki/Alan_Perlis): + +*A language that doesn't affect the way you think about programming, is not worth knowing.* + +In this sense, Common Lisp is very much worth knowing. Although, if you want a beautiful, advanced Lisp, maybe go for [Racket](https://racket-lang.org/) first; but that is an essay for another day. + + +# `hoon`: The C of Functional Programming + +*9 June 2021* + +Some days I wonder if this whole `unpythonic` endeavor even makes any sense. 
Then, turning the pages of [the book of sand](https://en.wikipedia.org/wiki/The_Book_of_Sand) that is the web, I [happen to run into something](http://axisofeval.blogspot.com/2015/07/what-i-learned-about-urbit-so-far.html) like `hoon`. + +Its philosophy is best described by this gem from an [early version of its documentation](https://github.com/cgyarvin/urbit/blob/master/doc/book/0-intro.markdown#hoon): + +*So we could describe Hoon as a pure, strict, higher-order typed functional language. But don't do this in front of a Haskell purist, unless you put quotes around "typed," "functional," and possibly even "language." We could also say "object-oriented," with the same scare quotes for the cult of Eiffel.* + +While I am not sure if I will ever *use* `hoon`, it is hard not to like a language that puts quotes around "language". Few languages go that far in shaking up preconceptions. Critically examining what we believe, and why, often leads to useful insights. + +The claim that `hoon` is not a language, but a "language", fully makes sense after reading some of the documentation. `hoon` is essentially an *ab initio* language with an axiomatic approach to defining its operational semantics, similarly to how *Arc* approaches defining Lisp. Furthermore, `hoon` is the *functional equivalent of C* to the underlying virtual assembly language, `nock`. From a certain viewpoint, the "language" essentially consists of *glorified Nock macros*. Glorified assembly macros are pretty much all a *low-level* [HLL](https://en.wikipedia.org/wiki/High-level_programming_language) essentially is, so the claim seems about right. + +Nock is a peculiar assembly language. According to the comments in [`hoon.hoon`](https://github.com/cgyarvin/urbit/blob/master/urb/zod/arvo/hoon.hoon), it is a *Turing-complete non-lambda automaton*. The instruction set is permanently frozen, as if it were a physical CPU chip. Opcodes are just natural numbers, 0 through 11, and the instruction set is very minimalistic.
For example, there is not even a decrement opcode. This is because from an axiomatic viewpoint, decrement can be defined recursively via increment. At which point, every systems programmer objects, rightfully, that no one sane actually does so, because that costs `O(n)`. Indeed, the `hoon` standard library uses C FFI to take advantage of the physical processor's instruction set to perform arithmetic operations. Each piece of C code used for such acceleration purposes is termed a *jet*. + +Since - by the fact that the programmer called a particular standard library function - the system knows we want to compute a decrement (or a multiplication, a power, maybe some floating-point operation, etc.), it can *accelerate* that particular operation by using the available hardware. + +The important point is, you *could* write out a `nock` macro that does the same thing, only it would be unbearably slow. In the axiomatic perspective - which is about proving programs correct - speed does not matter. At the same time, FFI gives speed for the real world. + +To summarize: as someone already put it, `hoon` offers a glimpse into an alternative universe of systems programming, where the functional camp won. It may also be a useful tool, or a source for further unconventional ideas - but to know for sure, I will have to read more about it.
+ +I think the perfect place to end this piece is to quote a few lines from the language definition [`hoon.hoon`](https://github.com/cgyarvin/urbit/blob/master/urb/zod/arvo/hoon.hoon), to give a flavor: + +``` +++ doos :: sleep until + |= hap=path ^- (unit ,@da) + (doze:(wink:(vent bud (dink (dint hap))) now 0 (beck ~)) now [hap ~]) +:: +++ hurl :: start loop no id + |= ovo=ovum + ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + (kick [[~ [[(dint p.ovo) ~] p.ovo ~] q.ovo] ~]) +:: +++ hymn :: start loop with id + |= [who=ship ovo=ovum] + ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + (kick [[[~ %iron who] [[(dint p.ovo) ~] p.ovo ~] q.ovo] ~]) +:: +++ kick :: complete loop + |= mor=(list move) + =| ova=(list ovum) + |- ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + ?~ mor + [(flop ova) fan] + :: ~& [%kick-move q.i.mor -.r.i.mor] + ?> ?=(^ q.i.mor) + ?~ t.q.i.mor + $(mor t.mor, ova [[i.q.i.mor r.i.mor] ova]) + ?> ?=(^ i.q.i.mor) + =- $(mor (weld p.nyx t.mor), fan q.nyx) + ^= nyx + =+ naf=fan + |- ^- [p=(list move) q=_fan] + ?~ naf [~ ~] + ?. =(i.i.q.i.mor p.i.naf) + =+ tuh=$(naf t.naf) + [p.tuh [i.naf q.tuh]] + =+ ven=(vent bud q.i.naf) + =+ win=(wink:ven now (shax now) (beck p.i.mor)) + =+ ^= yub + %- beat:win + [p.i.mor t.i.q.i.mor t.q.i.mor r.i.mor] + [p.yub [[p.i.naf ves:q.yub] t.naf]] +-- +``` + +The Lisp family (particularly the Common Lisp branch) has a reputation for silly terminology, but I think `hoon` deserves the crown. All control structures are punctuation-only ASCII digraphs, and almost every name is a monosyllabic nonsense word. Still, this Lewis-Carroll-esque naming convention of making words mean what you define them to mean makes at least as much sense as the standard naming convention in mathematics, naming theorems after their discoverers! (Or at least, [after someone else](https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy).) 
+ +I actually like the phonemic base, making numbers sound like [*sorreg-namtyv*](https://urbit.org/docs/hoon/hoon-school/nouns/); that is 5 702 400 for the rest of us. And I think I will, quite seriously, adopt the verb *bunt*, meaning *to take the default value of*. That is such a common operation in programming that I find it hard to believe there is no standard abbreviation. I wonder what other discoveries await. + +Finally, in some way I cannot quite put a finger on, to me the style has echoes of [Jorge Luis Borges](https://en.wikipedia.org/wiki/Jorge_Luis_Borges). Maybe it is that the `hoon` source code sounds like something out of [The Library of Babel](https://en.wikipedia.org/wiki/The_Library_of_Babel). The Borgesian flavor seems intentional, too; the company building the Urbit stack, which `hoon` is part of, is itself named *[Tlon](https://en.wikipedia.org/wiki/Tl%C3%B6n%2C_Uqbar%2C_Orbis_Tertius)*. Remaking the world by re-imagining it, indeed. + +Maybe there is a place for `unpythonic`, too. + + +**Links** + +- [Latest documentation for `hoon`](https://urbit.org/docs/hoon/) +- There is a [whole operating system](https://github.com/urbit/urbit) built on `hoon` and `nock`. +- [Wikipedia has an entry on it](https://en.wikipedia.org/wiki/Urbit). Deconstructing the client-server model sounds very [postmodern](https://en.wikipedia.org/wiki/Deconstructivism). + + +**Note on natural-number opcodes** + +Using natural numbers for the opcodes at first glance sounds like a [Gödel numbering](https://en.wikipedia.org/wiki/G%C3%B6del_numbering) for the program space; but actually, the input to [the VM](https://urbit.org/docs/nock/definition/) contains some linked-list structure, which is not represented that way. Also, **any** programming language imposes its own Gödel numbering on the program space. Just take, for example, the UTF-8 representation of the source code text (which, in Python terms, is a `bytes` object), and interpret those bytes as one single bignum. 
+ +Obviously, any interesting programs correspond to very large numbers, and are few and far between, so decoding random numbers via a Gödel numbering is not a practical way to generate interesting programs. [Genetic programming](https://en.wikipedia.org/wiki/Genetic_programming) works much better, because unlike Gödel numbering, it was actually designed specifically to do that. GP takes advantage of the semantic structure present in the source code (or AST) representation. + +The purpose of the original Gödel numbering was to prove Gödel's incompleteness theorem. In the case of `nock`, my impression is that the opcodes are natural numbers just for flavoring purposes. If you are building an ab initio software stack, what better way to announce that than to use natural numbers as your virtual machine's opcodes? diff --git a/doc/features.md b/doc/features.md index 45c9d079..7eaf06e1 100644 --- a/doc/features.md +++ b/doc/features.md @@ -1,3 +1,16 @@ +**Navigation** + +- [README](../README.md) +- **Pure-Python feature set** +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + # Unpythonic: Python meets Lisp and Haskell This is the pure-Python API of `unpythonic`. Most features listed here need no macros, and are intended to be used directly. @@ -7,85 +20,136 @@ The exception are the features marked **[M]**, which are primarily intended as a ### Features [**Bindings**](#bindings) -- [``let``, ``letrec``: local bindings in an expression](#let-letrec-local-bindings-in-an-expression) **[M]** +- [`let`, `letrec`: local bindings in an expression](#let-letrec-local-bindings-in-an-expression) **[M]** + - [`let`](#let) + - [`dlet`, `blet`](#dlet-blet): *let-over-def*, like the classic let-over-lambda. 
+ - [`letrec`](#letrec) - [Lispylet: alternative syntax](#lispylet-alternative-syntax) **[M]** -- [``env``: the environment](#env-the-environment) -- [``assignonce``](#assignonce), a relative of ``env``. -- [``dyn``: dynamic assignment](#dyn-dynamic-assignment) a.k.a. parameterize, special variables, fluid variables, "dynamic scoping". +- [`env`: the environment](#env-the-environment) +- [`assignonce`](#assignonce), a relative of `env`. +- [`dyn`: dynamic assignment](#dyn-dynamic-assignment) a.k.a. parameterize, special variables, fluid variables, "dynamic scoping". [**Containers**](#containers) -- [``frozendict``: an immutable dictionary](#frozendict-an-immutable-dictionary) +- [`frozendict`: an immutable dictionary](#frozendict-an-immutable-dictionary) - [`cons` and friends: pythonic lispy linked lists](#cons-and-friends-pythonic-lispy-linked-lists) -- [``box``: a mutable single-item container](#box-a-mutable-single-item-container) -- [``Shim``: redirect attribute accesses](#shim-redirect-attribute-accesses) -- [Container utilities](#container-utilities): ``get_abcs``, ``in_slice``, ``index_in_slice`` - -[**Sequencing**](#sequencing), run multiple expressions in any expression position (incl. inside a ``lambda``). -- [``begin``: sequence side effects](#begin-sequence-side-effects) -- [``do``: stuff imperative code into an expression](#do-stuff-imperative-code-into-an-expression) **[M]** -- [``pipe``, ``piped``, ``lazy_piped``: sequence functions](#pipe-piped-lazy_piped-sequence-functions) +- [`box`: a mutable single-item container](#box-a-mutable-single-item-container) + - [`box`](#box) + - [`Some`](#some): immutable box, to explicitly indicate the presence of a value. + - [`ThreadLocalBox`](#threadlocalbox) +- [`Shim`: redirect attribute accesses](#shim-redirect-attribute-accesses) +- [Container utilities](#container-utilities): `get_abcs`, `in_slice`, `index_in_slice` + +[**Sequencing**](#sequencing), run multiple expressions in any expression position (incl. 
inside a `lambda`). +- [`begin`: sequence side effects](#begin-sequence-side-effects) +- [`do`: stuff imperative code into an expression](#do-stuff-imperative-code-into-an-expression) **[M]** + - [`do`](#do) + - [`do0`](#do0) +- [`pipe`, `piped`, `lazy_piped`: sequence functions](#pipe-piped-lazy_piped-sequence-functions) + - [`pipe`](#pipe) + - [`piped`](#piped) + - [`lazy_piped`](#lazy_piped) [**Batteries**](#batteries) missing from the standard library. -- [**Batteries for functools**](#batteries-for-functools): `memoize`, `curry`, `compose`, `withself`, `fix` and more. - - [``curry`` and reduction rules](#curry-and-reduction-rules): we provide some extra features for bonus Haskellness. - - [``fix``: break infinite recursion cycles](#fix-break-infinite-recursion-cycles) +- [**Batteries for functools**](#batteries-for-functools): `curry`, `compose`, `withself`, and more. + - [`memoize`](#memoize): a detailed explanation of the memoizer. + - [`curry`](#curry): a detailed explanation of the curry utility and its haskelly extra features. + - [`fix`: break infinite recursion cycles](#fix-break-infinite-recursion-cycles) - [**Batteries for itertools**](#batteries-for-itertools): multi-input folds, scans (lazy partial folds); unfold; lazy partial unpacking of iterables, etc. - [**Batteries for network programming**](#batteries-for-network-programming): message protocol, PTY/socket proxy, etc. -- [``islice``: slice syntax support for ``itertools.islice``](#islice-slice-syntax-support-for-itertoolsislice) + - [`unpythonic.net.msg`](#unpythonic-net-msg): message protocol. +- [`islice`: slice syntax support for `itertools.islice`](#islice-slice-syntax-support-for-itertoolsislice) - [`gmemoize`, `imemoize`, `fimemoize`: memoize generators](#gmemoize-imemoize-fimemoize-memoize-generators), iterables and iterator factories. -- [``fup``: functional update; ``ShadowedSequence``](#fup-functional-update-shadowedsequence): like ``collections.ChainMap``, but for sequences. 
-- [``view``: writable, sliceable view into a sequence](#view-writable-sliceable-view-into-a-sequence) with scalar broadcast on assignment. -- [``mogrify``: update a mutable container in-place](#mogrify-update-a-mutable-container-in-place) -- [``s``, ``imathify``, ``gmathify``: lazy mathematical sequences with infix arithmetic](#s-imathify-gmathify-lazy-mathematical-sequences-with-infix-arithmetic) -- [``sym``, ``gensym``, ``Singleton``: symbols and singletons](#sym-gensym-Singleton-symbols-and-singletons) +- [`fup`: functional update; `ShadowedSequence`](#fup-functional-update-shadowedsequence): like `collections.ChainMap`, but for sequences. + - [`fup`](#fup): the high-level syntactic sugar to update a sequence functionally. + - [`fupdate`](#fupdate): the low-level workhorse. +- [`view`: writable, sliceable view into a sequence](#view-writable-sliceable-view-into-a-sequence) with scalar broadcast on assignment. +- [`mogrify`: update a mutable container in-place](#mogrify-update-a-mutable-container-in-place) +- [`s`, `imathify`, `gmathify`: lazy mathematical sequences with infix arithmetic](#s-imathify-gmathify-lazy-mathematical-sequences-with-infix-arithmetic) +- [`sym`, `gensym`, `Singleton`: symbols and singletons](#sym-gensym-singleton-symbols-and-singletons) + [**Control flow tools**](#control-flow-tools)
-- [``catch``, ``throw``: escape continuations (ec)](#catch-throw-escape-continuations-ec) (as in [Lisp's `catch`/`throw`](http://www.gigamonkeys.com/book/the-special-operators.html), unlike C++ or Java) - - [``call_ec``: first-class escape continuations](#call_ec-first-class-escape-continuations), like Racket's ``call/ec``. -- [``forall``: nondeterministic evaluation](#forall-nondeterministic-evaluation), a tuple comprehension with multiple body expressions. -- [``handlers``, ``restarts``: conditions and restarts](#handlers-restarts-conditions-and-restarts), a.k.a. **resumable exceptions**. -- [``generic``, ``typed``, ``isoftype``: multiple dispatch](#generic-typed-isoftype-multiple-dispatch): create generic functions with type annotation syntax; also some friendly utilities. +- [`trampolined`, `jump`: tail call optimization (TCO) / explicit continuations](#trampolined-jump-tail-call-optimization-tco--explicit-continuations) + - [Tail recursion in a `lambda`](#tail-recursion-in-a-lambda) + - [Mutual recursion with TCO](#mutual-recursion-with-tco) + - [Mutual recursion in `letrec` with TCO](#mutual-recursion-in-letrec-with-tco) + - [Reinterpreting TCO as explicit continuations](#reinterpreting-tco-as-explicit-continuations) +- [`looped`, `looped_over`: loops in FP style (with TCO)](#looped-looped_over-loops-in-fp-style-with-tco) + - [Relation to the TCO system](#relation-to-the-tco-system) + - [FP loop over an iterable](#fp-loop-over-an-iterable): the `looped_over` parametric decorator + - [Accumulator type and runtime cost](#accumulator-type-and-runtime-cost) + - [`break`](#break) + - [`continue`](#continue) + - [Prepackaged `break` and `continue`](#prepackaged-break-and-continue) + - [FP loops using a lambda as body](#fp-loops-using-a-lambda-as-body) +- [`gtrampolined`: generators with TCO](#gtrampolined-generators-with-tco): tail-chaining; like `itertools.chain`, but from inside a generator. 
+- [`catch`, `throw`: escape continuations (ec)](#catch-throw-escape-continuations-ec) (as in [Lisp's `catch`/`throw`](http://www.gigamonkeys.com/book/the-special-operators.html), unlike C++ or Java) + - [`call_ec`: first-class escape continuations](#call_ec-first-class-escape-continuations), like Racket's `call/ec`. +- [`forall`: nondeterministic evaluation](#forall-nondeterministic-evaluation), a tuple comprehension with multiple body expressions. +- [`handlers`, `restarts`: conditions and restarts](#handlers-restarts-conditions-and-restarts), a.k.a. **resumable exceptions**. + - [Fundamental signaling protocol](#fundamental-signaling-protocol) + - [API summary](#api-summary) + - [High-level signaling protocols](#high-level-signaling-protocols) + - [Conditions vs. exceptions](#conditions-vs-exceptions) +- [`generic`, `typed`, `isoftype`: multiple dispatch](#generic-typed-isoftype-multiple-dispatch): create generic functions with type annotation syntax; also some friendly utilities. + - [`generic`: multiple dispatch with type annotation syntax](#generic-multiple-dispatch-with-type-annotation-syntax) + - [`augment`: add a new multimethod to an existing generic function](#augment-add-a-new-multimethod-to-an-existing-generic-function) + - [`typed`: add run-time type checks with type annotation syntax](#typed-add-run-time-type-checks-with-type-annotation-syntax) + - [`isoftype`: the big sister of `isinstance`](#isoftype-the-big-sister-of-isinstance) + +[**Exception tools**](#exception-tools) +- [`raisef`, `tryf`: `raise` and `try` as functions](#raisef-tryf-raise-and-try-as-functions), useful inside a lambda. +- [`equip_with_traceback`](#equip-with-traceback), equip a manually created exception instance with a traceback. 
+- [`async_raise`: inject an exception to another thread](#async_raise-inject-an-exception-to-another-thread) *(CPython only)* +- [`reraise_in`, `reraise`: automatically convert exception types](#reraise_in-reraise-automatically-convert-exception-types) + +[**Function call and return value tools**](#function-call-and-return-value-tools) +- [`def` as a code block: `@call`](#def-as-a-code-block-call): run a block of code immediately, in a new lexical scope. +- [`@callwith`: freeze arguments, choose function later](#callwith-freeze-arguments-choose-function-later) +- [`Values`: multiple and named return values](#values-multiple-and-named-return-values) + - [`valuify`](#valuify): convert pythonic multiple-return-values idiom of `tuple` into `Values`. + +[**Numerical tools**](#numerical-tools) + - [`almosteq`: floating-point almost-equality](#almosteq-floating-point-almost-equality) + - [`fixpoint`: arithmetic fixed-point finder](#fixpoint-arithmetic-fixed-point-finder) + - [`partition_int`: partition integers](#partition_int-partition-integers) + - [`ulp`: unit in last place](#ulp-unit-in-last-place) [**Other**](#other) -- [``def`` as a code block: ``@call``](#def-as-a-code-block-call): run a block of code immediately, in a new lexical scope. -- [``@callwith``: freeze arguments, choose function later](#callwith-freeze-arguments-choose-function-later) -- [``raisef``, ``tryf``: ``raise`` and ``try`` as functions](#raisef-tryf-raise-and-try-as-functions), useful inside a lambda. -- [``equip_with_traceback``](#equip-with-traceback), equip a manually created exception instance with a traceback. -- [``callsite_filename``](#callsite-filename) -- [``safeissubclass``](#safeissubclass), convenience function. 
-- [``pack``: multi-arg constructor for tuple](#pack-multi-arg-constructor-for-tuple) -- [``namelambda``: rename a function](#namelambda-rename-a-function) -- [``timer``: a context manager for performance testing](#timer-a-context-manager-for-performance-testing) -- [``getattrrec``, ``setattrrec``: access underlying data in an onion of wrappers](#getattrrec-setattrrec-access-underlying-data-in-an-onion-of-wrappers) -- [``arities``, ``kwargs``, ``resolve_bindings``: Function signature inspection utilities](#arities-kwargs-resolve_bindings-function-signature-inspection-utilities) -- [``Popper``: a pop-while iterator](#popper-a-pop-while-iterator) -- [``ulp``: unit in last place](#ulp-unit-in-last-place) -- [``async_raise``: inject an exception to another thread](#async_raise-inject-an-exception-to-another-thread) *(CPython only)* - -For many examples, see [the unit tests](unpythonic/test/), the docstrings of the individual features, and this guide. - -*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out-of-date at places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests say - and optionally file an issue on GitHub so that the documentation can be fixed.* - -**This document is up-to-date for v0.14.3.** +- [`callsite_filename`](#callsite-filename) +- [`safeissubclass`](#safeissubclass), convenience function. 
+- [`pack`: multi-arg constructor for tuple](#pack-multi-arg-constructor-for-tuple) +- [`namelambda`: rename a function](#namelambda-rename-a-function) +- [`timer`: a context manager for performance testing](#timer-a-context-manager-for-performance-testing) +- [`format_human_time`: seconds to days, hours, minutes, seconds](#format_human_time-seconds-to-days-hours-minutes-seconds) +- [`ETAEstimator`: estimate the time of completion of a long-running task](#etaestimator-estimate-the-time-of-completion-of-a-long-running-task) +- [`getattrrec`, `setattrrec`: access underlying data in an onion of wrappers](#getattrrec-setattrrec-access-underlying-data-in-an-onion-of-wrappers) +- [`arities`, `kwargs`, `resolve_bindings`: Function signature inspection utilities](#arities-kwargs-resolve_bindings-function-signature-inspection-utilities) +- [`Popper`: a pop-while iterator](#popper-a-pop-while-iterator) + +For many examples, see [the unit tests](../unpythonic/tests/), the docstrings of the individual features, and this guide. + +*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out of date in places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests do - and optionally file an issue on GitHub so that the documentation can be fixed.* + +**This document is up-to-date for v0.15.0.** ## Bindings Tools to bind identifiers in ways not ordinarily supported by Python. -### ``let``, ``letrec``: local bindings in an expression +### `let`, `letrec`: local bindings in an expression + +**NOTE**: *This is primarily a code generation target API for the `let[]` family of [macros](macros.md), which make the constructs easier to use, and make the code look almost like normal Python.
Below is the documentation for the raw API.* -**NOTE**: This is primarily a code generation target API for the ``let[]`` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API. +The `let` constructs introduce bindings local to an expression, like Scheme's `let` and `letrec`. -Introduces bindings local to an expression, like Scheme's ``let`` and ``letrec``. For easy-to-use versions of these constructs that look almost like normal Python, see [our macros](macros.md). +#### `let` -In ``let``, the bindings are independent (do not see each other). A binding is of the form ``name=value``, where ``name`` is a Python identifier, and ``value`` is any expression. +In `let`, the bindings are independent (do not see each other). A binding is of the form `name=value`, where `name` is a Python identifier, and `value` is any expression. Use a `lambda e: ...` to supply the environment to the body: ```python +# These six are the constructs covered in this section of documentation. from unpythonic import let, letrec, dlet, dletrec, blet, bletrec u = lambda lst: let(seen=set(), @@ -97,9 +161,11 @@ u(L) # --> [1, 3, 2, 4] Generally speaking, `body` is a one-argument function, which takes in the environment instance as the first positional parameter (by convention, named `e` or `env`). In typical inline usage, `body` is `lambda e: expr`. -*Let over lambda*. Here the inner ``lambda`` is the definition of the function ``counter``: +*Let over lambda*. 
Here the inner `lambda` is the definition of the function `counter`: ```python +from unpythonic import let, begin + counter = let(x=0, body=lambda e: lambda: @@ -109,6 +175,21 @@ counter() # --> 1 counter() # --> 2 ``` +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, let, do + +counter = let[[x << 0] in + (lambda: + do[x << x + 1, + x])] +counter() # --> 1 +counter() # --> 2 +``` + +(*The parentheses around the lambda are just to make the expression into syntactically valid Python. You can also use brackets instead, denoting a multiple-expression `let` body - which is also valid even if there is just one expression. The `do` makes a multiple-expression `lambda` body. For more, see the [macro documentation](macros.md).*) + Compare the sweet-exp [Racket](http://racket-lang.org/) (see [SRFI-110](https://srfi.schemers.org/srfi-110/srfi-110.html) and [sweet](https://docs.racket-lang.org/sweet/)): ```racket @@ -121,9 +202,13 @@ counter() ; --> 1 counter() ; --> 2 ``` -*Let over def* decorator ``@dlet``, to *let over lambda* more pythonically: +#### `dlet`, `blet` + +*Let over def* decorator `@dlet`, to *let over lambda* more pythonically: ```python +from unpythonic import dlet + @dlet(x=0) def counter(*, env=None): # named argument "env" filled in by decorator env.x += 1 @@ -132,9 +217,30 @@ counter() # --> 1 counter() # --> 2 ``` -In `letrec`, bindings may depend on ones above them in the same `letrec`, by using `lambda e: ...` (**Python 3.6+**): +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, dlet + +@dlet(x << 0) +def counter(): + x << x + 1 + return x +counter() # --> 1 +counter() # --> 2 +``` + +The `@blet` decorator is otherwise the same as `@dlet`, but instead of decorating a function definition in the usual manner, it runs the `def` block immediately, and upon exit, replaces the function definition with the return value. 
The name `blet` is an abbreviation of *block let*, since the role of the `def` is just a code block to be run immediately. + +#### `letrec` + +The name of this construct comes from the Scheme family of Lisps, and stands for *let (mutually) recursive*. The "[mutually recursive](https://en.wikipedia.org/wiki/Mutual_recursion)" refers to the kind of scoping between the bindings in the same `letrec`. + +In plain English, in `letrec`, the value of a binding may depend on other bindings in the same `letrec`. The raw API in `unpythonic` uses a `lambda e: ...` to provide the environment: ```python +from unpythonic import letrec + x = letrec(a=1, b=lambda e: e.a + 1, @@ -142,13 +248,27 @@ x = letrec(a=1, e.b) # --> 2 ``` -In `letrec`, the ``value`` of each binding is either a simple value (non-callable, and doesn't use the environment), or an expression of the form ``lambda e: valexpr``, providing access to the environment as ``e``. If ``valexpr`` itself is callable, the binding **must** have the ``lambda e: ...`` wrapper to prevent any misunderstandings in the environment initialization procedure. +The ordering of the definitions is respected, because Python 3.6 and later preserve the ordering of named arguments passed in a function call. See [PEP 468](https://www.python.org/dev/peps/pep-0468/). + +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec + +x = letrec[[a << 1, + b << a + 1] in + b] +``` + +In the non-macro `letrec`, the `value` of each binding is either a simple value (non-callable, and doesn't use the environment), or an expression of the form `lambda e: valexpr`, providing access to the environment as `e`. If `valexpr` itself is callable, the binding **must** have the `lambda e: ...` wrapper to prevent misinterpretation by the machinery when the environment initialization procedure runs. -In a non-callable ``valexpr``, trying to depend on a binding below it raises ``AttributeError``. 
+In a non-callable `valexpr`, trying to depend on a binding below it raises `AttributeError`. -A callable ``valexpr`` may depend on any bindings (also later ones) in the same `letrec`. Mutually recursive functions: +A callable `valexpr` may depend on any bindings (**also later ones**) in the same `letrec`. For example, here is a pair of [mutually recursive](https://en.wikipedia.org/wiki/Mutual_recursion) functions: ```python +from unpythonic import letrec + letrec(evenp=lambda e: lambda x: (x == 0) or e.oddp(x - 1), @@ -159,9 +279,24 @@ letrec(evenp=lambda e: e.evenp(42)) # --> True ``` +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec + +letrec[[evenp << (lambda x: + (x == 0) or oddp(x - 1)), + oddp << (lambda x: + (x != 0) and evenp(x - 1))] in + evenp(42)] # --> True +``` + + Order-preserving list uniqifier: ```python +from unpythonic import letrec, begin + u = lambda lst: letrec(seen=set(), see=lambda e: lambda x: @@ -171,19 +306,30 @@ u = lambda lst: letrec(seen=set(), [e.see(x) for x in lst if x not in e.seen]) ``` -**CAUTION**: in Pythons older than 3.6, bindings are **initialized in an arbitrary order**, also in `letrec`. This is a limitation of the kwargs abuse. Hence mutually recursive functions are possible, but a non-callable `valexpr` cannot depend on other bindings in the same `letrec`. +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec, do + +u = lambda lst: letrec[[seen << set(), + see << (lambda x: + do[seen.add(x), + x])] in + [[see(x) for x in lst if x not in seen]]] +``` + +(*The double brackets around the `letrec` body are needed because brackets denote a multiple-expression `letrec` body. 
So it is a multiple-expression body that contains just one expression, which is a list comprehension.*) -Trying to access `e.foo` from `e.bar` arbitrarily produces either the intended value of `e.foo`, or the uninitialized `lambda e: ...`, depending on whether `e.foo` has been initialized or not at the point of time when `e.bar` is being initialized. +The decorators `@dletrec` and `@bletrec` work otherwise exactly like `@dlet` and `@blet`, respectively, but the bindings are scoped like in `letrec` (mutually recursive scope). -This has been fixed in Python 3.6, see [PEP 468](https://www.python.org/dev/peps/pep-0468/). #### Lispylet: alternative syntax -**NOTE**: This is primarily a code generation target API for the ``let[]`` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API. +**NOTE**: *This is primarily a code generation target API for the `let[]` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API.* -If you need **guaranteed left-to-right initialization** of `letrec` bindings in Pythons older than 3.6, there is also an alternative implementation for all the `let` constructs, with positional syntax and more parentheses. The only difference is the syntax; the behavior is identical with the default implementation. +The `lispylet` module was originally created to allow guaranteed left-to-right initialization of `letrec` bindings in Pythons older than 3.6, hence the positional syntax and more parentheses. The only difference is the syntax; the behavior is identical with the other implementation. As of 0.15, the main role of `lispylet` is to act as the run-time backend for the `let` family of macros. -These constructs are available in the top-level `unpythonic` namespace, with the ``ordered_`` prefix: ``ordered_let``, ``ordered_letrec``, ``ordered_dlet``, ``ordered_dletrec``, ``ordered_blet``, ``ordered_bletrec``. 
+These constructs are available in the top-level `unpythonic` namespace, with the `ordered_` prefix: `ordered_let`, `ordered_letrec`, `ordered_dlet`, `ordered_dletrec`, `ordered_blet`, `ordered_bletrec`. It is also possible to override the default `let` constructs by the `ordered_` variants, like this: @@ -192,7 +338,7 @@ from unpythonic.lispylet import * # override the default "let" implementation letrec((('a', 1), ('b', lambda e: - e.a + 1)), # may refer to any bindings above it in the same letrec, also in Python < 3.6 + e.a + 1)), # may refer to any bindings above it in the same letrec lambda e: e.b) # --> 2 @@ -208,14 +354,32 @@ letrec((("evenp", lambda e: The syntax is `let(bindings, body)` (respectively `letrec(bindings, body)`), where `bindings` is `((name, value), ...)`, and `body` is like in the default variants. The same rules concerning `name` and `value` apply. -The let macros internally use this *lispylet* implementation. +For comparison, with the macro API, the above becomes: +```python +from unpythonic.syntax import macros, letrec + +letrec[[a << 1, + b << a + 1] in + b] + +letrec[[evenp << (lambda x: + (x == 0) or oddp(x - 1)), + oddp << (lambda x: + (x != 0) and evenp(x - 1))] in + evenp(42)] # --> True +``` -### ``env``: the environment +(*The transformations made by the macros may be the most apparent when comparing these examples. Note that the macros scope the let-bindings lexically, automatically figuring out which `let` environment, if any, to refer to.*) -The environment used by all the ``let`` constructs and ``assignonce`` (but **not** by `dyn`) is essentially a bunch with iteration, subscripting and context manager support. It is somewhat similar to [`types.SimpleNamespace`](https://docs.python.org/3/library/types.html#types.SimpleNamespace), but with many extra features. For details, see `unpythonic.env`. 
-Our ``env`` allows things like: +### `env`: the environment + +**Changed in v0.15.2.** *`env` objects are now pickleable.* + +The environment used by all the `let` constructs and `assignonce` (but **not** by `dyn`) is essentially a bunch with iteration, subscripting and context manager support. It is somewhat similar to [`types.SimpleNamespace`](https://docs.python.org/3/library/types.html#types.SimpleNamespace), but with many extra features. For details, see `unpythonic.env.env` (and note the unfortunate module name). + +Our `env` allows things like: ```python let(x=1, y=2, z=3, @@ -251,10 +415,12 @@ When the `with` block exits, the environment clears itself. The environment inst (This allows using `with env(...) as e:` as a poor man's `let`, if you have a block of statements you want to locally scope some names to, but don't want to introduce a `def`.) -``env`` provides the ``collections.abc.Mapping`` and ``collections.abc.MutableMapping`` APIs. +`env` provides the `collections.abc.Mapping` and `collections.abc.MutableMapping` APIs. + +### `assignonce` -### ``assignonce`` +*As of v0.15.0, `assignonce` is mostly a standalone curiosity that has never been integrated with the rest of `unpythonic`. But anything that works with arbitrary subclasses of `env`, for example `mogrify`, works with it, too.* In Scheme terms, make `define` and `set!` look different: @@ -268,30 +434,37 @@ with assignonce() as e: e.foo = "quux" # AttributeError, e.foo already defined. ``` -It's a subclass of ``env``, so it shares most of the same [features](#env-the-environment) and allows similar usage. +The `assignonce` construct is a subclass of `env`, so it shares most of the same [features](#env-the-environment) and allows similar usage. #### Historical note The fact that in Python creating bindings and updating (rebinding) them look the same was already noted in 2000, in [PEP 227](https://www.python.org/dev/peps/pep-0227/#discussion), which introduced true closures to Python 2.1. 
For related history concerning the `nonlocal` keyword, see [PEP 3104](https://www.python.org/dev/peps/pep-3104/). -### ``dyn``: dynamic assignment +### `dyn`: dynamic assignment -([As termed by Felleisen.](https://groups.google.com/forum/#!topic/racket-users/2Baxa2DxDKQ) Other names seen in the wild for variants of this feature include *parameters* (not to be confused with function parameters), *special variables*, *fluid variables*, *fluid let*, and even the misnomer *"dynamic scoping"*.) +**Changed in v0.14.2.** *To bring this in line with [SRFI-39](https://srfi.schemers.org/srfi-39/srfi-39.html), `dyn` now supports rebinding, using assignment syntax such as `dyn.x = 42`, and the function `dyn.update(x=42, y=17, ...)`.* -Like global variables, but better-behaved. Useful for sending some configuration parameters through several layers of function calls without changing their API. Best used sparingly. +([As termed by Felleisen.](https://groups.google.com/forum/#!topic/racket-users/2Baxa2DxDKQ) Other names seen in the wild for variants of this feature include *parameters* ([Scheme](https://srfi.schemers.org/srfi-39/srfi-39.html) and [Racket](https://docs.racket-lang.org/reference/parameters.html); not to be confused with function parameters), *special variables* (Common Lisp), *fluid variables*, *fluid let* (e.g. Emacs Lisp), and even the misnomer *"dynamic scoping"*.) + +The feature itself is *dynamic assignment*; the things it creates are *dynamic variables* (a.k.a. *dynvars*). + +Dynvars are like global variables, but better-behaved. Useful for sending some configuration parameters through several layers of function calls without changing their API. Best used sparingly. 
There's a singleton, `dyn`: ```python -from unpythonic import dyn +from unpythonic import dyn, make_dynvar + +make_dynvar(c=17) # top-level default value def f(): # no "a" in lexical scope here assert dyn.a == 2 def g(): - with dyn.let(a=2, b="foo"): + with dyn.let(a=2, b="foo", c=42): assert dyn.a == 2 + assert dyn.c == 42 f() @@ -301,60 +474,66 @@ def g(): # now "a" has reverted to its previous value assert dyn.a == 2 + assert dyn.c == 17 # "c" has reverted to its default value print(dyn.b) # AttributeError, dyn.b no longer exists g() ``` -Dynamic variables are set using `with dyn.let(...)`. There is no `set`, `<<`, unlike in the other `unpythonic` environments. +Dynvars are created using `with dyn.let(k0=v0, ...)`. The syntax is in line with the nature of the assignment, which is in effect *for the dynamic extent* of the `with`. Exiting the `with` block pops the dynamic environment stack. Inner dynamic environments shadow outer ones. + +The point of dynamic assignment is that dynvars are seen also by code that is *outside the lexical scope* where the `with dyn.let` resides. The use case is to avoid a function parameter definition cascade, when you need to pass some information through several layers that do not care about it. This is especially useful for passing "background" information, such as plotter settings in scientific visualization, or the macro expander instance in metaprogramming. -**Changed in v0.14.2.** *To bring this in line with [SRFI-39](https://srfi.schemers.org/srfi-39/srfi-39.html), `dyn` now supports rebinding, using assignment syntax such as `dyn.x = 42`. For an atomic mass-update, see `dyn.update`. Rebinding occurs in the closest enclosing dynamic environment that has the target name bound. If the name is not bound in any dynamic environment, ``AttributeError`` is raised.* +To give a dynvar a top-level default value, use `make_dynvar(k0=v0, ...)`. Usually this is done at the top-level scope of the module for which that dynvar is meaningful. 
Each dynvar, of the same name, should only have one default set; the (dynamically) latest definition always overwrites. However, we do not prevent overwrites, because in some codebases the same module may run its top-level initialization code multiple times (e.g. if a module has a `main()` for tests, and the file gets loaded both as a module and as the main program). -**CAUTION**: Use rebinding of dynamic variables carefully, if at all. Stealth updates of dynamic variables defined in an enclosing dynamic extent can destroy any chance of statically reasoning about the code. +To rebind existing dynvars, use `dyn.k = v`, or `dyn.update(k0=v0, ...)`. Rebinding occurs in the closest enclosing dynamic environment that has the target name bound. If the name is not bound in any dynamic environment (including the top-level one), `AttributeError` is raised. -The values of dynamic variables remain bound for the dynamic extent of the `with` block. Exiting the `with` block then pops the stack. Inner dynamic scopes shadow outer ones. Dynamic variables are seen also by code that is outside the lexical scope where the `with dyn.let` resides. +**CAUTION**: Use rebinding of dynvars carefully, if at all. Stealth updates of dynvars defined in an enclosing dynamic extent can destroy any chance of statically reasoning about your code. + +There is no `set` function or `<<` operator, unlike in the other `unpythonic` environments. + +
Each thread has its own dynamic scope stack. There is also a global dynamic scope for default values, shared between threads. -
-Each thread has its own dynamic scope stack. There is also a global dynamic scope for default values, shared between threads. A newly spawned thread automatically copies the then-current state of the dynamic scope stack **from the main thread** (not the parent thread!). Any copied bindings will remain on the stack for the full dynamic extent of the new thread. Because these bindings are not associated with any `with` block running in that thread, and because aside from the initial copying, the dynamic scope stacks are thread-local, any copied bindings will never be popped, even if the main thread pops its own instances of them. -The source of the copy is always the main thread mainly because Python's `threading` module gives no tools to detect which thread spawned the current one. (If someone knows a simple solution, PRs welcome!) +The source of the copy is always the main thread mainly because Python's `threading` module gives no tools to detect which thread spawned the current one. (If someone knows a simple solution, a PR is welcome!) -Finally, there is one global dynamic scope shared between all threads, where the default values of dynvars live. The default value is used when ``dyn`` is queried for the value outside the dynamic extent of any ``with dyn.let()`` blocks. Having a default value is convenient for eliminating the need for ``if "x" in dyn`` checks, since the variable will always exist (after the global definition has been executed). +Finally, there is one global dynamic scope shared between all threads, where the default values of dynvars live. The default value is used when `dyn` is queried for the value outside the dynamic extent of any `with dyn.let()` blocks. Having a default value is convenient for eliminating the need for `if "x" in dyn` checks, since the variable will always exist (at any time after the global definition has been executed).
-To create a dynvar and set its default value, use ``make_dynvar``. Each dynamic variable, of the same name, should only have one default set; the (dynamically) latest definition always overwrites. However, we do not prevent overwrites, because in some codebases the same module may run its top-level initialization code multiple times (e.g. if a module has a ``main()`` for tests, and the file gets loaded both as a module and as the main program). +For more details, see the methods of `dyn`; particularly noteworthy are `asdict` and `items`, which give access to a *live view* of `dyn`'s contents in a dictionary format (intended for reading only!). The `asdict` method essentially creates a `collections.ChainMap` instance, while `items` is an abbreviation for `asdict().items()`. The `dyn` object itself can also be iterated over; this creates a `ChainMap` instance and redirects to iterate over it. `dyn` also provides the `collections.abc.Mapping` API. -See also the methods of ``dyn``; particularly noteworthy are ``asdict`` and ``items``, which give access to a live view to dyn's contents in a dictionary format (intended for reading only!). The ``asdict`` method essentially creates a ``collections.ChainMap`` instance, while ``items`` is an abbreviation for ``asdict().items()``. The ``dyn`` object itself can also be iterated over; this creates a ``ChainMap`` instance and redirects to iterate over it. ``dyn`` also provides the ``collections.abc.Mapping`` API. 
- -To support dictionary-like idioms in iteration, dynvars can alternatively be accessed by subscripting; ``dyn["x"]`` has the same meaning as ``dyn.x``, so you can do things like: +To support dictionary-like idioms in iteration, dynvars can alternatively be accessed by subscripting; `dyn["x"]` has the same meaning as `dyn.x`, to allow things like: ```python print(tuple((k, dyn[k]) for k in dyn)) ``` -Finally, ``dyn`` supports membership testing as ``"x" in dyn``, ``"y" not in dyn``, where the string is the name of the dynvar whose presence is being tested. +Finally, `dyn` supports membership testing as `"x" in dyn`, `"y" not in dyn`, where the string is the name of the dynvar whose presence is being tested. -For some more details, see [the unit tests](../unpythonic/test/test_dynassign.py). +For some more details, see [the unit tests](../unpythonic/tests/test_dynassign.py). ### Relation to similar features in Lisps -This is essentially [SRFI-39: Parameter objects](https://srfi.schemers.org/srfi-39/), using the MzScheme approach in the presence of multiple threads. +This is essentially [SRFI-39: Parameter objects](https://srfi.schemers.org/srfi-39/) for Python, using the MzScheme approach in the presence of multiple threads. -[Racket](http://racket-lang.org/)'s [`parameterize`](https://docs.racket-lang.org/guide/parameterize.html) behaves similarly. However, Racket seems to be the state of the art in many lispy language design related things, so its take on the feature may have some finer points I haven't thought of. +[Racket](http://racket-lang.org/)'s [`parameterize`](https://docs.racket-lang.org/guide/parameterize.html) behaves similarly. However, Racket seems to be the state of the art in many lispy language design related things, so its take on the feature may have some finer points I have not thought of. 
On Common Lisp's special variables, see [Practical Common Lisp by Peter Seibel](http://www.gigamonkeys.com/book/variables.html), especially footnote 10 in the linked chapter, for a definition of terms. Similarly, dynamic variables in our `dyn` have *indefinite scope* (because `dyn` is implemented as a module-level global, accessible from anywhere), but *dynamic extent*. So what we have in `dyn` is almost exactly like Common Lisp's special variables, except we are missing convenience features such as `setf` and a smart `let` that auto-detects whether a variable is lexical or dynamic (if the name being bound is already in scope). + ## Containers -We provide some additional containers. +We provide some additional low-level containers beyond those provided by Python itself. The class names are lowercase, because these are intended as low-level utility classes in principle on par with the builtins. The immutable containers are hashable. All containers are pickleable (if their contents are). -### ``frozendict``: an immutable dictionary +### `frozendict`: an immutable dictionary -Given the existence of ``dict`` and ``frozenset``, this one is oddly missing from the standard library. +**Changed in 0.14.2**. *[A bug in `frozendict` pickling](https://github.com/Technologicat/unpythonic/issues/55) has been fixed. Now also the empty `frozendict` pickles and unpickles correctly.* + +Given the existence of `dict` and `frozenset`, this one is oddly missing from the language. 
```python from unpythonic import frozendict @@ -380,7 +559,7 @@ assert d4['a'] == 23 and d4['b'] == 2 assert d3['a'] == 42 and d3['b'] == 2 # ...of course without touching the original ``` -Any mappings used when creating an instance are shallow-copied, so that the bindings of the ``frozendict`` do not change even if the original input is later mutated: +Any mappings used when creating an instance are shallow-copied, so that the bindings of the `frozendict` do not change even if the original input is later mutated: ```python d = {1:2, 3:4} @@ -392,7 +571,7 @@ assert fd == {1: 2, 3: 4} **The usual caution** concerning immutable containers in Python applies: the container protects only the bindings against changes. If the values themselves are mutable, the container cannot protect from mutations inside them. -All the usual read-access stuff works: +All the usual read-access features work: ```python d7 = frozendict({1:2, 3:4}) @@ -411,7 +590,7 @@ assert d7.get(5, 0) == 0 assert d7.get(5) is None ``` -In terms of ``collections.abc``, a ``frozendict`` is a hashable immutable mapping: +In terms of `collections.abc`, a `frozendict` is a hashable immutable mapping: ```python assert issubclass(frozendict, Mapping) @@ -422,21 +601,21 @@ assert hash(d7) == hash(frozendict({1:2, 3:4})) assert hash(d7) != hash(frozendict({1:2})) ``` -The abstract superclasses are virtual, just like for ``dict`` (i.e. they do not appear in the MRO). +The abstract superclasses are virtual, just like for `dict`. We mean *virtual* in the sense of [`abc.ABCMeta`](https://docs.python.org/3/library/abc.html#abc.ABCMeta), i.e. a virtual superclass does not appear in the MRO. -Finally, ``frozendict`` obeys the empty-immutable-container singleton invariant: +Finally, `frozendict` obeys the empty-immutable-container singleton invariant: ```python assert frozendict() is frozendict() ``` -**Changed in 0.14.2**. 
*[A bug in `frozendict` pickling](https://github.com/Technologicat/unpythonic/issues/55) has been fixed. Now also the empty `frozendict` pickles and unpickles correctly.* - ### `cons` and friends: pythonic lispy linked lists *Laugh, it's funny.* +**Changed in v0.14.2.** *`nil` is now a `Singleton`, so it is treated correctly by `pickle`. The `nil` instance refresh code inside the `cons` class has been removed, so the previous caveat about pickling a standalone `nil` value no longer applies.* + ```python from unpythonic import (cons, nil, ll, llist, car, cdr, caar, cdar, cadr, cddr, @@ -472,13 +651,13 @@ assert lzip(ll(1, 2, 3), ll(4, 5, 6)) == ll(ll(1, 4), ll(2, 5), ll(3, 6)) Cons cells are immutable à la Racket (no `set-car!`/`rplaca`, `set-cdr!`/`rplacd`). Accessors are provided up to `caaaar`, ..., `cddddr`. -Although linked lists are created with ``ll`` or ``llist``, the data type (for e.g. ``isinstance``) is ``cons``. +Although linked lists are created with the functions `ll` or `llist`, the data type (for e.g. `isinstance`) is `cons`. -Iterators are supported to walk over linked lists (this also gives sequence unpacking support). When ``next()`` is called, we return the car of the current cell the iterator points to, and the iterator moves to point to the cons cell in the cdr, if any. When the cdr is not a cons cell, it is the next (and last) item returned; except if it `is nil`, then iteration ends without returning the `nil`. +Iterators are supported, to walk over linked lists. This also gives sequence unpacking support. When `next()` is called, we return the `car` of the current cell the iterator points to, and the iterator moves to point to the cons cell in the `cdr`, if any. When the `cdr` is not a cons cell, it is the next (and last) item returned; except if it `is nil`, then iteration ends without returning the `nil`. 
-Python's builtin ``reversed`` can be applied to linked lists; it will internally ``lreverse`` the list (which is O(n)), then return an iterator to that. The ``llist`` constructor is special-cased so that if the input is ``reversed(some_ll)``, it just returns the internal already reversed list. (This is safe because cons cells are immutable.) +Python's builtin `reversed` can be applied to linked lists; it will internally `lreverse` the list (which is O(n)), then return an iterator to that. The `llist` constructor is special-cased so that if the input is `reversed(some_ll)`, it just returns the internal already reversed list. (This is safe because cons cells are immutable.) -Cons structures, by default, print in a pythonic format suitable for ``eval`` (if all elements are): +Cons structures, by default, print in a pythonic format suitable for `eval` (if all elements are): ```python print(cons(1, 2)) # --> cons(1, 2) @@ -494,26 +673,24 @@ print(ll(1, 2, 3).lispyrepr()) # --> (1 2 3) print(cons(cons(1, 2), cons(3, 4)).lispyrepr()) # --> ((1 . 2) . (3 . 4)) ``` -For more, see the ``llist`` submodule. +For more, see the `llist` submodule. #### Notes -There is no ``copy`` method or ``lcopy`` function, because cons cells are immutable; which makes cons structures immutable. +There is no `copy` method or `lcopy` function, because cons cells are immutable; which makes cons structures immutable. -(However, for example, it is possible to ``cons`` a new item onto an existing linked list; that's fine because it produces a new cons structure - which shares data with the original, just like in Racket.) +However, for example, it is possible to `cons` a new item onto an existing linked list; that is fine, because it produces a new cons structure - which shares data with the original, just like in Racket. In general, copying cons structures can be error-prone. 
Given just a starting cell it is impossible to tell if a given instance of a cons structure represents a linked list, or something more general (such as a binary tree) that just happens to locally look like one, along the path that would be traversed if it was indeed a linked list. -The linked list iteration strategy does not recurse in the ``car`` half, which could lead to incomplete copying. The tree strategy that recurses on both halves, on the other hand, will flatten nested linked lists and produce also the final ``nil``. - -We provide a ``JackOfAllTradesIterator`` as a compromise that understands both trees and linked lists. Nested lists will be flattened, and in a tree any ``nil`` in a ``cdr`` position will be omitted from the output. ``BinaryTreeIterator`` and ``JackOfAllTradesIterator`` use an explicit data stack instead of implicitly using the call stack for keeping track of the recursion. All ``cons`` iterators work for arbitrarily deep cons structures without causing Python's call stack to overflow, and without the need for TCO. +The linked list iteration strategy does not recurse in the `car` half, which could lead to incomplete copying. The tree strategy that recurses on both halves, on the other hand, will flatten nested linked lists and produce also the final `nil`. -``cons`` has no ``collections.abc`` virtual superclasses (except the implicit ``Hashable`` since ``cons`` provides ``__hash__`` and ``__eq__``), because general cons structures do not fit into the contracts represented by membership in those classes. For example, size cannot be known without iterating, and depends on which iteration scheme is used (e.g. ``nil`` dropping, flattening); which scheme is appropriate depends on the content. +We provide a `JackOfAllTradesIterator` as a compromise that understands both trees and linked lists. Nested lists will be flattened, and in a tree any `nil` in a `cdr` position will be omitted from the output. 
`BinaryTreeIterator` and `JackOfAllTradesIterator` use an explicit data stack instead of implicitly using the call stack for keeping track of the recursion. All `cons` iterators work for arbitrarily deep cons structures without causing Python's call stack to overflow, and without the need for TCO. -**Caution**: the ``nil`` singleton is freshly created in each session; newnil is not oldnil, so don't pickle a standalone ``nil``. The unpickler of ``cons`` automatically refreshes any ``nil`` instances inside a pickled cons structure, so that **cons structures** support the illusion that ``nil`` is a special value like ``None`` or ``...``. After unpickling, ``car(c) is nil`` and ``cdr(c) is nil`` still work as expected, even though ``id(nil)`` has changed between sessions. +`cons` has no `collections.abc` virtual superclasses (except the implicit `Hashable` since `cons` provides `__hash__` and `__eq__`), because general cons structures do not fit into the contracts represented by membership in those classes. For example, size cannot be known without iterating, and depends on which iteration scheme is used (e.g. `nil` dropping, flattening); which scheme is appropriate depends on the content. -### ``box``: a mutable single-item container +### `box`: a mutable single-item container **Changed in v0.14.2**. *The `box` container API is now `b.set(newvalue)` to rebind, returning the new value as a convenience. The equivalent syntactic sugar is `b << newvalue`. The item inside the box can be extracted with `b.get()`. The equivalent syntactic sugar is `unbox(b)`.* @@ -523,7 +700,9 @@ We provide a ``JackOfAllTradesIterator`` as a compromise that understands both t **Changed in v0.14.2**. *Accessing the `.x` attribute of a `box` directly is now deprecated. It will continue to work with `box` at least until 0.15, but it does not and cannot work with `ThreadLocalBox`, which must handle things differently due to implementation reasons. 
Use the API mentioned above; it supports both kinds of boxes with the same syntax.* -No doubt anyone programming in an imperative language has run into the situation caricatured by this highly artificial example: +#### `box` + +Consider this highly artificial example: ```python animal = "dog" @@ -535,9 +714,9 @@ f(animal) assert animal == "dog" ``` -Many solutions exist. Common pythonic ones are abusing a ``list`` to represent a box (and then trying to manually remember that it is supposed to hold only a single item), or (if the lexical structure of the particular piece of code allows it) using the ``global`` or ``nonlocal`` keywords to tell Python, on assignment, to overwrite a name that already exists in a surrounding scope. +Many solutions exist. Common pythonic ones are abusing a `list` to represent a box (and then trying to remember that it is supposed to hold only a single item), or (if the lexical structure of the particular piece of code allows it) using the `global` or `nonlocal` keywords to tell Python, on assignment, to overwrite a name that already exists in a surrounding scope. -As an alternative to the rampant abuse of lists, we provide a rackety ``box``, which is a minimalistic mutable container that holds exactly one item. Any code that has a reference to the box can update the data in it: +As an alternative to the rampant abuse of lists, we provide a rackety `box`, which is a minimalistic mutable container that holds exactly one item. Any code that has a reference to the box can update the data in it: ```python from unpythonic import box, unbox @@ -569,7 +748,7 @@ f("dog") Here `g` *effectively rebinds a local variable of `f`* - whether that is a good idea is a separate question, but technically speaking, this would not be possible without a container. As mentioned, abusing a `list` is the standard Python (but not very pythonic!) solution. Using specifically a `box` makes the intent explicit. 
-The ``box`` API is summarized by: +The `box` API is summarized by: ```python from unpythonic import box, unbox @@ -602,13 +781,23 @@ box3.set("fox") # same without syntactic sugar assert "fox" in box3 ``` -The expression ``item in b`` has the same meaning as ``unbox(b) == item``. Note ``box`` is a **mutable container**, so it is **not hashable**. +The expression `item in b` has the same meaning as `unbox(b) == item`. Note `box` is a **mutable container**, so it is **not hashable**. -The expression `unbox(b)` has the same meaning as `b.get()`, but because it is a function (instead of a method), it additionally sanity checks that `b` is a box, and if not, raises `TypeError`. +The expression `unbox(b)` has the same meaning as `b.get()`, but because it is a function (instead of a method), it additionally sanity-checks that `b` is a box, and if not, raises `TypeError`. The expression `b << newitem` has the same meaning as `b.set(newitem)`. In both cases, the new value is returned as a convenience. -`ThreadLocalBox` is otherwise exactly like `box`, but it's magic: its contents are thread-local. It also holds a default object, which is set initially when the `ThreadLocalBox` is instantiated. The default object is seen by threads that have not placed any object into the box. +#### `Some` + +We also provide an **immutable** box, `Some`. This can be useful to represent optional data. + +The idea is that the value, when present, is placed into a `Some`, such as `Some(42)`, `Some("cat")`, `Some(myobject)`. Then, the situation where the value is absent can be represented as a bare `None`. So specifically, `Some(None)` means that a value is present and this value is `None`, whereas a bare `None` means that there is no value. + +It is like the `Some` constructor of a `Maybe` monad, but with no monadic magic. In this interpretation, the bare constant `None` plays the role of `Nothing`. 
+ +#### `ThreadLocalBox` + +`ThreadLocalBox` is otherwise exactly like `box`, but magical: its contents are thread-local. It also holds a default object, which is set initially when the `ThreadLocalBox` is instantiated. The default object is seen by threads that have not placed any object into the box. ```python from unpythonic import ThreadLocalBox, unbox @@ -665,18 +854,16 @@ tlb.clear() # When we clear the box in this thread... assert unbox(tlb) == "cat" # ...this thread sees the current default object again. ``` -We also provide an **immutable** box, `Some`. This can be useful for optional data. The idea is that the value, when present, is placed into a `Some`, such as `Some(42)`, `Some("cat")`, `Some(myobject)`. Then, the situation where the value is absent can be represented as a bare `None`. So specifically, `Some(None)` means that a value is present and this value is `None`, whereas a bare `None` means that there is no value. - -### ``Shim``: redirect attribute accesses +### `Shim`: redirect attribute accesses **Added in v0.14.2**. -A `Shim` is an attribute access proxy. The shim holds a `box` (or a `ThreadLocalBox`), and redirects attribute accesses on the shim to whatever object happens to currently be in the box. The point is that the object in the box can be replaced with a different one later (by sending another object into the box), and the code accessing the proxied object through the shim doesn't need to be aware that anything has changed. +A `Shim` is an *attribute access proxy*. The shim holds a `box` (or a `ThreadLocalBox`; your choice), and redirects attribute accesses on the shim to whatever object happens to currently be in the box. The point is that the object in the box can be replaced with a different one later (by sending another object into the box), and the code accessing the proxied object through the shim does not need to be aware that anything has changed. 
-For example, this can combo with `ThreadLocalBox` to redirect standard output only in particular threads. Place the stream object in a `ThreadLocalBox`, shim that box, then replace `sys.stdout` with the shim. See the source code of `unpythonic.net.server` for an example that actually does (and cleanly undoes) this. +For example, `Shim` can combo with `ThreadLocalBox` to redirect standard output only in particular threads. Place the stream object in a `ThreadLocalBox`, shim that box, then replace `sys.stdout` with the shim. See the source code of `unpythonic.net.server` for an example that actually does (and cleanly undoes) this. -Since deep down, attribute access is the whole point of objects, `Shim` is essentially a transparent object proxy. (For example, a method call is an attribute read (via a descriptor), followed by a function call.) +Since deep down, attribute access is the whole point of objects, `Shim` is essentially a transparent object proxy. (For example, a method call is an attribute read (via a [descriptor](https://docs.python.org/3/howto/descriptor.html)), followed by a function call.) ```python from unpythonic import Shim, box, unbox @@ -707,9 +894,9 @@ assert s.getme() == 42 assert not hasattr(s, "y") # The new TestTarget instance doesn't have "y". ``` -A shim can have an optional fallback object. It can be either any object, or a box if you want to replace the fallback later. **For attribute reads** (i.e. `__getattr__`), if the object in the primary box does not have the requested attribute, `Shim` will try to get it from the fallback. If `fallback` is boxed, the attribute read takes place on the object in the box. If it is not boxed, the attribute read takes place directly on `fallback`. +A shim can have an optional fallback object. It can be either any object, or a `box` (or `ThreadLocalBox`) if you want to replace the fallback later. **For attribute reads** (i.e. 
`__getattr__`), if the object in the primary box does not have the requested attribute, `Shim` will try to get it from the fallback. If `fallback` is boxed, the attribute read takes place on the object in the box. If it is not boxed, the attribute read takes place directly on `fallback`. -Any **attribute writes** (i.e. `__setattr__`, binding or rebinding an attribute) always take place on the object in the primary box. +Any **attribute writes** (i.e. `__setattr__`, binding or rebinding an attribute) always take place on the object in the **primary** box. That is, binding or rebinding of attributes is never performed on the fallback object. ```python from unpythonic import Shim, box, unbox @@ -752,9 +939,34 @@ assert s.y == "hi from Wai" assert s.z == "hi from Zee" ``` +Or, since the operation takes just one `elt` and an `acc`, we can also use `reducer` instead of `foldr`, shortening this by one line: + +```python +from unpythonic import Shim, box, unbox, reducer + +class Ex: + x = "hi from Ex" +class Wai: + x = "hi from Wai" + y = "hi from Wai" +class Zee: + x = "hi from Zee" + y = "hi from Zee" + z = "hi from Zee" + + # These will be tried from left to right. +boxes = [box(obj) for obj in (Ex(), Wai(), Zee())] +s = reducer(Shim, boxes) # Shim(box, fallback) <-> op(elt, acc) +assert s.x == "hi from Ex" +assert s.y == "hi from Wai" +assert s.z == "hi from Zee" +``` + ### Container utilities +**Changed in v0.15.0.** *The sequence length argument in `in_slice`, `index_in_slice` is now named `length`, not `l` (ell). This avoids an E741 warning in `flake8`, and is more descriptive.* + **Inspect the superclasses** that a particular container type has: ```python @@ -762,7 +974,7 @@ from unpythonic import get_abcs print(get_abcs(list)) ``` -This includes virtual superclasses, i.e. those that are not part of the MRO. This works by ``issubclass(cls, v)`` on all classes defined in ``collections.abc``. +This includes virtual superclasses, i.e.
those that are not part of the MRO. This works by `issubclass(cls, v)` on all classes defined in `collections.abc`. **Reflection on slices**: @@ -783,41 +995,53 @@ An optional length argument can be given to interpret negative indices. See the Sequencing refers to running multiple expressions, in sequence, in place of one expression. -Keep in mind the only reason to ever need multiple expressions: *side effects.* (Assignment is a side effect, too; it modifies the environment. In functional style, intermediate named definitions to increase readability are perhaps the most useful kind of side effect.) +Keep in mind the only reason to ever need multiple expressions: *side effects.* Assignment is a side effect, too; it modifies the environment. In functional style, intermediate named definitions to increase readability are perhaps the most useful kind of side effect. + +See also `multilambda` in [macros](macros.md). -See also ``multilambda`` in [macros](macros.md). +### `begin`: sequence side effects -### ``begin``: sequence side effects +**CAUTION**: the `begin` family of forms are provided **for use in pure-Python projects only**, and are a permanent part of the `unpythonic` API for that purpose. They are somewhat simpler and less flexible than the `do` family, described further below. -**CAUTION**: the `begin` family of forms are provided **for use in pure-Python projects only** (and are a permanent part of the `unpythonic` API for that purpose). If your project uses macros, prefer the `do[]` and `do0[]` macros; these are the only sequencing constructs understood by other macros in `unpythonic.syntax` that need to perform tail-position analysis (e.g. `tco`, `autoreturn`, `continuations`). The `do[]` and `do0[]` macros also provide some convenience features, such as expression-local variables. 
+*If your project uses macros, prefer the `do[]` and `do0[]` macros; those are the only sequencing constructs understood by other macros in `unpythonic.syntax` that need to perform tail-position analysis (e.g. `tco`, `autoreturn`, `continuations`). The `do[]` and `do0[]` macros also provide some convenience features, such as expression-local variables.* ```python from unpythonic import begin, begin0 f1 = lambda x: begin(print("cheeky side effect"), - 42*x) + 42 * x) f1(2) # --> 84 -f2 = lambda x: begin0(42*x, +f2 = lambda x: begin0(42 * x, print("cheeky side effect")) f2(2) # --> 84 ``` -Actually a tuple in disguise. If worried about memory consumption, use `lazy_begin` and `lazy_begin0` instead, which indeed use loops. The price is the need for a lambda wrapper for each expression to delay evaluation, see [`unpythonic.seq`](../unpythonic/seq.py) for details. +The `begin` and `begin0` forms are actually tuples in disguise; evaluation of **all** items occurs before the `begin` or `begin0` form gets control. Items are evaluated left-to-right due to Python's argument passing rules. + +We provide also `lazy_begin` and `lazy_begin0`, which use loops. The price is the need for a lambda wrapper for each expression to delay evaluation. See the module [`unpythonic.seq`](../unpythonic/seq.py) for details. + + +### `do`: stuff imperative code into an expression + +**NOTE**: *This is primarily a code generation target API for the `do[]` and `do0[]` [macros](macros.md), which make the constructs easier to use, and make the code look almost like normal Python. Below is the documentation for the raw API.* +Basically, the `do` family is a more advanced and flexible variant of the `begin` family. -### ``do``: stuff imperative code into an expression + - `do` can bind names to intermediate results and then use them in later items. -**NOTE**: This is primarily a code generation target API for the ``do[]`` [macro](macros.md), which makes the construct easier to use. 
Below is the documentation for the raw API. + - `do` is effectively a `let*` (technically, `letrec`) where making a binding is optional, so that some items can have only side effects if so desired. There is no semantically distinct `body`; all items play the same role. -No monadic magic. Basically, ``do`` is: + - Despite the name, there is no monadic magic. - - An improved ``begin`` that can bind names to intermediate results and then use them in later items. +Like in `letrec`, use `lambda e: ...` to access the environment, and to wrap callable values (to prevent misinterpretation by the machinery). - - A ``let*`` (technically, ``letrec``) where making a binding is optional, so that some items can have only side effects if so desired. No semantically distinct ``body``; all items play the same role. +Unlike `begin` (and `begin0`), there is no separate `lazy_do` (`lazy_do0`), because using a `lambda e: ...` wrapper for an item will already delay its evaluation; and the main point of `do`/`do0` is that there is an environment that holds local definitions. If you want a lazy variant, just wrap each item with a `lambda e: ...`, also those that don't otherwise need it. -Like in ``letrec`` (see below), use ``lambda e: ...`` to access the environment, and to wrap callable values (to prevent misunderstandings). +#### `do` + +Like `begin` and `lazy_begin`, the `do` form evaluates all items in order, and then returns the value of the **last** item. 
```python from unpythonic import do, assign @@ -830,7 +1054,7 @@ y = do(assign(x=17), # create and set e.x assert y == 42 y = do(assign(x=17), - assign(z=lambda e: 2*e.x), + assign(z=lambda e: 2 * e.x), lambda e: e.z) assert y == 34 @@ -841,16 +1065,89 @@ y = do(assign(x=5), assert y == 25 ``` -If you need to return the first value instead of the last one, use this trick: +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, do, local + +y = do[local[x << 17], # create and set an x local to the environment + print(x), + x << 23, # overwrite x + print(x), + 42] # return value +assert y == 42 + +y = do[local[x << 17], + local[z << 2 * x], + z] +assert y == 34 + +y = do[local[x << 5], + local[f << (lambda x: x**2)], + print("hello from 'do'"), + f(x)] +assert y == 25 +``` + +*In the macro version, all items are delayed automatically; that is, **every** item has an implicit `lambda e: ...`. Note that instead of the `assign` function, the macro version uses the syntax `local[name << value]` to **create** an expression-local variable. Updating an existing variable in the `do` environment is just `name << value`. Finally, there is also `delete[name]`.* + +When using the raw API, beware of this pitfall: + +```python +from unpythonic import do + +do(lambda e: print("hello 2 from 'do'"), # delayed because lambda e: ... + print("hello 1 from 'do'"), # Python prints immediately before do() + "foo") # gets control, because technically, it is + # **the return value** that is an argument + # for do(). +``` + +The above pitfall also applies to using escape continuations inside a `do`. To do that, wrap the ec call into a `lambda e: ...` to delay its evaluation until the `do` actually runs: + +```python +from unpythonic import call_ec, do, assign + +call_ec( + lambda ec: + do(assign(x=42), + lambda e: ec(e.x), # IMPORTANT: must delay this! 
+ lambda e: print("never reached"))) # and this (as above) +``` + +This way, any assignments made in the `do` (which occur only after `do` gets control), performed above the line with the `ec` call, will have been performed when the `ec` is called. + +For comparison, with the macro API, the last example becomes: + +```python +from unpythonic.syntax import macros, do, local +from unpythonic import call_ec + +call_ec( + lambda ec: + do[local[x << 42], + ec(x), + print("never reached")]) +``` + +*In the macro version, all items are delayed automatically, so there `do`/`do0` gets control before any items are evaluated. The `ec` fires when the `do` evaluates that item, and the `print` is indeed never reached.* + +#### `do0` + +Like `begin0` and `lazy_begin0`, the `do0` form evaluates all items in order, and then returns the value of the **first** item. + +It effectively does this internally: ```python +from unpythonic import do, assign + y = do(assign(result=17), print("assigned 'result' in env"), lambda e: e.result) # return value assert y == 17 ``` -Or use ``do0``, which does it for you: +So we can write: ```python from unpythonic import do0, assign @@ -866,37 +1163,54 @@ y = do0(assign(x=17), # the first item of do0 can be an assignment, too assert y == 17 ``` -Beware of this pitfall: +For comparison, with the macro API, this becomes: ```python -do(lambda e: print("hello 2 from 'do'"), # delayed because lambda e: ... - print("hello 1 from 'do'"), # Python prints immediately before do() - "foo") # gets control, because technically, it is - # **the return value** that is an argument - # for do(). -``` +from unpythonic.syntax import macros, do, do0, local -Unlike ``begin`` (and ``begin0``), there is no separate ``lazy_do`` (``lazy_do0``), because using a ``lambda e: ...`` wrapper will already delay evaluation of an item. If you want a lazy variant, just wrap each item (also those which don't otherwise need it).
+y = do[local[result << 17], + print("assigned 'result' in env"), + result] +assert y == 17 -The above pitfall also applies to using escape continuations inside a ``do``. To do that, wrap the ec call into a ``lambda e: ...`` to delay its evaluation until the ``do`` actually runs: +y = do0[17, + local[x << 42], + print(x), + print("hello from 'do0'")] +assert y == 17 -```python -call_ec( - lambda ec: - do(assign(x=42), - lambda e: ec(e.x), # IMPORTANT: must delay this! - lambda e: print("never reached"))) # and this (as above) +y = do0[local[x << 17], + print(x)] +assert y == 17 ``` -This way, any assignments made in the ``do`` (which occur only after ``do`` gets control), performed above the line with the ``ec`` call, will have been performed when the ``ec`` is called. +### `pipe`, `piped`, `lazy_piped`: sequence functions + +**Changed in v0.15.0.** *Multiple return values and named return values, for unpacking to the args and kwargs of the next function in the pipe, as well as in the final return value from the pipe, are now represented as a `Values`.* + +*The variants `pipe` and `pipec` now expect a `Values` initial value if you want to unpack it into the args and kwargs of the first function in the pipe. Otherwise, the initial value is sent as a single positional argument (notably tuples too).* -### ``pipe``, ``piped``, ``lazy_piped``: sequence functions +*The variants `piped` and `lazy_piped` automatically pack the initial arguments into a `Values`.* -Similar to Racket's [threading macros](https://docs.racket-lang.org/threading/). A pipe performs a sequence of operations, starting from an initial value, and then returns the final value. It's just function composition, but with an emphasis on data flow, which helps improve readability: +*The deprecated names `getvalue` and `runpipe` have been removed.* + +**Changed in v0.14.2**. *Both `getvalue` and `runpipe`, used in the shell-like syntax, are now known by the single unified name `exitpipe`. 
This is just a rename, with no functionality changes. The old names are now deprecated.* + +Similar to Racket's [threading macros](https://docs.racket-lang.org/threading/), but no macros. A pipe performs a sequence of operations, starting from an initial value, and then returns the final value. It is just function composition, but with an emphasis on data flow, which helps improve readability. + +Both one-in-one-out (*1-to-1*) and n-in-m-out (*n-to-m*) pipes are provided. The 1-to-1 versions have names suffixed with `1`, and they are slightly faster than the general versions. The use case is one-argument functions that return one value. + +In the n-to-m versions, when a function returns a `Values`, it is unpacked to the args and kwargs of the next function in the pipeline. When a pipe exits, the `Values` wrapper (if any) around the final result is discarded if it contains only one positional value. The main use case is computations that deal with multiple values, the number of which may also change during the computation (as long as the args/kwargs of each output `Values` can be accepted as input by the next function in the pipe). + +Additional examples can be found in [the unit tests](../unpythonic/tests/test_seq.py). + +#### `pipe` + +The function `pipe` represents a self-contained pipeline that starts from a given value (or values), applies some operations in sequence, and then exits: ```python -from unpythonic import pipe +from unpythonic import pipe, Values double = lambda x: 2 * x inc = lambda x: x + 1 @@ -905,11 +1219,43 @@ x = pipe(42, double, inc) assert x == 85 ``` -We also provide ``pipec``, which curries the functions before applying them. Useful with passthrough (see below on ``curry``). 
+To pass several positional values and/or named values, use a `Values` object: + +```python +from unpythonic import pipe, Values + +a, b = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(x * 2, y + 1)) +assert (a, b) == (6, 7) +``` + +In this example, we pass the initial values positionally into the first function in the pipeline; that function passes its return values by name; and the second function in the pipeline passes the final results positionally. Because there are only positional values in the final `Values` object, it can be unpacked like a tuple. + +#### `pipec` + +The function `pipec` is otherwise exactly like `pipe`, but it curries the functions before applying them. This is useful with the passthrough feature of `curry`. + +With `pipec` you can do things like: + +```python +from unpythonic import pipec, Values + +a, b = pipec(Values(1, 2), + lambda x: x + 1, # extra values passed through by curry (positionals on the right) + lambda x, y: Values(x * 2, y + 1)) +assert (a, b) == (4, 3) +``` + +For more on passthrough, see the section on `curry`. -Optional **shell-like syntax**, with purely functional updates. +#### `piped` -**Changed in v0.14.2**. *Both `getvalue` and `runpipe` are now known by the single unified name `exitpipe`. This is just a rename, with no functionality changes. The old names are now deprecated, and will be removed in 0.15.0.* +We also provide a **shell-like syntax**, with purely functional updates. + +To set up a pipeline for use with the shell-like syntax, call `piped` to load the initial value(s). It is possible to provide both positional and named values. Each use of the pipe operator applies the given function, but keeps the result inside the pipeline, ready to accept another function. 
+ +When done, pipe into the sentinel `exitpipe` to exit the pipeline and return the current value(s): ```python from unpythonic import piped, exitpipe @@ -922,17 +1268,41 @@ assert p | inc | exitpipe == 85 assert p | exitpipe == 84 # p itself is never modified by the pipe system ``` -Set up a pipe by calling ``piped`` for the initial value. Pipe into the sentinel ``exitpipe`` to exit the pipe and return the current value. +Multiple values work like in `pipe`, except the initial value(s) passed to `piped` are automatically packed into a `Values`. The pipe system then automatically unpacks a `Values` object into the args/kwargs of the next function in the pipeline. + +To return multiple positional values and/or named values, return a `Values` object from your function. + +When `exitpipe` is applied, if the last function returned anything other than one positional value, you will get a `Values` object. + +```python +from unpythonic import piped, exitpipe, Values + +f = lambda x, y: Values(2 * x, y + 1) +g = lambda x, y: Values(x + 1, 2 * y) +x = piped(2, 3) | f | g | exitpipe # --> (5, 8) +assert x == Values(5, 8) +``` + +Unpacking works also here, because in the final result, there are only positional values: + +```python +from unpythonic import piped, exitpipe + +a, b = piped(2, 3) | f | g | exitpipe # --> (5, 8) +assert (a, b) == (5, 8) +``` + +#### `lazy_piped` -**Lazy pipes**, useful for mutable initial values. To perform the planned computation, pipe into the sentinel ``exitpipe``: +Lazy pipes are useful when you have mutable initial values. 
To perform the planned computation, pipe into the sentinel `exitpipe`: ```python from unpythonic import lazy_piped1, exitpipe lst = [1] -def append_succ(l): - l.append(l[-1] + 1) - return l # this return value is handed to the next function in the pipe +def append_succ(lis): + lis.append(lis[-1] + 1) + return lis # this return value is handed to the next function in the pipe p = lazy_piped1(lst) | append_succ | append_succ # plan a computation assert lst == [1] # nothing done yet p | exitpipe # run the computation @@ -947,18 +1317,17 @@ from unpythonic import lazy_piped, exitpipe fibos = [] def nextfibo(a, b): # multiple arguments allowed fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + # New state, handed to the next function in the pipe. + # As of v0.15.0, use `Values(...)` to represent multiple return values. + # Positional args will be passed positionally, named ones by name. + return Values(a=b, b=(a + b)) p = lazy_piped(1, 1) # load initial state for _ in range(10): # set up pipeline p = p | nextfibo -p | exitpipe +assert (p | exitpipe) == Values(a=89, b=144) # run; check final state assert fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] ``` -Both one-in-one-out (*1-to-1*) and n-in-m-out (*n-to-m*) pipes are provided. The 1-to-1 versions have names suffixed with ``1``. The use case is one-argument functions that return one value (which may also be a tuple). - -In the n-to-m versions, when a function returns a tuple, it is unpacked to the argument list of the next function in the pipe. At ``exitpipe`` time, the tuple wrapper (if any) around the final result is discarded if it contains only one item. (This allows the n-to-m versions to work also with a single value, as long as it is not a tuple.) 
The main use case is computations that deal with multiple values, the number of which may also change during the computation (as long as there are as many "slots" on both sides of each individual connection). - ## Batteries @@ -966,67 +1335,35 @@ Things missing from the standard library. ### Batteries for functools - - `memoize`: - - Caches also exceptions à la Racket. If the memoized function is called again with arguments with which it raised an exception the first time, the same exception instance is raised again. - - Works also on instance methods, with results cached separately for each instance. - - This is essentially because ``self`` is an argument, and custom classes have a default ``__hash__``. - - Hence it doesn't matter that the memo lives in the ``memoized`` closure on the class object (type), where the method is, and not directly on the instances. The memo itself is shared between instances, but calls with a different value of ``self`` will create unique entries in it. - - For a solution that performs memoization at the instance level, see [this ActiveState recipe](https://github.com/ActiveState/code/tree/master/recipes/Python/577452_memoize_decorator_instance) (and to demystify the magic contained therein, be sure you understand [descriptors](https://docs.python.org/3/howto/descriptor.html)). - - `curry`, with some extra features: - - Passthrough on the right when too many args (à la Haskell; or [spicy](https://github.com/Technologicat/spicy) for Racket) - - If the intermediate result of a passthrough is callable, it is (curried and) invoked on the remaining positional args. This helps with some instances of [point-free style](https://en.wikipedia.org/wiki/Tacit_programming). - - For simplicity, all remaining keyword args are fed in at the first step that has too many positional args. - - If more positional args are still remaining when the top-level curry context exits, by default ``TypeError`` is raised. 
- - To override, set the dynvar ``curry_context``. It is a list representing the stack of currently active curry contexts. A context is any object, a human-readable label is fine. See below for an example. - - To set the dynvar, `from unpythonic import dyn`, and then `with dyn.let(curry_context=...):`. - - Can be used both as a decorator and as a regular function. - - As a regular function, `curry` itself is curried à la Racket. If it gets extra arguments (beside the function ``f``), they are the first step. This helps eliminate many parentheses. - - **Caution**: If the positional arities of ``f`` cannot be inspected, currying fails, raising ``UnknownArity``. This may happen with builtins such as ``list.append``. + - `memoize`, with exception caching. + - `curry`, with passthrough like in Haskell. + - `fix`: detect and break infinite recursion cycles. **Added in v0.14.2.** + - `partial` with run-time type checking, which helps a lot with fail-fast in code that uses partial application. This function type-checks arguments against type annotations, then delegates to `functools.partial`. Supports `unpythonic`'s `@generic` and `@typed` functions, too. **Added in v0.15.0.** - `composel`, `composer`: both left-to-right and right-to-left function composition, to help readability. - - Any number of positional arguments is supported, with the same rules as in the pipe system. Multiple return values packed into a tuple are unpacked to the argument list of the next function in the chain. - - `composelc`, `composerc`: curry each function before composing them. Useful with passthrough. - - An implicit top-level curry context is inserted around all the functions except the one that is applied last. - - `composel1`, `composer1`: 1-in-1-out chains (faster; also useful for a single value that is a tuple). + - **Changed in v0.15.0.** *For the benefit of code using the `with lazify` macro, the compose functions are now marked lazy. 
Arguments will be forced only when a lazy function in the chain actually uses them, or when an eager (not lazy) function is encountered in the chain.* + - Any number of positional and keyword arguments are supported, with the same rules as in the pipe system. Multiple return values, or named return values, represented as a `Values`, are automatically unpacked to the args and kwargs of the next function in the chain. + - `composelc`, `composerc`: curry each function before composing them. This comboes well with the passthrough of extra args/kwargs in `curry`. + - An implicit top-level curry context is inserted around all the functions except the one that is applied last, to allow passthrough to the top level while applying the composed function. + - `composel1`, `composer1`: 1-in-1-out chains (faster). - suffix `i` to use with an iterable that contains the functions (`composeli`, `composeri`, `composelci`, `composerci`, `composel1i`, `composer1i`) - `withself`: essentially, the Y combinator trick as a decorator. Allows a lambda to refer to itself. - - The ``self`` argument is declared explicitly, but passed implicitly (as the first positional argument), just like the ``self`` argument of a method. - - `apply`: the lispy approach to starargs. Mainly useful with the ``prefix`` [macro](macros.md). + - The `self` argument is declared explicitly, but passed implicitly (as the first positional argument), just like the `self` argument of a method. + - `apply`: the lispy approach to starargs. Mainly useful with the `prefix` [macro](macros.md). - `andf`, `orf`, `notf`: compose predicates (like Racket's `conjoin`, `disjoin`, `negate`). + - **Changed in v0.15.0.** *For the benefit of code using the `with lazify` macro, `andf` and `orf` are now marked lazy. Arguments will be forced only when a lazy predicate in the chain actually uses them, or when an eager (not lazy) predicate is encountered in the chain.* - `flip`: reverse the order of positional arguments. 
- `rotate`: a cousin of `flip`. Permute the order of positional arguments in a cycle. - `to1st`, `to2nd`, `tokth`, `tolast`, `to` to help inserting 1-in-1-out functions into m-in-n-out compose chains. (Currying can eliminate the need for these.) - `identity`, `const` which sometimes come in handy when programming with higher-order functions. - - `fix`: detect and break infinite recursion cycles. **Added in v0.14.2.** -Examples (see also the next section): +We will discuss `memoize`, `curry` and `fix` in more detail shortly; but first, we will give some examples of the other utilities. Note that as always, more examples can be found in [the unit tests](../unpythonic/tests/test_fun.py). ```python -from operator import add, mul from typing import NoReturn -from unpythonic import (memoize, fix, andf, orf, flatmap, rotate, curry, dyn, - zipr, rzip, foldl, foldr, composer, to1st, cons, nil, ll, - withself) - -# memoize: cache the results of pure functions (arguments must be hashable) -ncalls = 0 -@memoize # <-- important part -def square(x): - global ncalls - ncalls += 1 - return x**2 -assert square(2) == 4 -assert ncalls == 1 -assert square(3) == 9 -assert ncalls == 2 -assert square(3) == 9 -assert ncalls == 2 # called only once for each unique set of arguments -assert square(x=3) == 9 -assert ncalls == 2 # only the resulting bindings matter, not how you pass the args - - # "memoize lambda": classic evaluate-at-most-once thunk -thunk = memoize(lambda: print("hi from thunk")) -thunk() # the message is printed only the first time -thunk() +from unpythonic import (fix, andf, orf, rotate, + foldl, foldr, + withself, + composel) # detect and break infinite recursion cycles: # a(0) -> b(1) -> a(2) -> b(0) -> a(1) -> b(2) -> a(0) -> ... @@ -1038,6 +1375,7 @@ def b(k): return a((k + 1) % 3) assert a(0) is NoReturn # the call does return, saying the original function wouldn't. 
+# andf, orf: short-circuiting predicate combinators isint = lambda x: isinstance(x, int) iseven = lambda x: x % 2 == 0 isstr = lambda s: isinstance(s, str) @@ -1060,103 +1398,292 @@ myzipr = curry(foldr, zipper, ()) assert myzipl((1, 2, 3), (4, 5, 6), (7, 8)) == ((1, 4, 7), (2, 5, 8)) assert myzipr((1, 2, 3), (4, 5, 6), (7, 8)) == ((2, 5, 8), (1, 4, 7)) -# zip and reverse don't commute for inputs with different lengths -assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) # zip first -assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) # reverse first - -# curry with passthrough on the right -# final result is a tuple of the result(s) and the leftover args -double = lambda x: 2 * x -with dyn.let(curry_context=["whatever"]): # set a context to allow passthrough to the top level - assert curry(double, 2, "foo") == (4, "foo") # arity of double is 1 - -mysum = curry(foldl, add, 0) -myprod = curry(foldl, mul, 1) -a = ll(1, 2) -b = ll(3, 4) -c = ll(5, 6) -append_two = lambda a, b: foldr(cons, b, a) -append_many = lambda *lsts: foldr(append_two, nil, lsts) # see unpythonic.lappend -assert mysum(append_many(a, b, c)) == 21 -assert myprod(b) == 12 - -map_one = lambda f: curry(foldr, composer(cons, to1st(f)), nil) -doubler = map_one(double) -assert doubler((1, 2, 3)) == ll(2, 4, 6) - -assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +# composel: compose functions, applying the leftmost first +with_n = lambda *args: (partial(f, n) for n, f in args) +clip = lambda n1, n2: composel(*with_n((n1, drop), (n2, take))) +assert tuple(clip(5, 10)(range(20))) == tuple(range(5, 15)) ``` -*Minor detail*: We could also write the last example as: +In the last example, essentially we just want to `clip 5 10 (range 20)`, the grouping of the parentheses being pretty much an implementation detail. 
Using the passthrough in `curry` (more on which in the section on `curry`, below), we can rewrite the last line as: ```python -double = lambda x: 2 * x -rmap_one = lambda f: curry(foldl, composer(cons, to1st(f)), nil) # essentially reversed(map(...)) -map_one = lambda f: composer(rmap_one(f), lreverse) -assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +assert tuple(curry(clip, 5, 10, range(20))) == tuple(range(5, 15)) ``` -which may be a useful pattern for lengthy iterables that could overflow the call stack (although not in ``foldr``, since our implementation uses a linear process). -In ``rmap_one``, we can use either ``curry`` or ``functools.partial``. In this case it doesn't matter which, since we want just one partial application anyway. We provide two arguments, and the minimum arity of ``foldl`` is 3, so ``curry`` will trigger the call as soon as (and only as soon as) it gets at least one more argument. +#### `memoize` -The final ``curry`` uses both of the extra features. It invokes passthrough, since ``map_one`` has arity 1. It also invokes a call to the callable returned from ``map_one``, with the remaining arguments (in this case just one, the ``ll(1, 2, 3)``). +**Changed in v0.15.0.** *Fix bug: `memoize` is now thread-safe. Even when the same memoized function instance is called concurrently from multiple threads, exactly one thread will compute the result. If `f` is recursive, the thread that acquired the lock is the one that is allowed to recurse into the memoized `f`.* -Yet another way to write ``map_one`` is: +[*Memoization*](https://en.wikipedia.org/wiki/Memoization) is a functional programming technique, meant to be used with [pure functions](https://en.wikipedia.org/wiki/Pure_function). It caches the return value, so that *for each unique set of arguments*, the original function will be evaluated only once. All arguments must be hashable.
+ +Our `memoize` caches also exceptions, à la the [Mischief package in Racket](https://docs.racket-lang.org/mischief/memoize.html). If the memoized function is called again with arguments with which it raised an exception the first time, **that same exception instance** is raised again. + +The decorator **works also on instance methods**, with results cached separately for each instance. This is essentially because `self` is an argument, and custom classes have a default `__hash__`. Hence it doesn't matter that the memo lives in the `memoized` closure on the class object (type), where the method is, and not directly on the instances. The memo itself is shared between instances, but calls with a different value of `self` will create unique entries in it. (This approach does have the expected problem: if lots of instances are created and destroyed, and a memoized method is called for each, the memo will grow without bound.) + +*For a solution that performs memoization at the instance level, see [this ActiveState recipe](https://github.com/ActiveState/code/tree/master/recipes/Python/577452_memoize_decorator_instance) (and to demystify the magic contained therein, be sure you understand [descriptors](https://docs.python.org/3/howto/descriptor.html)).* + +There are some **important differences** to the nearest equivalents in the standard library, [`functools.cache`](https://docs.python.org/3/library/functools.html#functools.cache) (Python 3.9+) and [`functools.lru_cache`](https://docs.python.org/3/library/functools.html#functools.lru_cache): + + - `memoize` **binds arguments** like Python itself does, so given this definition: + + ```python + from unpythonic import memoize + + @memoize + def f(a, b): + return a + b + ``` + + the calls `f(1, 2)`, `f(1, b=2)`, `f(a=1, b=2)`, and `f(b=2, a=1)` all hit **the same cache key**. 
+ + As of Python 3.9, in `functools.lru_cache` this is not so; see the internal function `functools._make_key` in [`functools.py`](https://github.com/python/cpython/blob/main/Lib/functools.py), where the comments explicitly say so. + + - `memoize` **caches exceptions**, too. A pure function that crashed for some combination of arguments, if given the same inputs again, will just crash again with the same error, so there is no reason to run it again. + + - `memoize` has **no** maximum cache size or hit/miss statistics counting. + + - `memoize` does **not** have a `typed` mode to treat `42` and `42.0` as different keys to the memo. The function arguments are hashed, and both an `int` and an equal `float` happen to hash to the same value. + + The `typed` mode of the standard library functions is actually a form of dispatch. Hence, you can use `@generic` (which see), and `@memoize` each individual multimethod: + + ```python + from unpythonic import generic, memoize + + @generic + @memoize + def thrice(x: int): + return 3 * x + + @generic + @memoize + def thrice(x: float): + return 3.0 * x + ``` + + Without using `@generic`, the essential idea is: + + ```python + from unpythonic import memoize + + def thrice(x): # the dispatcher + if isinstance(x, int): + return thrice_int(x) + elif isinstance(x, float): + return thrice_float(x) + raise TypeError(f"unsupported argument: {type(x)} with value {repr(x)}") + + @memoize + def thrice_int(x): + return 3 * x + + @memoize + def thrice_float(x): + return 3.0 * x + ``` + + Observe that we memoize **each implementation**, not the dispatcher. + + This solution keeps dispatching and memoization orthogonal. 
+ +Examples: + +```python +from unpythonic import memoize + +ncalls = 0 +@memoize # <-- important part +def square(x): + global ncalls + ncalls += 1 + return x**2 +assert square(2) == 4 +assert ncalls == 1 +assert square(3) == 9 +assert ncalls == 2 +assert square(3) == 9 +assert ncalls == 2 # called only once for each unique set of arguments +assert square(x=3) == 9 +assert ncalls == 2 # only the resulting bindings matter, not how you pass the args + +# "memoize lambda": classic evaluate-at-most-once thunk +# See also the `lazy[]` macro. +thunk = memoize(lambda: print("hi from thunk")) +thunk() # the message is printed only the first time +thunk() +``` + + +#### `curry` + +**Changed in v0.15.0.** *`curry` supports both positional and named arguments, and binds arguments to function parameters like Python itself does. The call triggers when all parameters are bound, regardless of whether they were passed by position or by name, and at which step of the currying process they were passed.* + +*`unpythonic`'s multiple-dispatch system (`@generic`, `@typed`) is supported. `curry` looks for an exact match first, then a match with extra args/kwargs, and finally a partial match. If there is still no match, this implies that at least one parameter would get a binding that fails the type check. In such a case `TypeError` regarding failed multiple dispatch is raised.* + +*If the function being curried is `@generic` or `@typed`, or has type annotations on its parameters, the parameters being passed in are type-checked. A type mismatch immediately raises `TypeError`. This helps support [fail-fast](https://en.wikipedia.org/wiki/Fail-fast) in code using `curry`.* + +[*Currying*](https://en.wikipedia.org/wiki/Currying) is a technique in functional programming, where a function that takes multiple arguments is converted to a sequence of nested one-argument functions, each one *specializing* (fixing the value of) the leftmost remaining positional parameter. 
Each such function returns another function that takes the next parameter. The last function, when no more parameters remain, then performs the actual computation and returns the result. + +Some languages, such as Haskell, curry all functions natively. In languages that do not, like Python or [Racket](https://docs.racket-lang.org/reference/procedures.html#%28def._%28%28lib._racket%2Ffunction..rkt%29._curry%29%29), when currying is implemented as a library function, this is often done as a form of [partial application](https://en.wikipedia.org/wiki/Partial_application), which is a subtly different concept, but encompasses the curried behavior as a special case. In practice this means that you can pass several arguments in a single step, and the original function will be called when all parameters have been bound. + +Our `curry` can be used both as a decorator and as a regular function. As a decorator, `curry` takes no decorator arguments. As a regular function, `curry` itself is curried à la Racket. If any args or kwargs are given (beside the function to be curried), they are the first step. This helps eliminate many parentheses. + +**CAUTION**: If the signature of `f` cannot be inspected, currying fails, raising `ValueError`, like `inspect.signature` does. This may happen with builtins such as `list.append`, `operator.add`, `print`, or `range`, depending on which version of Python is used (and whether it is CPython or PyPy3). + +Like Haskell, and [`spicy` for Racket](https://github.com/Technologicat/spicy), our `curry` supports *passthrough*; but we pass through **both positional and named arguments**. + +Any args and/or kwargs that are incompatible with the target function's call signature, are *passed through* in the sense that the function is called with the args and kwargs compatible with its call signature, and then its return value is merged with the remaining args and kwargs. 
+ +If the *first positional return value* of the result of passthrough is callable, it is (curried and) invoked on the remaining args and kwargs, after the merging. This helps with some instances of [point-free style](https://en.wikipedia.org/wiki/Tacit_programming). + +Some finer points concerning the passthrough feature: + + - *Incompatible* means too many positional args, or named args that have no corresponding parameter. Note that if the function has a `**kwargs` parameter, then all named args are considered compatible, because it absorbs anything. + + - Multiple return values (both positional and named) are denoted using `Values` (which see). A standard return value is considered to consist of *one positional return value* only (even if it is a `tuple`). + + - Extra positional args are passed through **on the right**. Any positional return values of the curried function are prepended, on the left. + + - Extra named args are passed through by name. They may be overridden by named return values (with the same name) from the curried function. + + - If more args/kwargs are still remaining when the top-level curry context exits, by default `TypeError` is raised. + - To override this behavior, set the dynvar `curry_context`. It is a list representing the stack of currently active curry contexts. A context is any object, a human-readable label is fine. See below for an example. + - To set the dynvar, `from unpythonic import dyn`, and then `with dyn.let(curry_context=["whatever"]):`. 
+ +Examples: + +```python +from operator import add, mul +from unpythonic import curry, foldl, foldr, composer, to1st, cons, nil, ll, dyn, Values + +mysum = curry(foldl, add, 0) +myprod = curry(foldl, mul, 1) +a = ll(1, 2) +b = ll(3, 4) +c = ll(5, 6) +append_two = lambda a, b: foldr(cons, b, a) +append_many = lambda *lsts: foldr(append_two, nil, lsts) # see unpythonic.lappend +assert mysum(append_many(a, b, c)) == 21 +assert myprod(b) == 12 + +# curry with passthrough +double = lambda x: 2 * x +with dyn.let(curry_context=["whatever"]): # set a context to allow passthrough to the top level + # positionals are passed through on the right + assert curry(double, 2, "foo") == Values(4, "foo") # arity of double is 1 + # named args are passed through by name + assert curry(double, 2, nosucharg="foo") == Values(4, nosucharg="foo") + +# actual use case for passthrough +map_one = lambda f: curry(foldr, composer(cons, to1st(f)), nil) +doubler = map_one(double) +assert doubler((1, 2, 3)) == ll(2, 4, 6) + +assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +``` + +We could also write the last example as: + +```python +from unpythonic import curry, foldl, composer, cons, to1st, nil, ll, lreverse + +double = lambda x: 2 * x +rmap_one = lambda f: curry(foldl, composer(cons, to1st(f)), nil) # essentially reversed(map(...)) +map_one = lambda f: composer(rmap_one(f), lreverse) +assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +``` + +which may be a useful pattern for lengthy iterables that could overflow the call stack (although not in `foldr`, since our implementation uses a linear process). + +In the example, in `rmap_one`, we can use either `curry` or `partial`. In this case it does not matter which, since we want just one partial application anyway. We provide two arguments, and the minimum arity of `foldl` is 3, so `curry` will trigger the call as soon as (and only as soon as) it gets at least one more argument.
+ +The final `curry` in the example uses the passthrough features. The function `map_one` has arity 1, but two positional arguments are given. It also invokes a call to the callable returned by `map_one`, with the remaining arguments (in this case just one, the `ll(1, 2, 3)`). + +Yet another way to write `map_one` is: ```python +from unpythonic import curry, foldr, composer, cons, nil + mymap = lambda f: curry(foldr, composer(cons, curry(f)), nil) ``` -The curried ``f`` uses up one argument (provided it is a one-argument function!), and the second argument is passed through on the right; this two-tuple then ends up as the arguments to ``cons``. +The curried `f` uses up one argument (provided it is a one-argument function!), and the second argument is passed through on the right; these two values then end up as the arguments to `cons`. -Using a currying compose function (name suffixed with ``c``), the inner curry can be dropped: +Using a **currying compose function** (name suffixed with `c`), we can drop the inner curry: ```python +from unpythonic import curry, foldr, composerc, cons, nil, ll + mymap = lambda f: curry(foldr, composerc(cons, f), nil) myadd = lambda a, b: a + b assert curry(mymap, myadd, ll(1, 2, 3), ll(2, 4, 6)) == ll(3, 6, 9) ``` -This is as close to ```(define (map f) (foldr (compose cons f) empty)``` (in ``#lang`` [``spicy``](https://github.com/Technologicat/spicy)) as we're gonna get in Python. +This is as close to ```(define (map f) (foldr (compose cons f) empty))``` (in `#lang` [`spicy`](https://github.com/Technologicat/spicy)) as we're gonna get in pure Python. + +Notice how the last two versions accept multiple input iterables; this is thanks to currying `f` inside the composition. An element from each of the iterables is taken by the processing function `f`. Being the last argument, `acc` is passed through on the right. The output from the processing function - one new item - and `acc` then become two arguments, passed into `cons`.
-Notice how the last two versions accept multiple input iterables; this is thanks to currying ``f`` inside the composition. An element from each of the iterables is taken by the processing function ``f``. Being the last argument, ``acc`` is passed through on the right. The output from the processing function - one new item - and ``acc`` then become a two-tuple, passed into cons. +Finally, keep in mind the `mymap` example is intended as a feature demonstration. In production code, the builtin `map` is much better. It produces a lazy iterable, so it does not care which kind of actual data structure the items will be stored in (once they are computed). In other words, a lazy iterable is a much better model for a process that produces a sequence of values; how, and whether, to store that sequence is an orthogonal concern. -Finally, keep in mind this exercise is intended as a feature demonstration. In production code, the builtin ``map`` is much better. +The example we have here evaluates all items immediately, and specifically produces a linked list. It is just a nice example of function composition involving incompatible positional arities, thus demonstrating the kind of situation where the passthrough feature of `curry` is useful. It is taken from a paper by [John Hughes (1984)](https://www.cse.chalmers.se/~rjmh/Papers/whyfp.html). -#### ``curry`` and reduction rules +##### `curry` and reduction rules -The provided variant of ``curry``, beside what it says on the tin, is effectively an explicit local modifier to Python's reduction rules, which allows some Haskell-like idioms. When we say: +Our `curry`, beside what it says on the tin, is effectively an explicit local modifier to Python's reduction rules, which allows some Haskell-like idioms. Let's consider a simple example with positional arguments only. When we say: ```python curry(f, a0, a1, ..., a[n-1]) ``` -it means the following. 
Let ``m1`` and ``m2`` be the minimum and maximum positional arity of the callable ``f``, respectively. +it means the following. Let `m1` and `m2` be the minimum and maximum positional arity of the callable `f`, respectively. - - If ``n > m2``, call ``f`` with the first ``m2`` arguments. + - If `n > m2`, call `f` with the first `m2` arguments. - If the result is a callable, curry it, and recurse. - - Else form a tuple, where first item is the result, and the rest are the remaining arguments ``a[m2]``, ``a[m2+1]``, ..., ``a[n-1]``. Return it. - - If more positional args are still remaining when the top-level curry context exits, by default ``TypeError`` is raised. Use the dynvar ``curry_context`` to override; see above for an example. - - If ``m1 <= n <= m2``, call ``f`` and return its result (like a normal function call). - - **Any** positional arity accepted by ``f`` triggers the call; beware when working with [variadic](https://en.wikipedia.org/wiki/Variadic_function) functions. - - If ``n < m1``, partially apply ``f`` to the given arguments, yielding a new function with smaller ``m1``, ``m2``. Then curry the result and return it. - - Internally we stack ``functools.partial`` applications, but there will be only one ``curried`` wrapper no matter how many invocations are used to build up arguments before ``f`` eventually gets called. - -In the above example: + - Else form a tuple, where first item is the result, and the rest are the remaining arguments `a[m2]`, `a[m2+1]`, ..., `a[n-1]`. Return it. + - If more positional args are still remaining when the top-level curry context exits, by default `TypeError` is raised. Use the dynvar `curry_context` to override; see above for an example. + - If `m1 <= n <= m2`, call `f` and return its result (like a normal function call). + - **Any** positional arity accepted by `f` triggers the call; beware when working with [variadic](https://en.wikipedia.org/wiki/Variadic_function) functions. 
+ - If `n < m1`, partially apply `f` to the given arguments, yielding a new function with smaller `m1`, `m2`. Then curry the result and return it. + - Internally we stack `functools.partial` applications, but there will be only one `curried` wrapper no matter how many invocations are used to build up arguments before `f` eventually gets called. + +As of v0.15.0, the actual algorithm by which `curry` decides what to do, in the presence of kwargs, `@generic` functions, and `Values` multiple-return-values (and named return values), is: + + - If `f` is **not** `@generic` or `@typed`: + - Compute parameter bindings of the args and kwargs collected so far, against the call signature of `f`. + - Note we keep track of which arguments were passed positionally and which by name. To avoid subtle errors, they are eventually passed to `f` the same way they were passed to `curry`. (Positional args are passed positionally, and kwargs are passed by name.) + - If there are no unbound parameters, and no args/kwargs are left over, we have an exact match. Call `f` and return its result, like a normal function call. + - Any sequence of curried calls that ends up binding all parameters of `f` triggers the call. + - Beware when working with variadic functions. Particularly, keep in mind that `*args` matches **zero or more** positional arguments (as the [Kleene star](https://en.wikipedia.org/wiki/Kleene_star)-ish notation indeed suggests). + - If there are no unbound parameters, but there are args/kwargs left over, arrange passthrough for the leftover args/kwargs (that were rejected by the call signature of `f`), and call `f`. Any leftover positional arguments are passed through **on the right**. + - Merge the return value of `f` with the leftover args/kwargs, thus forming updated leftover args/kwargs. + - If the return value of `f` is a `Values`: prepend positional return values into the leftover args (i.e. 
insert them **on the left**), and update the leftover kwargs with the named return values. (I.e. a key name conflict causes an overwrite in the leftover kwargs.) + - Else: there is just one positional return value. Prepend it to the leftover args. + - If the first positional return value is a callable: remove it from the leftover args, curry it, and recurse with the (updated) leftover args/kwargs. + - Else: form a `Values` from the leftover args/kwargs, and return it. (This return goes to the next outer curry context, or at the top level, to the original caller.) + - If neither of the above match, we know there is at least one unbound parameter, i.e. we have a partial match. Keep currying. + - If `f` is `@generic` or `@typed`: + - Iterate over multimethods registered on `f`, **up to three times**. + - First, try for an exact match that passes the type check. **If any such match is found**, pick that multimethod. Call it and return its result (as above). + - Then, try for a match that passes the type check, but has extra args/kwargs. **If any such match is found**, pick that multimethod. Arrange passthrough... (as above). + - Then, try for a partial match that passes the type check. **If any such match is found**, keep currying. + - If none of the above match, it implies that no matter which multimethod we pick, at least one parameter will get a binding that fails the type check. Raise `TypeError`. + +If interested in the gritty details, see [the source code](../unpythonic/fun.py) of `unpythonic.curry`, in the module `unpythonic.fun`. It calls some functions from the module `unpythonic.dispatch` for its `@generic` support, but otherwise it is pretty much self-contained. + +Getting back to the simple case, in the above example: ```python curry(mapl_one, double, ll(1, 2, 3)) ``` -the callable ``mapl_one`` takes one argument, which is a function. It yields another function, let us call it ``g``. 
We are left with: +the callable `mapl_one` takes one argument, which is a function. It returns another function, let us call it `g`. We are left with: ```python curry(g, ll(1, 2, 3)) ``` -The argument is then passed into ``g``; we obtain a result, and reduction is complete. +The remaining argument is then passed into `g`; we obtain a result, and reduction is complete. A curried function is also a curry context: @@ -1166,19 +1693,19 @@ a2 = curry(add2) a2(a, b, c) # same as curry(add2, a, b, c); reduces to (a + b, c) ``` -so on the last line, we don't need to say +so on the last line, we do not need to say ```python curry(a2, a, b, c) ``` -because ``a2`` is already curried. Doing so does no harm, though; ``curry`` automatically prevents stacking ``curried`` wrappers: +because `a2` is already curried. Doing so does no harm, though; `curry` automatically prevents stacking `curried` wrappers: ```python curry(a2) is a2 # --> True ``` -If we wish to modify precedence, parentheses are needed, which takes us out of the curry context, unless we explicitly ``curry`` the subexpression. This works: +If we wish to modify precedence, parentheses are needed, which takes us out of the curry context, unless we explicitly `curry` the subexpression. This works: ```python curry(f, a, curry(g, x, y), b, c) @@ -1190,17 +1717,31 @@ but this **does not**: curry(f, a, (g, x, y), b, c) ``` -because ``(g, x, y)`` is just a tuple of ``g``, ``x`` and ``y``. This is by design; as with all things Python, *explicit is better than implicit*. +because `(g, x, y)` is just a tuple of `g`, `x` and `y`. This is by design; as with all things Python, *explicit is better than implicit*. + +**Note**: to code in curried style, a [contract system](https://en.wikipedia.org/wiki/Design_by_contract) or a type checker can be useful. Also, be careful with variadic functions, because any allowable arity will trigger the call. 
+ +(The `map` function in the standard library is a particular offender here, since it requires at least one iterable to actually do anything but raise `TypeError`, but its call signature suggests it can be called without any iterables. Hence, for curry-friendliness we provide a wrapper `unpythonic.map` that *requires* at least one iterable.) + +- Contract systems for Python include [icontract](https://github.com/Parquery/icontract) and [PyContracts](https://github.com/AndreaCensi/contracts). + +- For static type checking, consider [mypy](http://mypy-lang.org/). + +- For run-time type checking, consider `@typed` or `@generic` right here in `unpythonic`. -**Note**: to code in curried style, a [contract system](https://en.wikipedia.org/wiki/Design_by_contract) (such as [icontract](https://github.com/Parquery/icontract) or [PyContracts](https://github.com/AndreaCensi/contracts)) or the [mypy static type checker](http://mypy-lang.org/) can be useful; also, be careful with variadic functions. +- You can also just use Python's type annotations; `unpythonic`'s `curry` type-checks the arguments before accepting the curried function. The annotations work if the stdlib function [`typing.get_type_hints`](https://docs.python.org/3/library/typing.html#typing.get_type_hints) can find them. -#### ``fix``: break infinite recursion cycles +#### `fix`: break infinite recursion cycles -The name `fix` comes from the *least fixed point* with respect to the definedness relation, which is related to Haskell's `fix` function. However, this `fix` is not that function. Our `fix` breaks recursion cycles in strict functions - thus causing some non-terminating strict functions to return. (Here *strict* means that the arguments are evaluated eagerly.) +The name `fix` comes from the *least fixed point* with respect to the definedness relation, which is related to Haskell's `fix` function. However, this `fix` is **not** that function. 
Our `fix` breaks recursion cycles in strict functions - thus causing some non-terminating strict functions to return. (Here [*strict*](https://en.wikipedia.org/wiki/Evaluation_strategy#Strict_evaluation) means that the arguments are evaluated eagerly.) **CAUTION**: Worded differently, this function solves a small subset of the halting problem. This should be hint enough that it will only work for the advertised class of special cases - i.e., a specific kind of recursion cycles. +If you need `fix` for code that uses TCO, use `fixtco`. The implementations of recursion cycle breaking and TCO must interact in a very particular way to work properly; this is done by `fixtco`. + +For examples, see [the unit tests](../unpythonic/tests/test_fix.py). + Usage: ```python @@ -1223,11 +1764,11 @@ If no recursion cycle occurs, `f` returns normally. If a cycle occurs, the call - In the latter example, the name `"f"` and the offending args are returned. -**A cycle is detected when** `f` is called again with a set of args that have already been previously seen in the current call chain. Infinite mutual recursion is detected too, at the point where any `@fix`-instrumented function is entered again with a set of args already seen during the current call chain. +**A cycle is detected when** `f` is called again with a set of args that have already been previously seen in the current call chain. Infinite *mutual recursion* is detected too, at the point where any `@fix`-instrumented function is entered again with a set of args already seen during the current call chain. -**CAUTION**: The infinitely recursive call sequence `f(0) → f(1) → ... → f(k+1) → ...` contains no cycles in the sense detected by `fix`. The `fix` function will not catch all cases of infinite recursion, but only those where a previously seen set of arguments is seen again. (If `f` is pure, the same arguments appearing again implies the call will not return, so we can terminate it.) 
+**CAUTION**: The infinitely recursive call sequence `f(0) → f(1) → ... → f(k+1) → ...` contains no cycles in the sense detected by `fix`. The `fix` function will **not** catch all cases of infinite recursion, but only those where a previously seen set of arguments is seen again. If `f` is [pure](https://en.wikipedia.org/wiki/Pure_function), the same arguments appearing again during recursion implies the call will not return, so we can terminate it. -**CAUTION**: If we have a function `g(a, b)`, the argument lists of the invocations `g(1, 2)` and `g(a=1, b=2)` are in principle different. This is a Python gotcha that was originally noticed by the author of the `wrapt` library, and mentioned in [its documentation](https://wrapt.readthedocs.io/en/latest/decorators.html#processing-function-arguments). However, once arguments are bound to the formal parameters of `g`, the result is the same. We consider the *resulting bindings*, not the exact way the arguments were passed. +**CAUTION**: If we have a function `g(a, b)`, the argument lists of the invocations `g(1, 2)` and `g(a=1, b=2)` are in principle different. However, we bind arguments like Python itself does, and consider the *resulting bindings* only. It does not matter how the arguments were passed. We can use `fix` to find the (arithmetic) fixed point of `cos`: @@ -1272,7 +1813,7 @@ c = fixpoint(cos, x0=1) assert c == cos(c) ``` -**NOTE**: But see `unpythonic.fixpoint`, which is meant specifically for finding *arithmetic* fixed points, and `unpythonic.iterate1`, which produces a generator that iterates `f` without needing recursion. 
+**NOTE**: *See `unpythonic.fixpoint`, which is meant specifically for finding arithmetic fixed points, and `unpythonic.iterate1`, which produces a generator that iterates `f` without needing recursion.* **Notes**: @@ -1292,15 +1833,15 @@ assert c == cos(c) - `bottom` can be a callable, in which case the function name and args at the point where the cycle was detected are passed to it, and its return value becomes the final return value. This is useful e.g. for debug logging. - - The `memo` flag controls whether to memoize also intermediate results. It adds some additional function call layers between function entries from recursive calls; if that is a problem (due to causing Python's call stack to blow up faster), use `memo=False`. You can still memoize the final result if you want; just put `@memoize` on the outside. + The function name is provided, because we catch also infinite *mutual recursion*; so it can be a useful piece of information *which function* it was that was first called with already-seen arguments. -**NOTE**: If you need `fix` for code that uses TCO, use `fixtco` instead. The implementations of recursion cycle breaking and TCO must interact in a very particular way to work properly; this is done by `fixtco`. + - The `memo` flag controls whether to memoize intermediate results. It adds some additional function call layers between function entries from recursive calls; if that is a problem (due to causing Python's call stack to blow up faster), use `memo=False`. You can still memoize the final result if you want; just put `@memoize` on the outside. ##### Real-world use and historical note This kind of `fix` is sometimes helpful in recursive pattern-matching definitions for parsers. When the pattern matcher gets stuck in an infinite left-recursion, it can return a customizable special value instead of not terminating. Being able to not care about non-termination may simplify definitions. 
-This `fix` can also be used to find fixed points of functions, as in the above examples. +This `fix` can also be used to find arithmetic fixed points of functions, as in the above examples. The idea comes from Matthew Might's article on [parsing with (Brzozowski's) derivatives](http://matt.might.net/articles/parsing-with-derivatives/), where it was a utility implemented in Racket as the `define/fix` form. It was originally ported to Python [by Per Vognsen](https://gist.github.com/pervognsen/8dafe21038f3b513693e) (linked from the article). The `fix` in `unpythonic` is a redesign with kwargs support, thread safety, and TCO support. @@ -1316,7 +1857,7 @@ A simple way to explain Haskell's `fix` is: fix f = let x = f x in x ``` -so anywhere the argument is referred to in the definition of `f`, it is replaced by another application of `f`, recursively. This obviously yields a notation useful for corecursively defining infinite lazy lists. +so anywhere the argument is referred to in the definition of `f`, it is replaced by another application of `f`, recursively. This obviously yields a notation useful for [corecursively](https://en.wikipedia.org/wiki/Corecursion) defining infinite lazy lists. For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[2]](https://www.vex.net/~trebla/haskell/fix.xhtml) [[3]](https://stackoverflow.com/questions/4787421/how-do-i-use-fix-and-how-does-it-work) [[4]](https://medium.com/@cdsmithus/fixpoints-in-haskell-294096a9fc10) [[5]](https://en.wikibooks.org/wiki/Haskell/Fix_and_recursion). @@ -1324,15 +1865,15 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ ### Batteries for itertools - `unpack`: lazily unpack an iterable. Suitable for infinite inputs. - - Return the first ``n`` items and the ``k``th tail, in a tuple. Default is ``k = n``. - - Use ``k > n`` to fast-forward, consuming the skipped items. Works by `drop`. - - Use ``k < n`` to peek without permanently extracting an item. 
Works by [tee](https://docs.python.org/3/library/itertools.html#itertools.tee)ing; plan accordingly. - - *folds, scans, unfold*: + - Return the first `n` items and the `k`th tail, in a tuple. Default is `k = n`. + - Use `k > n` to fast-forward, consuming the skipped items. Works by `drop`. + - Use `k < n` to peek without permanently extracting an item. Works by [tee](https://docs.python.org/3/library/itertools.html#itertools.tee)ing; plan accordingly. + - *fold, scan, unfold*: - `foldl`, `foldr` with support for multiple input iterables, like in Racket. - Like in Racket, `op(elt, acc)`; general case `op(e1, e2, ..., en, acc)`. Note Python's own `functools.reduce` uses the ordering `op(acc, elt)` instead. - No sane default for multi-input case, so the initial value for `acc` must be given. - One-input versions with optional init are provided as `reducel`, `reducer`, with semantics similar to Python's `functools.reduce`, but with the rackety ordering `op(elt, acc)`. - - By default, multi-input folds terminate on the shortest input. To instead terminate on the longest input, use the ``longest`` and ``fillvalue`` kwargs. + - By default, multi-input folds terminate on the shortest input. To instead terminate on the longest input, use the `longest` and `fillvalue` kwargs. - For multiple inputs with different lengths, `foldr` syncs the **left** ends. - `rfoldl`, `rreducel` reverse each input and then left-fold. This syncs the **right** ends. - `scanl`, `scanr`: scan (a.k.a. accumulate, partial fold); a lazy fold that returns a generator yielding intermediate results. @@ -1345,23 +1886,24 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - `rscanl`, `rscanl1` reverse each input and then left-scan. This syncs the **right** ends. - `unfold1`, `unfold`: generate a sequence [corecursively](https://en.wikipedia.org/wiki/Corecursion). The counterpart of `foldl`. - `unfold1` is for 1-in-2-out functions. 
The input is `state`, the return value must be `(value, newstate)` or `None`. - - `unfold` is for n-in-(1+n)-out functions. The input is `*states`, the return value must be `(value, *newstates)` or `None`. - - Unfold returns a generator yielding the collected values. The output can be finite or infinite; to signify that a finite sequence ends, the user function must return `None`. + - `unfold` is for n-in-(1+n)-out functions. + - **Changed in v0.15.0.** *The initial args/kwargs are unpacked to the args/kwargs of the user function. The function must return a `Values` object, where the first positional return value is the value to yield, and anything else is unpacked to the args/kwargs of the user function at the next iteration.* + - Unfold returns a generator yielding the collected values. The output can be finite or infinite; to signify that a finite sequence ends, the user function must return `None`. (Besides a `Values` object, a bare `None` is the only other allowed return value from the user function.) - *mapping and zipping*: - `map_longest`: the final missing battery for `map`. - - Essentially `starmap(func, zip_longest(*iterables))`, so it's [spanned](https://en.wikipedia.org/wiki/Linear_span) by ``itertools``. + - Essentially `starmap(func, zip_longest(*iterables))`, so it's [spanned](https://en.wikipedia.org/wiki/Linear_span) by `itertools`, but it's convenient to have a named shorthand to do that. - `rmap`, `rzip`, `rmap_longest`, `rzip_longest`: reverse each input, then map/zip. For multiple inputs, syncs the **right** ends. - `mapr`, `zipr`, `mapr_longest`, `zipr_longest`: map/zip, then reverse the result. For multiple inputs, syncs the **left** ends. - `map`: curry-friendly wrapper for the builtin, making it mandatory to specify at least one iterable. **Added in v0.14.2.** - *windowing, chunking, and similar*: - - `window`: sliding length-n window iterator for general iterables.
Acts like the well-known [n-gram zip trick](http://www.locallyoptimal.com/blog/2013/01/20/elegant-n-gram-generation-in-python/), but the input can be any iterable. + - `window`: sliding length-n window iterator for general iterables. Acts like the well-known [n-gram zip trick](http://www.locallyoptimal.com/blog/2013/01/20/elegant-n-gram-generation-in-python/), but the input can be any iterable. **Changed in v0.15.0.** *Parameter ordering is now `window(n, iterable)`, to make it curry-friendly.* - `chunked`: split an iterable into constant-length chunks. **Added in v0.14.2.** - `pad`: extend an iterable to length at least `n` with a `fillvalue`. **Added in v0.14.2.** - `interleave`: interleave items from several iterables: `interleave(a, b, c)` → `a0, b0, c0, a1, b1, c1, ...` until the next item does not exist. **Added in v0.14.2.** - This differs from `zip` in that the output is flattened, and the termination condition is checked after each item. So e.g. `interleave(['a', 'b', 'c'], ['+', '*'])` → `['a', '+', 'b', '*', 'c']` (the actual return value is a generator, not a list). - *flattening*: - `flatmap`: map a function, that returns a list or tuple, over an iterable and then flatten by one level, concatenating the results into a single tuple. - - Essentially, ``composel(map(...), flatten1)``; the same thing the bind operator of the List monad does. + - Essentially, `composel(map(...), flatten1)`; the same thing the bind operator of the List monad does. - `flatten1`, `flatten`, `flatten_in`: remove nested list structure. - `flatten1`: outermost level only. - `flatten`: recursive, with an optional predicate that controls whether to flatten a given sublist. @@ -1369,7 +1911,7 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - *extracting items, subsequences*: - `take`, `drop`, `split_at`: based on `itertools` [recipes](https://docs.python.org/3/library/itertools.html#itertools-recipes). - Especially useful for testing generators. 
- - `islice` is maybe more pythonic than `take` and `drop`. We provide a utility that supports the slice syntax. + - `islice` is maybe more pythonic than `take` and `drop`; it enables slice syntax for any iterable. - `tail`: return the tail of an iterable. Same as `drop(1, iterable)`; common use case. - `butlast`, `butlastn`: return a generator that yields from iterable, dropping the last `n` items if the iterable is finite. Inspired by a similar utility in PG's [On Lisp](http://paulgraham.com/onlisp.html). - Works by using intermediate storage. **Do not** use the original iterator after a call to `butlast` or `butlastn`. @@ -1380,40 +1922,46 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - Can be useful for the occasional abuse of `collections.deque` as an *alist* [[1]](https://en.wikipedia.org/wiki/Association_list) [[2]](http://www.gigamonkeys.com/book/beyond-lists-other-uses-for-cons-cells.html). Use `.appendleft(...)` to add new items, and then this `find` to get the currently active association. - `running_minmax`, `minmax`: Extract both min and max in one pass over an iterable. The `running_` variant is a scan and returns a generator; the just-give-me-the-final-result variant is a fold. **Added in v0.14.2.** - *math-related*: - - `fixpoint`: arithmetic fixed-point finder (not to be confused with `fix`). **Added in v0.14.2.** - `within`: yield items from iterable until successive iterates are close enough. Useful with [Cauchy sequences](https://en.wikipedia.org/wiki/Cauchy_sequence). **Added in v0.14.2.** - `prod`: like the builtin `sum`, but compute the product. Oddly missing from the standard library. - `iterate1`, `iterate`: return an infinite generator that yields `x`, `f(x)`, `f(f(x))`, ... - - `iterate1` is for 1-to-1 functions; `iterate` for n-to-n, unpacking the return value to the argument list of the next call. + - `iterate1` is for 1-to-1 functions. 
+ - `iterate` is for n-to-n, unpacking the return value to the args/kwargs of the next call. + - **Changed in v0.15.0.** *In the n-to-n version, the user function must now return a `Values` object in the same shape as it accepts args and kwargs. This `Values` object is the `x` that is yielded at each iteration.* - *miscellaneous*: - `uniqify`, `uniq`: remove duplicates (either all or consecutive only, respectively), preserving the original ordering of the items. - `rev` is a convenience function that tries `reversed`, and if the input was not a sequence, converts it to a tuple and reverses that. The return value is a `reversed` object. - - `scons`: prepend one element to the start of an iterable, return new iterable. ``scons(x, iterable)`` is lispy shorthand for ``itertools.chain((x,), iterable)``, allowing to omit the one-item tuple wrapper. - - `inn`: contains-check (``x in iterable``) with automatic termination for monotonic divergent infinite iterables. - - Only applicable to monotonic divergent inputs (such as ``primes``). Increasing/decreasing is auto-detected from the first non-zero diff, but the function may fail to terminate if the input is actually not monotonic, or has an upper/lower bound. - - `iindex`: like ``list.index``, but for a general iterable. Consumes the iterable, so only makes sense for memoized inputs. - - `CountingIterator`: count how many items have been yielded, as a side effect. The count is stored in the `.count` attribute. **Added in v0.14.2.** + - `scons`: prepend one element to the start of an iterable, return new iterable. `scons(x, iterable)` is lispy shorthand for `itertools.chain((x,), iterable)`, allowing you to omit the one-item tuple wrapper. The name is an abbreviation of [`stream-cons`](https://docs.racket-lang.org/reference/streams.html). + - `inn`: contains-check (`x in iterable`) with automatic termination for monotonic divergent infinite iterables. + - Only applicable to monotonic divergent inputs (such as `primes`).
Increasing/decreasing is auto-detected from the first non-zero diff, but the function may fail to terminate if the input is actually not monotonic, or has an upper/lower bound. + - `iindex`: like `list.index`, but for a general iterable. Consumes the iterable, so only makes sense for memoized inputs. + - `CountingIterator`: use `CountingIterator(iterable)` instead of `iter(iterable)` to produce an iterator that, as a side effect, counts how many items have been yielded. The count is stored in the `.count` attribute. **Added in v0.14.2.** - `slurp`: extract all items from a `queue.Queue` (until it is empty) to a list, returning that list. **Added in v0.14.2.** - `subset`: test whether an iterable is a subset of another. **Added in v0.14.3.** - `powerset`: yield the power set (set of all subsets) of an iterable. Works also for potentially infinite iterables, if only a finite prefix is ever requested. (But beware, both runtime and memory usage are exponential in the input size.) **Added in v0.14.2.** - - `partition_int`: split a small positive integer, in all possible ways, into smaller integers that sum to it. Useful e.g. for determining how many letters the components of an anagram may have. **Added in v0.14.2.** - `allsame`: test whether all elements of an iterable are the same. Sometimes useful in writing testing code. 
**Added in v0.14.3.** Examples: ```python from functools import partial +from itertools import count, takewhile +from operator import add, mul from unpythonic import (scanl, scanr, foldl, foldr, - mapr, zipr, + mapr, zipr, rmap, rzip, identity, uniqify, uniq, flatten1, flatten, flatten_in, flatmap, take, drop, unfold, unfold1, + unpack, cons, nil, ll, curry, - s, inn, iindex, + imemoize, gmemoize, + s, inn, iindex, find, + partition, partition_int, window, subset, powerset, - allsame) + allsame, + Values) assert tuple(scanl(add, 0, range(1, 5))) == (0, 1, 3, 6, 10) assert tuple(scanr(add, 0, range(1, 5))) == (0, 4, 7, 9, 10) @@ -1428,7 +1976,10 @@ def step2(k): # x0, x0 + 2, x0 + 4, ... assert tuple(take(10, unfold1(step2, 10))) == (10, 12, 14, 16, 18, 20, 22, 24, 26, 28) def nextfibo(a, b): - return (a, b, a + b) # value, *newstates + # First positional return value is the value to yield. + # Everything else is newstate, to be unpacked to `nextfibo`'s + # args/kwargs at the next iteration. 
+ return Values(a, a=b, b=a + b) assert tuple(take(10, unfold(nextfibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) def fibos(): @@ -1446,7 +1997,7 @@ assert inn(42, evens()) assert not inn(41, evens()) @gmemoize -def primes(): +def primes(): # FP sieve of Eratosthenes yield 2 for n in count(start=3, step=2): if not any(n % p == 0 for p in takewhile(lambda x: x*x <= n, primes())): @@ -1458,8 +2009,9 @@ assert not inn(1337, primes()) iseven = lambda x: x % 2 == 0 assert [tuple(it) for it in partition(iseven, range(10))] == [(1, 3, 5, 7, 9), (0, 2, 4, 6, 8)] +# CAUTION: not to be confused with: # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it -assert tuple(partition_int(4)) == ((1, 1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 3), (2, 1, 1), (2, 2), (3, 1), (4,)) +assert tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1)) assert all(sum(terms) == 10 for terms in partition_int(10)) # iindex: find index of item in iterable (mostly only makes sense for memoized input) @@ -1474,7 +2026,7 @@ assert find(lambda x: x >= 3, gen) == 4 # if consumable, consumed as usual # window: length-n sliding window iterator for general iterables lst = (x for x in range(5)) out = [] -for a, b, c in window(lst, n=3): +for a, b, c in window(3, lst): out.append((a, b, c)) assert out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)] @@ -1502,16 +2054,31 @@ def msqrt(x): # multivalued sqrt return (s, -s) assert tuple(flatmap(msqrt, (0, 1, 4, 9))) == (0., 1., -1., 2., -2., 3., -3.) -# zipr reverses, then iterates. -assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) +# **CAUTION**: zip and reverse do NOT commute for inputs with different lengths: +assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) # zip first +assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) # reverse first + +# zipr syncs *left* ends, then iterates *from the right*. 
+assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) -zipr2 = partial(mapr, identity) # mapr works the same way. -assert tuple(zipr2((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) +# so does mapr. +zipr2 = partial(mapr, identity) +assert tuple(zipr2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(2, 5, 8), Values(1, 4, 7)) -# foldr doesn't; it walks from the left, but collects results from the right: +# rzip syncs *right* ends, then iterates from the right. +assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) + +# so does rmap. +rzip2 = partial(rmap, identity) +assert tuple(rzip2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(3, 6, 8), Values(2, 5, 7)) + +# foldr syncs *left* ends, then collects results from the right: +def zipper(*args): + *rest, acc = args + return acc + (tuple(rest),) zipr1 = curry(foldr, zipper, ()) assert zipr1((1, 2, 3), (4, 5, 6), (7, 8)) == ((2, 5, 8), (1, 4, 7)) -# so the result is reversed(zip(...)), whereas zipr gives zip(*(reversed(s) for s in ...)) +# so the result is tuple(rev(zip(...))), whereas rzip gives tuple(zip(*(rev(s) for s in ...))) assert tuple(uniqify((1, 1, 2, 2, 2, 1, 2, 2, 4, 3, 4, 3, 3))) == (1, 2, 4, 3) # all assert tuple(uniq((1, 1, 2, 2, 2, 1, 2, 2, 4, 3, 4, 3, 3))) == (1, 2, 1, 2, 4, 3, 4, 3) # consecutive @@ -1525,16 +2092,6 @@ assert tuple(flatten((((1, 2), (3, 4)), (5, 6)), is_nested)) == ((1, 2), (3, 4), data = (((1, 2), ((3, 4), (5, 6)), 7), ((8, 9), (10, 11))) assert tuple(flatten(data, is_nested)) == (((1, 2), ((3, 4), (5, 6)), 7), (8, 9), (10, 11)) assert tuple(flatten_in(data, is_nested)) == (((1, 2), (3, 4), (5, 6), 7), (8, 9), (10, 11)) - -with_n = lambda *args: (partial(f, n) for n, f in args) -clip = lambda n1, n2: composel(*with_n((n1, drop), (n2, take))) -assert tuple(clip(5, 10)(range(20))) == tuple(range(5, 15)) -``` - -In the last example, essentially we just want to `clip 5 10 (range 20)`, the grouping of the parentheses being pretty much an 
implementation detail. With ``curry``, we can rewrite the last line as: - -```python -assert tuple(curry(clip, 5, 10, range(20)) == tuple(range(5, 15)) ``` ### Batteries for network programming @@ -1544,10 +2101,16 @@ assert tuple(curry(clip, 5, 10, range(20)) == tuple(range(5, 15)) While all other pure-Python features of `unpythonic` live in the main `unpythonic` package, the network-related features are placed in the subpackage `unpythonic.net`. This subpackage also contains the [REPL server and client](repl.md) for hot-patching live processes. - `unpythonic.net.msg`: A simplistic message protocol for sending message data over a stream-based transport, such as TCP. -- `unpythonic.net.ptyproxy`: Proxy between a Linux [PTY](https://en.wikipedia.org/wiki/Pseudoterminal) and a network socket. Useful for serving terminal utilities over the network. The selling point is this doesn't use `pty.spawn`, so it can be used for proxying also Python libraries that expect to run in a terminal. +- `unpythonic.net.ptyproxy`: Proxy between a Linux [PTY](https://en.wikipedia.org/wiki/Pseudoterminal) and a network socket. Useful for serving terminal utilities over the network. The selling point is this does **not** use `pty.spawn`, so it can also be used for proxying Python libraries that expect to run in a terminal. - `unpythonic.net.util`: Miscellaneous small utilities. -The thing about stream-based transports is that they have no concept of a message boundary [[1]](http://stupidpythonideas.blogspot.com/2013/05/sockets-are-byte-streams-not-message.html) [[2]](https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers) [[3]](https://docs.python.org/3/howto/sockets.html). This is where a message protocol comes in.
We provide a [sans-io](https://sans-io.readthedocs.io/) implementation of a minimalistic custom protocol that adds rudimentary [message framing](https://blog.stephencleary.com/2009/04/message-framing.html) and [stream re-synchronization](https://en.wikipedia.org/wiki/Frame_synchronization). Example: +For a usage example of `unpythonic.net.ptyproxy`, see the source code of `unpythonic.net.server`. + +More details can be found in the docstrings. + +#### `unpythonic.net.msg` + +The problem with stream-based transports, such as network sockets, is that they have no concept of a message boundary [[1]](http://stupidpythonideas.blogspot.com/2013/05/sockets-are-byte-streams-not-message.html) [[2]](https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers) [[3]](https://docs.python.org/3/howto/sockets.html). This is where a message protocol comes in. We provide a [sans-io](https://sans-io.readthedocs.io/) implementation of a minimalistic message protocol that adds rudimentary [message framing](https://blog.stephencleary.com/2009/04/message-framing.html) and [stream re-synchronization](https://en.wikipedia.org/wiki/Frame_synchronization). Example: ```python from io import BytesIO, SEEK_SET @@ -1587,14 +2150,12 @@ assert decoder.decode() == b"mew" assert decoder.decode() is None ``` -For a usage example of `unpythonic.net.PTYProxy`, see the source code of `unpythonic.net.server`. 
- -### ``islice``: slice syntax support for ``itertools.islice` +### `islice`: slice syntax support for `itertools.islice` **Changed in v0.14.2.** *Added support for negative `start` and `stop`.* -Slice an iterable, using the regular slicing syntax: +Slice any iterable, using the regular slicing syntax: ```python from unpythonic import islice, primes, s @@ -1612,38 +2173,40 @@ assert tuple(islice(odds)[:5]) == (1, 3, 5, 7, 9) assert tuple(islice(odds)[:5]) == (11, 13, 15, 17, 19) # five more ``` -As a convenience feature: a single index is interpreted as a length-1 islice starting at that index. The slice is then immediately evaluated and the item is returned. +As a convenience feature: a single index is interpreted as a length-1 `islice` starting at that index. The slice is then immediately evaluated and the item is returned. -The slicing variant calls ``itertools.islice`` with the corresponding slicing parameters, after possibly converting negative `start` and `stop` to the appropriate positive values. +The slicing variant calls `itertools.islice` with the corresponding slicing parameters, after possibly converting negative `start` and `stop` to the appropriate positive values. -**CAUTION**: When using negative `start` and/or `stop`, we must consume the whole iterable to determine where it ends, if at all. Obviously, this will not terminate for infinite iterables. +**CAUTION**: When using negative `start` and/or `stop`, the whole iterable is consumed to determine where it ends, if at all. Obviously, this will not terminate for infinite iterables. The desired elements are then held in an internal buffer until they are yielded by iterating over the `islice`. **CAUTION**: Keep in mind that negative `step` is not supported, and that the slicing process consumes elements from the iterable. -Like ``fup``, our ``islice`` is essentially a manually curried function with unusual syntax; the initial call to ``islice`` passes in the iterable to be sliced. 
The object returned by the call accepts a subscript to specify the slice or index. Once the slice or index is provided, the call to ``itertools.islice`` triggers. +Like `fup`, our `islice` is essentially a manually curried function with unusual syntax; the initial call to `islice` passes in the iterable to be sliced. The object returned by the call accepts a subscript to specify the slice or index. Once the slice or index is provided, the call to `itertools.islice` triggers. Inspired by Python itself. ### `gmemoize`, `imemoize`, `fimemoize`: memoize generators +**Changed in v0.15.0.** *The generator instances created by the gfuncs returned by `gmemoize`, `imemoize`, and `fimemoize`, now support the `__len__` and `__getitem__` methods to access the already-yielded, memoized part. Asking for the `len` returns the current length of the memo. For subscripting, both a single `int` index and a slice are accepted. Note that memoized generators do **not** support all of the [`collections.abc.Sequence`](https://docs.python.org/3/library/collections.abc.html) API, because e.g. `__contains__` and `__reversed__` are missing, on purpose.* + Make generator functions (gfunc, i.e. a generator definition) which create memoized generators, similar to how streams behave in Racket. Memoize iterables; like `itertools.tee`, but no need to know in advance how many copies of the iterator will be made. Provided for both iterables and for factory functions that make iterables. - `gmemoize` is a decorator for a gfunc, which makes it memoize the instantiated generators. - If the gfunc takes arguments, they must be hashable. A separate memoized sequence is created for each unique set of argument values seen. - - For simplicity, the generator itself may use ``yield`` for output only; ``send`` is not supported. - - Any exceptions raised by the generator (except StopIteration) are also memoized, like in ``memoize``. - - Thread-safe. 
Calls to ``next`` on the memoized generator from different threads are serialized via a lock. Each memoized sequence has its own lock. This uses ``threading.RLock``, so re-entering from the same thread (e.g. in recursively defined sequences) is fine. + - For simplicity, the generator itself may use `yield` for output only; `send` is **not** supported. + - Any exceptions raised by the generator (except StopIteration) are also memoized, like in `memoize`. + - Thread-safe. Calls to `next` on the memoized generator from different threads are serialized via a lock. Each memoized sequence has its own lock. This uses `threading.RLock`, so re-entering from the same thread (e.g. in recursively defined mathematical sequences) is fine. - The whole history is kept indefinitely. For infinite iterables, use this only if you can guarantee that only a reasonable number of terms will ever be evaluated (w.r.t. available RAM). - - Typically, this should be the outermost decorator if several are used on the same gfunc. + - Typically, `gmemoize` should be the outermost decorator if several are used on the same gfunc. - `imemoize`: memoize an iterable. Like `itertools.tee`, but keeps the whole history, so more copies can be teed off later. - Same limitation: **do not** use the original iterator after it is memoized. The danger is that if anything other than the memoization mechanism advances the original iterator, some values will be lost before they can reach the memo. - Returns a gfunc with no parameters which, when called, returns a generator that yields items from the memoized iterable. The original iterable is used to retrieve more terms when needed. - Calling the gfunc essentially tees off a new instance, which begins from the first memoized item. - `fimemoize`: convert a factory function, that returns an iterable, into the corresponding gfunc, and `gmemoize` that. Return the memoized gfunc. 
- - Especially convenient with short lambdas, where `(yield from ...)` instead of `...` is just too much text. + - Especially convenient with short lambdas, where `(yield from ...)` instead of `...` is just too much text. See example below. ```python from itertools import count, takewhile @@ -1683,21 +2246,21 @@ def some_evens(n): # we want to memoize the result without the n first terms assert last(some_evens(25)) == last(some_evens(25)) # iterating twice! ``` -Using a lambda, we can also write ``some_evens`` as: +Using a lambda, we can also write `some_evens` as: ```python se = gmemoize(lambda n: (yield from drop(n, evens()))) assert last(se(25)) == last(se(25)) ``` -Using `fimemoize`, we can omit the ``yield from``, shortening this to: +Using `fimemoize`, we can omit the `yield from`, shortening this to: ```python se = fimemoize(lambda n: drop(n, evens())) assert last(se(25)) == last(se(25)) ``` -If we don't need to take an argument, we can memoize the iterable directly, using ``imemoize``: +If we don't need to take an argument, we can memoize the iterable directly, using `imemoize`: ```python se = imemoize(drop(25, evens())) @@ -1716,33 +2279,46 @@ def some_evens(n): yield from drop(n, evens()) ``` -The only differences are the name of the decorator and ``return`` vs. ``yield from``. The point of `fimemoize` is that in simple cases like this, it allows us to use a regular factory function that makes an iterable, instead of a gfunc. Of course, the gfunc could have several `yield` expressions before it finishes, whereas the factory function terminates at the `return`. +The only differences are the name of the decorator and `return` vs. `yield from`. The point of `fimemoize` is that in simple cases like this, it allows us to use a regular factory function that makes an iterable, instead of a gfunc. Of course, the gfunc could have several `yield` expressions before it finishes, whereas the factory function terminates at the `return`. 
-### ``fup``: Functional update; ``ShadowedSequence`` +### `fup`: Functional update; `ShadowedSequence` -We provide ``ShadowedSequence``, which is a bit like ``collections.ChainMap``, but for sequences, and only two levels (but it's a sequence; instances can be chained). It supports slicing (read-only), equality comparison, ``str`` and ``repr``. Out-of-range read access to a single item emits a meaningful error, like in ``list``. See the docstring of ``ShadowedSequence`` for details. +**Changed in v0.15.0.** *Bug fixed: Now an infinite replacement sequence to pull items from is actually ok, as the documentation has always claimed.* -The function ``fupdate`` functionally updates sequences and mappings. Whereas ``ShadowedSequence`` reads directly from the original sequences at access time, ``fupdate`` makes a shallow copy, of the same type as the given input sequence, when it finalizes its output. +We provide three layers, in increasing order of the level of abstraction: `ShadowedSequence`, `fupdate`, and `fup`. -**The preferred way** to use ``fupdate`` on sequences is through the ``fup`` utility function, which specializes ``fupdate`` to sequences, and adds support for Python's standard slicing syntax: +The class `ShadowedSequence` is a bit like `collections.ChainMap`, but for sequences, and only two levels (but it's a sequence; instances can be chained). It supports slicing (read-only), equality comparison, `str` and `repr`. Out-of-range read access to a single item emits a meaningful error, like in `list`. We will not discuss `ShadowedSequence` in more detail here, as it is a low-level tool; see its docstring for details. + +The function `fupdate` functionally updates sequences and mappings. Whereas `ShadowedSequence` reads directly from the original sequences at access time, `fupdate` makes a shallow copy, of the same type as the given input sequence, when it finalizes its output. 
+ +Finally, the function `fup` provides a high-level API to functionally update a sequence, with nice syntax. + +#### `fup` + +**The preferred way** to use `fupdate` on sequences is through the `fup` utility function, which specializes `fupdate` to sequences, and adds support for Python's standard **slicing syntax**: ```python from unpythonic import fup from itertools import repeat -lst = (1, 2, 3, 4, 5) -assert fup(lst)[3] << 42 == (1, 2, 3, 42, 5) -assert fup(lst)[0::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10) +tup = (1, 2, 3, 4, 5) +assert fup(tup)[3] << 42 == (1, 2, 3, 42, 5) +assert fup(tup)[0::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10) +assert fup(tup)[0::2] << repeat(10) == (10, 2, 10, 4, 10) # infinite replacement ``` -Currently only one update specification is supported in a single ``fup()``. (The ``fupdate`` function supports more; see below.) +Currently only one *update specification* is supported in a single `fup()`. The low-level `fupdate` function supports more; see below. -The notation follows the ``unpythonic`` convention that ``<<`` denotes an assignment of some sort. Here it denotes a functional update, which returns a modified copy, leaving the original untouched. +An *update specification* is a combination of **where** to update, and **what** to put there. The *where* part can be a single index or a slice. When it is a single index, the *what* is a single item; and when a slice, the *what* is a sequence or an iterable, which must contain at least as many items as are required to perform the update. For details, see `fupdate` below. -The ``fup`` call is essentially curried. It takes in the sequence to be functionally updated. The object returned by the call accepts a subscript to specify the index or indices. This then returns another object that accepts a left-shift to specify the values. Once the values are provided, the underlying call to ``fupdate`` triggers, and the result is returned. +The `fup` function is essentially curried. 
It takes in the sequence to be functionally updated. The object returned by the call accepts a subscript to specify the index or indices. This then returns another object that accepts a left-shift to specify the values. Once the values are provided, the underlying call to `fupdate` triggers, and the result is returned. -The ``fupdate`` function itself works as follows: +The notation follows the `unpythonic` convention that `<<` denotes an assignment of some sort. Here it denotes a functional update, which returns a modified copy, leaving the original untouched. + +#### `fupdate` + +The `fupdate` function itself, which is the next lower abstraction level, works as follows: ```python from unpythonic import fupdate @@ -1753,58 +2329,97 @@ assert lst == [1, 2, 3] # the original remains untouched assert out == [1, 42, 3] lst = [1, 2, 3] -out = fupdate(lst, -1, 42) # negative indices also supported +out = fupdate(lst, -1, 42) # negative indices are also supported assert lst == [1, 2, 3] assert out == [1, 2, 42] ``` -Immutable input sequences are allowed. Replacing a slice of a tuple by a sequence: +Because the update is functional - i.e. the result is a new object, without mutating the original - immutable update target sequences are allowed. 
For example, we can replace a slice of a tuple by a sequence: ```python from itertools import repeat -lst = (1, 2, 3, 4, 5) -assert fupdate(lst, slice(0, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) -assert fupdate(lst, slice(1, None, 2), tuple(repeat(10, 2))) == (1, 10, 3, 10, 5) -assert fupdate(lst, slice(None, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) -assert fupdate(lst, slice(None, None, -1), tuple(range(5))) == (4, 3, 2, 1, 0) +tup = (1, 2, 3, 4, 5) +assert fupdate(tup, slice(0, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) +assert fupdate(tup, slice(1, None, 2), tuple(repeat(10, 2))) == (1, 10, 3, 10, 5) +assert fupdate(tup, slice(None, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) +assert fupdate(tup, slice(None, None, -1), range(5)) == (4, 3, 2, 1, 0) +``` + +Slicing supports negative indices and steps, and default starts, stops and steps, as usual in Python. Just remember `a[start:stop:step]` actually means `a[slice(start, stop, step)]` (with `None` replacing omitted `start`, `stop` and `step`), and everything should follow. Multidimensional arrays are **not** supported. + +When `fupdate` constructs its output, the replacement occurs by walking *the input sequence* left-to-right, and pulling an item from the replacement sequence when the given replacement specification so requires. Hence the replacement sequence is not necessarily accessed left-to-right. In the last example above, the `range(5)` was read in the order `4, 3, 2, 1, 0`. This is because when `slice(None, None, -1)` is applied to the input sequence, the first item of the input sequence is index `4` in the slice. So when replacing the first item, `fupdate` looked up index `4` in the replacement sequence. Because the replacement was just `range(5)`, the value at index `4` was also `4`. + +The replacement sequence must have at least as many items as the slice requires, when the slice is applied to the original input sequence. 
Any extra items in the replacement sequence are simply ignored, but if the replacement is too short, `IndexError` is raised. + +The replacement must have `__len__` and `__getitem__` methods if the slice (when treated as explained above) requires reading the replacement backwards, and/or if you plan to iterate over the `ShadowedSequence` multiple times. If the replacement only needs to be read forwards, **AND** you only plan to iterate over the `ShadowedSequence` just once (e.g., as part of a `fup`/`fupdate` operation), then it is sufficient for the replacement to implement the `collections.abc.Iterator` API only (i.e. just `__iter__` and `__next__`). + +##### Infinite replacements + +An infinite replacement causes `fupdate` (and `fup`) to pull as many items as are needed: + +```python +from itertools import repeat, count +from unpythonic import fup + +tup = (1, 2, 3, 4, 5) +assert fup(tup)[::] << repeat(42) == (42, 42, 42, 42, 42) +assert fup(tup)[::] << count(start=10) == (10, 11, 12, 13, 14) +``` + +The rest of the infinite replacement is considered as extra items, and is ignored. + +**CAUTION**: If converting existing code, **be careful** not to accidentally `tuple(...)` an infinite replacement. Python will happily fill all available RAM and essentially crash your machine trying to exhaust the infinite generator. + +If you need to reverse-walk the start of an infinite replacement: use `imemoize(...)` on the original iterable, instantiate the generator, and use that generator instance as the replacement: + +```python +from itertools import count +from unpythonic import fup, imemoize + +tup = (1, 2, 3, 4, 5) +assert fup(tup)[::-1] << imemoize(count(start=10))() == (14, 13, 12, 11, 10) ``` -Slicing supports negative indices and steps, and default starts, stops and steps, as usual in Python. 
Just remember ``a[start:stop:step]`` actually means ``a[slice(start, stop, step)]`` (with ``None`` replacing omitted ``start``, ``stop`` and ``step``), and everything should follow. Multidimensional arrays are **not** supported. +Just like above, due to the slice `[::-1]`, `fup` calculates that - when walking *the input sequence* left-to-right - it first needs to take the item at index `4` of the replacement. The `fup` succeeds, because when it retrieves this fifth item, all of the first five items are stored in the memo (which is internally a sequence). So `fup` can retrieve the fifth item, then the fourth, and so on - even though from the viewpoint of the original underlying iterable, the earlier items have already been consumed when the fifth item is accessed. -When ``fupdate`` constructs its output, the replacement occurs by walking *the input sequence* left-to-right, and pulling an item from the replacement sequence when the given replacement specification so requires. Hence the replacement sequence is not necessarily accessed left-to-right. (In the last example above, ``tuple(range(5))`` was read in the order ``(4, 3, 2, 1, 0)``.) +`ShadowedSequence` (and thus also `fupdate` and `fup`) internally uses `__getitem__` to retrieve the actual previous items from the memo, so even the memoized generator is only iterated over once. This functionality supports any generator instance created by the gfuncs returned by `imemoize`, `fimemoize`, or `gmemoize`. -The replacement sequence must have at least as many items as the slice requires (when applied to the original input). Any extra items in the replacement sequence are simply ignored (so e.g. an infinite ``repeat`` is fine), but if the replacement is too short, ``IndexError`` is raised. +##### Multiple update specifications -It is also possible to replace multiple individual items. 
These are treated as separate specifications, applied left to right (so later updates shadow earlier ones, if updating at the same index): +In `fupdate`, it is also possible to replace multiple individual items: ```python -lst = (1, 2, 3, 4, 5) -out = fupdate(lst, (1, 2, 3), (17, 23, 42)) -assert lst == (1, 2, 3, 4, 5) +tup = (1, 2, 3, 4, 5) +out = fupdate(tup, (1, 2, 3), (17, 23, 42)) # target, (*where), (*what) +assert tup == (1, 2, 3, 4, 5) assert out == (1, 17, 23, 42, 5) ``` +These are treated as separate specifications, applied left to right. This means later updates shadow earlier ones, if updating at the same index. + Multiple specifications can be used with slices and sequences as well: ```python -lst = tuple(range(10)) -out = fupdate(lst, (slice(0, 10, 2), slice(1, 10, 2)), +tup = tuple(range(10)) +out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2)), (tuple(repeat(2, 5)), tuple(repeat(3, 5)))) -assert lst == tuple(range(10)) +assert tup == tuple(range(10)) assert out == (2, 3, 2, 3, 2, 3, 2, 3, 2, 3) ``` Strictly speaking, each specification can be either a slice/sequence pair or an index/item pair: ```python -lst = tuple(range(10)) -out = fupdate(lst, (slice(0, 10, 2), slice(1, 10, 2), 6), +tup = tuple(range(10)) +out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2), 6), (tuple(repeat(2, 5)), tuple(repeat(3, 5)), 42)) -assert lst == tuple(range(10)) +assert tup == tuple(range(10)) assert out == (2, 3, 2, 3, 2, 3, 42, 3, 2, 3) ``` -Also mappings can be functionally updated: +##### `fupdate` and mappings + +Mappings can be functionally updated, too: ```python d1 = {'foo': 'bar', 'fruit': 'apple'} @@ -1813,9 +2428,11 @@ assert sorted(d1.items()) == [('foo', 'bar'), ('fruit', 'apple')] assert sorted(d2.items()) == [('foo', 'tavern'), ('fruit', 'apple')] ``` -For immutable mappings, ``fupdate`` supports ``frozendict`` (see below). Any other mapping is assumed mutable, and ``fupdate`` essentially just performs ``copy.copy()`` and then ``.update()``. 
+For immutable mappings, `fupdate` supports `frozendict` (see below). Any other mapping is assumed mutable, and `fupdate` essentially just performs `copy.copy()` and then `.update()`. -We can also functionally update a namedtuple: +##### `fupdate` and named tuples + +Named tuples can be functionally updated, too: ```python from collections import namedtuple @@ -1826,11 +2443,12 @@ assert a == A(17, 23) assert out == A(42, 23) ``` -Namedtuples export only a sequence interface, so they cannot be treated as mappings. +Named tuples export only a sequence interface, so they **cannot** be treated as mappings, even though their elements have names. + +Support for `namedtuple` uses an extra feature of `fupdate`, which is available for custom classes, too. When constructing the output sequence, `fupdate` first checks whether the type of the input sequence has a `._make()` method, and if so, hands the iterable containing the final data to that to construct the output. Otherwise the regular constructor is called (and it must accept a single iterable). -Support for ``namedtuple`` requires an extra feature, which is available for custom classes, too. When constructing the output sequence, ``fupdate`` first checks whether the input type has a ``._make()`` method, and if so, hands the iterable containing the final data to that to construct the output. Otherwise the regular constructor is called (and it must accept a single iterable). -### ``view``: writable, sliceable view into a sequence +### `view`: writable, sliceable view into a sequence A writable view into a sequence, with slicing, so you can take a slice of a slice (of a slice ...), and it reflects the original both ways: @@ -1855,30 +2473,36 @@ v[:] = 42 # scalar broadcast assert lst == [0, 1, 42, 42, 4] ``` -While ``fupdate`` lets you be more functional than Python otherwise allows, ``view`` lets you be more imperative than Python otherwise allows. 
+While `fupdate` lets you be more functional than Python otherwise allows, `view` lets you be more imperative than Python otherwise allows. We store slice specs, not actual indices, so this works also if the underlying sequence undergoes length changes. -Slicing a view returns a new view. Slicing anything else will usually copy, because the object being sliced does, before we get control. To slice lazily, first view the sequence itself and then slice that. The initial no-op view is optimized away, so it won't slow down accesses. Alternatively, pass a ``slice`` object into the ``view`` constructor. +Slicing a view returns a new view. Slicing anything else will usually shallow-copy, because the object being sliced does, before we get control. To slice lazily, first view the sequence itself and then slice that. The initial no-op view is optimized away, so it won't slow down accesses. Alternatively, pass a `slice` object into the `view` constructor. The view can be efficiently iterated over. As usual, iteration assumes that no inserts/deletes in the underlying sequence occur during the iteration. Getting/setting an item (subscripting) checks whether the index cache needs updating during each access, so it can be a bit slow. Setting a slice checks just once, and then updates the underlying iterable directly. Setting a slice to a scalar value broadcasts the scalar à la NumPy. -The ``unpythonic.collections`` module also provides the ``SequenceView`` and ``MutableSequenceView`` abstract base classes; ``view`` is a ``MutableSequenceView``. +Besides `view` itself, the `unpythonic.collections` module also provides some other related abstractions. -There is the read-only cousin ``roview``, which behaves the same except it has no ``__setitem__`` or ``reverse``. This can be useful for giving read-only access to an internal sequence. 
The constructor of the writable ``view`` checks that the input is not read-only (``roview``, or a ``Sequence`` that is not also a ``MutableSequence``) before allowing creation of the writable view. +There is the read-only sister of view, `roview`, which is like `view`, except it has no `__setitem__` or `reverse`. This can be useful for providing explicit read-only access to a sequence, when it is undesirable to have clients write into it. +The constructor of the writable `view` checks that the input is not read-only (`roview`, or a `Sequence` that is not also a `MutableSequence`) before allowing creation of the writable view. -### ``mogrify``: update a mutable container in-place +Finally, there are the `SequenceView` and `MutableSequenceView` abstract base classes. The concrete `view` and `roview` are instances of them. + +**NOTE**: A writable view also supports the read-only API, so `issubclass(MutableSequenceView, SequenceView) is True`; as well as `issubclass(view, roview) is True`. Keep in mind the [Liskov substitution principle](https://en.wikipedia.org/wiki/Liskov_substitution_principle). + + +### `mogrify`: update a mutable container in-place **Changed in v0.14.3.** *`mogrify` now skips `nil`, actually making it useful for processing `ll` linked lists.* -Recurse on given container, apply a function to each atom. If the container is mutable, then update in-place; if not, then construct a new copy like ``map`` does. +Recurse on a given container, apply a function to each atom. If the container is mutable, then update in-place; if not, then construct a new copy like `map` does. If the container is a mapping, the function is applied to the values; keys are left untouched. -Unlike ``map`` and its cousins, only a single input container is supported. (Supporting multiple containers as input would require enforcing some compatibility constraints on their type and shape, since ``mogrify`` is not limited to sequences.) 
+Unlike `map` and its cousins, **`mogrify` only supports a single input container**. Supporting multiple containers as input would require enforcing some compatibility constraints on their type and shape, because `mogrify` is not limited to sequences. ```python from unpythonic import mogrify @@ -1889,47 +2513,49 @@ assert lst2 == [2, 4, 6] assert lst2 is lst1 ``` -Containers are detected by checking for instances of ``collections.abc`` superclasses (also virtuals are ok). Supported abcs are ``MutableMapping``, ``MutableSequence``, ``MutableSet``, ``Mapping``, ``Sequence`` and ``Set``. Any value that does not match any of these is treated as an atom. Containers can be nested, with an arbitrary combination of the types supported. +Containers are detected by checking for instances of `collections.abc` superclasses (also virtuals are ok). Supported abcs are `MutableMapping`, `MutableSequence`, `MutableSet`, `Mapping`, `Sequence` and `Set`. Any value that does not match any of these is treated as an atom. Containers can be nested, with an arbitrary combination of the types supported. + +For convenience, we support some special cases: -For convenience, we introduce some special cases: + - Any classes created by `collections.namedtuple`; they do not conform to the standard constructor API for a `Sequence`. - - Any classes created by ``collections.namedtuple``, because they do not conform to the standard constructor API for a ``Sequence``. + Thus, to support also named tuples: for any immutable `Sequence`, we first check for the presence of a `._make()` method, and if found, use it as the constructor. Otherwise we use the regular constructor. - Thus, for (an immutable) ``Sequence``, we first check for the presence of a ``._make()`` method, and if found, use it as the constructor. Otherwise we use the regular constructor. + - `str` is treated as an atom, although technically a `Sequence`. - - ``str`` is treated as an atom, although technically a ``Sequence``. 
+ It does not conform to the exact same API (its constructor does not take an iterable), and often one does not want to treat strings as containers anyway. - It doesn't conform to the exact same API (its constructor does not take an iterable), and often we don't want to treat strings as containers anyway. + If you want to process strings, implement it in your function that is called by `mogrify`. You can e.g. `tuple(thestring)` and then call `mogrify` on that. - If you want to process strings, implement it in your function that is called by ``mogrify``. + - The `box`, `ThreadLocalBox` and `Some` containers from the module `unpythonic.collections`. Although the first two are mutable, their update is not conveniently expressible by the `collections.abc` APIs. - - The ``box``, `ThreadLocalBox` and `Some` containers from ``unpythonic.collections``. Although the first two are mutable, their update is not conveniently expressible by the ``collections.abc`` APIs. + - The `cons` container from the module `unpythonic.llist`, including linked lists created using `ll` or `llist`. This is treated with the general tree strategy, so nested linked lists will be flattened, and the final `nil` is also processed. - - The ``cons`` container from ``unpythonic.llist`` (including the ``ll``, ``llist`` linked lists). This is treated with the general tree strategy, so nested linked lists will be flattened, and the final ``nil`` is also processed. + Note that since `cons` is immutable, anyway, if you know you have a long linked list where you need to update the values, just iterate over it and produce a new copy - that will work as intended. - Note that since ``cons`` is immutable, anyway, if you know you have a long linked list where you need to update the values, just iterate over it and produce a new copy - that will work as intended. 
+### `s`, `imathify`, `gmathify`: lazy mathematical sequences with infix arithmetic -### ``s``, ``imathify``, ``gmathify``: lazy mathematical sequences with infix arithmetic +**Changed in v0.15.0.** *The deprecated names have been removed.* -**Changed in v0.14.3.** Added convenience mode to generate cyclic infinite sequences. +**Changed in v0.14.3.** *To improve descriptiveness, and for consistency with names of other abstractions in `unpythonic`, `m` has been renamed `imathify` and `mg` has been renamed `gmathify`. This is a one-time change; it is not likely that these names will be changed ever again. The old names are now deprecated.* -**Changed in v0.14.3.** To improve descriptiveness, and for consistency with names of other abstractions in `unpythonic`, `m` has been renamed `imathify` and `mg` has been renamed `gmathify`. The old names will continue working in v0.14.x, and will be removed in v0.15.0. This is a one-time change; it is not likely that these names will be changed ever again. +**Changed in v0.14.3.** *Added convenience mode to generate cyclic infinite sequences.* -We provide a compact syntax to create lazy constant, cyclic, arithmetic, geometric and power sequences: ``s(...)``. Numeric (``int``, ``float``, ``mpmath``) and symbolic (SymPy) formats are supported. We avoid accumulating roundoff error when used with floating-point formats. +We provide a compact syntax to create lazy constant, cyclic, arithmetic, geometric and power sequences: `s(...)`. Numeric (`int`, `float`, `mpmath`) and symbolic (SymPy) formats are supported. We avoid accumulating roundoff error when used with floating-point formats. -We also provide arithmetic operation support for iterables (termwise). To make any iterable infix math aware, use ``imathify(iterable)``. The arithmetic is lazy; it just plans computations, returning a new lazy mathematical sequence. To extract values, iterate over the result. 
(Note this implies that expressions consisting of thousands of operations will overflow Python's call stack. In practice this shouldn't be a problem.) +We also provide arithmetic operation support for iterables (termwise). To make any iterable infix math aware, use `imathify(iterable)`. The arithmetic is lazy; it just plans computations, returning a new lazy mathematical sequence. To extract values, iterate over the result. (Note this implies that expressions consisting of thousands of operations will overflow Python's call stack. In practice this shouldn't be a problem.) -The function versions of the arithmetic operations (also provided, à la the ``operator`` module) have an **s** prefix (short for mathematical **sequence**), because in Python the **i** prefix (which could stand for *iterable*) is already used to denote the in-place operators. +The function versions of the arithmetic operations (also provided, à la the `operator` module) have an **s** prefix (short for mathematical **sequence**), because in Python the **i** prefix (which could stand for *iterable*) is already used to denote the in-place operators. -We provide the [Cauchy product](https://en.wikipedia.org/wiki/Cauchy_product), and its generalization, the diagonal combination-reduction, for two (possibly infinite) iterables. Note ``cauchyprod`` **does not sum the series**; given the input sequences ``a`` and ``b``, the call ``cauchyprod(a, b)`` computes the elements of the output sequence ``c``. +We provide the [Cauchy product](https://en.wikipedia.org/wiki/Cauchy_product), and its generalization, the diagonal combination-reduction, for two (possibly infinite) iterables. Note `cauchyprod` **does not sum the series**; given the input sequences `a` and `b`, the call `cauchyprod(a, b)` computes the elements of the output sequence `c`. -We also provide ``gmathify``, a decorator to mathify a gfunc, so that it will ``imathify()`` the generator instances it makes. 
Combo with ``imemoize`` for great justice, e.g. ``a = gmathify(imemoize(myiterable))``, and then ``a()`` to instantiate a memoized-and-mathified copy. +We also provide `gmathify`, a decorator to mathify a gfunc, so that it will `imathify()` the generator instances it makes. Combo with `imemoize` for great justice, e.g. `a = gmathify(imemoize(myiterable))`, and then `a()` to instantiate a memoized-and-mathified copy. -Finally, we provide ready-made generators that yield some common sequences (currently, the Fibonacci numbers and the prime numbers). The prime generator is an FP-ized sieve of Eratosthenes. +Finally, we provide ready-made generators that yield some common sequences (currently, the Fibonacci numbers, the triangular numbers, and the prime numbers). The prime generator is an FP-ized sieve of Eratosthenes. ```python -from unpythonic import s, imathify, cauchyprod, take, last, fibonacci, primes +from unpythonic import s, imathify, cauchyprod, take, last, fibonacci, triangular, primes assert tuple(take(10, s(1, ...))) == (1,)*10 assert tuple(take(10, s(1, 2, ...))) == tuple(range(1, 11)) @@ -1957,9 +2583,10 @@ assert tuple(take(3, cauchyprod(s(1, 3, 5, ...), s(2, 4, 6, ...)))) == (2, 10, 2 assert tuple(take(10, primes())) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) assert tuple(take(10, fibonacci())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) +assert tuple(take(10, triangular())) == (1, 3, 6, 10, 15, 21, 28, 36, 45, 55) ``` -A math iterable (i.e. one that has infix math support) is an instance of the class ``imathify``: +A math iterable (i.e. one that has infix math support) is an instance of the class `imathify`: ```python a = s(1, 3, ...) @@ -2010,16 +2637,16 @@ s2 = px(s(2, 4, 6, ...)) # 2, 4*x, 6*x**2, ... 
assert tuple(take(3, cauchyprod(s1, s2))) == (2, 10*x, 28*x**2) ``` -**CAUTION**: Symbolic sequence detection is sensitive to the assumptions on the symbols, because very pythonically, ``SymPy`` only simplifies when the result is guaranteed to hold in the most general case under the given assumptions. +**CAUTION**: Symbolic sequence detection is sensitive to the assumptions on the symbols, because very pythonically, `SymPy` only simplifies when the result is guaranteed to hold in the most general case under the given assumptions. Inspired by Haskell. -### ``sym``, ``gensym``, ``Singleton``: symbols and singletons +### `sym`, `gensym`, `Singleton`: symbols and singletons **Added in v0.14.2**. -We provide **lispy symbols**, an **uninterned symbol generator**, and a **pythonic singleton abstraction**. These are all pickle-aware, and instantiation is thread-safe. +We provide **lispy symbols**, an **uninterned symbol generator**, and a **pythonic singleton abstraction**. These are all pickle-aware and thread-safe. #### Symbol @@ -2033,17 +2660,17 @@ assert cat is sym("cat") assert cat is not sym("dog") ``` -The constructor `sym` produces an ***interned symbol***. Whenever (in the same process) **the same name** is passed to the `sym` constructor, it gives **the same object instance**. Even unpickling a symbol that has the same name produces the same `sym` object instance as any other `sym` with that name. +The constructor `sym` produces an ***interned symbol***. Whenever, in the same process, **the same name** is passed to the `sym` constructor, it gives **the same object instance**. Even unpickling a symbol that has the same name produces the same `sym` object instance as any other `sym` with that name. Thus a `sym` behaves like a Lisp symbol. 
Technically speaking, it's like a zen-minimalistic [Scheme/Racket symbol](https://stackoverflow.com/questions/8846628/what-exactly-is-a-symbol-in-lisp-scheme), since Common Lisp [stuffs all sorts of additional cruft in symbols](https://www.cs.cmu.edu/Groups/AI/html/cltl/clm/node27.html). If you insist on emulating that, note a `sym` is just a Python object you could customize in the usual ways, even though its instantiation logic plays by somewhat unusual rules. #### Gensym -The function `gensym` creates an ***uninterned symbol***, also known as *a gensym*. The label given in the call to `gensym` is a short human-readable description, like the name of a named symbol, but it has no relation to object identity. Object identity is tracked by an [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is automatically assigned when `gensym` creates the value. Even if `gensym` is called with the same label, the return value is a new unique symbol each time. +The function `gensym`, which is an abbreviation for *generate symbol*, creates an ***uninterned symbol***, also known as *a gensym*. The label given in the call to `gensym` is a short human-readable description, like the name of a named symbol, but it has no relation to object identity. Object identity is tracked by an [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is automatically assigned when `gensym` creates the value. Even if `gensym` is called with the same label, the return value is a new unique symbol each time. A gensym never conflicts with any named symbol; not even if one takes the UUID from a gensym and creates a named symbol using that as the name. 
-*The return value is the only time you'll see that symbol object; take good care of it!* +*The return value of `gensym` is the only time you will see that particular uninterned symbol object; take good care of it!* For example: @@ -2059,7 +2686,7 @@ print(scottishfold) # gensym:cat:94287f75-02b5-4138-9174-1e422e618d59 Uninterned symbols are useful as guaranteed-unique sentinel or [nonce (sense 2, adapted to programming)](https://en.wiktionary.org/wiki/nonce#Noun) values, like the pythonic idiom `nonce = object()`, but they come with a human-readable label. -They also have a superpower: with the help of the UUID automatically assigned by `gensym`, they survive a pickle roundtrip with object identity intact. Unpickling the *same* gensym value multiple times in the same process will produce just one object instance. (If the original return value from gensym is still alive, it is that same object instance.) +They also have a superpower: with the help of the UUID automatically assigned by `gensym`, they survive a pickle roundtrip with object identity intact. Unpickling the *same* gensym value multiple times in the same process will produce just one object instance. If the original return value from gensym is still alive, it is that same object instance. The UUID is generated with the pseudo-random algorithm [`uuid.uuid4`](https://docs.python.org/3/library/uuid.html). Due to rollover of the time field, it is possible for collisions with current UUIDs (as of the early 21st century) to occur with those generated after (approximately) the year 3400. See [RFC 4122](https://tools.ietf.org/html/rfc4122). @@ -2069,9 +2696,9 @@ Our `sym` is like a Lisp/Scheme/Racket symbol, which is essentially an [interned Our `gensym` is like the [Lisp `gensym`](http://clhs.lisp.se/Body/f_gensym.htm), and the [JavaScript `Symbol`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol). 
-If you're familiar with `mcpyrate`'s `gensym` or MacroPy's `gen_sym`, those mean something different. Their purpose is to create, at import time, a lexical identifier that is not already in use in the source code being compiled, whereas our `gensym` creates an uninterned symbol object for run-time use. Lisp macros use symbols to represent identifiers, hence the potential for confusion in Python, where that is not the case. (The symbols of `unpythonic` are a purely run-time abstraction.) +If you're familiar with `mcpyrate`'s `gensym` or MacroPy's `gen_sym`, those mean something different. Their purpose is to create, in a macro, a lexical identifier that is not already in use in the source code being compiled, whereas our `gensym` creates an uninterned symbol object for run-time use. Lisp macros use symbols to represent identifiers, hence the potential for confusion in Python, where that is not the case. The symbols of `unpythonic` are a purely run-time abstraction. -If your background is in C++ or Java, you may notice the symbol abstraction is a kind of a parametric [singleton](https://en.wikipedia.org/wiki/Singleton_pattern); each symbol with the same name is a singleton (as is any gensym with the same UUID). +If your background is in C++ or Java, you may notice the symbol abstraction is a kind of a parametric [singleton](https://en.wikipedia.org/wiki/Singleton_pattern); each symbol with the same name is a singleton, as is any gensym with the same UUID. #### Singleton @@ -2088,7 +2715,7 @@ class SingleXHolder(Singleton): h = SingleXHolder(17) s = pickle.dumps(h) h2 = pickle.loads(s) -assert h2 is h # it's the same instance +assert h2 is h # the same instance! ``` Often the [singleton pattern](https://en.wikipedia.org/wiki/Singleton_pattern) is discussed in the context of classic relatively low-level, static languages such as C++ or Java. 
[In Python](https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python), some of the classical issues, such as singletons being forced to use a clunky, nonstandard object construction syntax, are moot, because the language itself offers customization hooks that can be used to smooth away such irregularities. @@ -2099,13 +2726,13 @@ As the result of answering these questions, `unpythonic`'s idea of a singleton s However, Python can easily retrieve a singleton instance with syntax that looks like regular object construction, by customizing [`__new__`](https://docs.python.org/3/reference/datamodel.html#object.__new__). Hence no static accessor method is needed. This in turn raises the question, what should we do with constructor arguments, as we surely would like (in general) to allow those, and they can obviously differ between call sites. Since there is only one object instance to load state into, we could either silently update the state, or silently ignore the new proposed arguments. Good luck tracking down bugs either way. But upon closer inspection, that question depends on an unfounded assumption. What we should be asking instead is, *what should happen* if the constructor of a singleton is called again, while an instance already exists? -We believe in the principles of [separation of concerns](https://en.wikipedia.org/wiki/Separation_of_concerns) and [fail-fast](https://en.wikipedia.org/wiki/Fail-fast). The textbook singleton pattern conflates two concerns, possibly due to language limitations: the *management of object instances*, and the *enforcement of the at-most-one-instance-only guarantee*. If we wish to uncouple these responsibilities, then the obvious pythonic answer is that attempting to construct the singleton again while it already exists **should be considered a run-time error**. Since a singleton **type** does not support that operation, this situation should raise a `TypeError`. 
This makes the error explicit as early as possible, thus adhering to the fail-fast principle, hence making it difficult for bugs to hide (constructor arguments will either take effect, or the constructor call will explicitly fail). +We believe in the principles of [separation of concerns](https://en.wikipedia.org/wiki/Separation_of_concerns) and [fail-fast](https://en.wikipedia.org/wiki/Fail-fast). The textbook singleton pattern conflates two concerns, possibly due to language limitations: the *management of object instances*, and the *enforcement of the at-most-one-instance-only guarantee*. If we wish to uncouple these responsibilities, then the obvious pythonic answer is that attempting to construct the singleton again while it already exists **should be considered a run-time error**. Since a singleton **type** does not support that operation, this situation should raise a `TypeError`. This makes the error explicit as early as possible, thus adhering to the fail-fast principle, hence making it difficult for bugs to hide. Constructor arguments will either take effect, or the constructor call will explicitly fail. Another question arises due to Python having builtin support for object persistence, namely `pickle`. What *should* happen when a singleton is unpickled, while an instance of that singleton already exists? Arguably, by default, it should load the state from the pickle file into the existing instance, overwriting its current state. -(Scenario: during second and later runs, a program first initializes, which causes the singleton instance to be created, just like during the first run of that program. Then the program loads state from a pickle file, containing (among other data) the state the singleton instance was in when the program previously shut down. In this scenario, considering the singleton, the data in the file is more relevant than the defaults the program initialization feeds in. 
Hence the default should be to replace the state of the existing singleton instance with the data from the pickle file.) +This design is based on considering the following scenario. Consider a program that uses the singleton abstraction. During its second and later runs, the program first initializes, which causes the singleton instance to be created, just like during the first run of the program. Then the program loads state from a pickle file, containing (among other data) the state the singleton instance was in when the program previously shut down. Considering the singleton, the data in the file is more relevant than the defaults the program initialization step feeds in. Hence, the default should be to *replace the state of the existing singleton instance with the data from the pickle file*. -Our `Singleton` abstraction is the result of these pythonifications applied to the classic pattern. For more documentation and examples, see the unit tests in [`unpythonic/test/test_singleton.py`](../unpythonic/test/test_singleton.py). +Our `Singleton` abstraction is the result of these pythonifications applied to the classic pattern. For more documentation and examples, see the unit tests in [`unpythonic/tests/test_singleton.py`](../unpythonic/tests/test_singleton.py). **NOTE**: A related pattern is the *[Borg](http://code.activestate.com/recipes/66531-singleton-we-dont-need-no-stinkin-singleton-the-bo/)*, a.k.a. *Monostate*. [After considering the matter](https://github.com/Technologicat/unpythonic/issues/22), it was felt that in the context of Python, it offers no advantages over the singleton abstraction, while eliminating a useful feature: the singleton abstraction allows using the object identity check (`is`) to check if a name refers to the singleton instance. For this reason, `unpythonic` provides `Singleton`, but no `Borg`. 
If you feel this is unjust, please let me know - this decision can be revisited, if a situation in which a `Borg` is more appropriate than a `Singleton` comes up. @@ -2113,32 +2740,34 @@ Our `Singleton` abstraction is the result of these pythonifications applied to t #### When to use a singleton? -Most often, **don't**. ``Singleton`` is provided for the very rare occasion where it's the appropriate abstraction. There exist **at least** three categories of use cases where singleton-like instantiation semantics are desirable: +Most often, **don't**. `Singleton` is provided for the very rare occasion where it's the appropriate abstraction. There exist **at least** three categories of use cases where singleton-like instantiation semantics are desirable: 1. **A process-wide unique marker value**, which has no functionality other than being quickly and uniquely identifiable as that marker. - - `sym` and `gensym` are the specific tools that cover this use case, depending on whether the intent is to allow that value to be independently "constructed" in several places yet always obtaining the same instance (`sym`), or if the implementation just happens to internally need a guaranteed-unique value that no value passed in from the outside could possibly clash with (`gensym`). For the latter case, sometimes a simple (and much faster) `nonce = object()` will do just as well, if you don't need the human-readable label and `pickle` support. + - `sym` and `gensym` are the specific tools that cover this use case, depending on whether the intent is to allow that value to be independently "constructed" in several places yet always obtaining the same instance (`sym`), or if the implementation just happens to internally need a guaranteed-unique value that no value passed in from the outside could possibly clash with (`gensym`). 
For the latter case, sometimes the simple (and much faster) pythonic idiom `nonce = object()` will do just as well, if you need neither a human-readable label nor `pickle` support.
If your object does, and if you want it to support `pickle`, you may want to customize [`__getnewargs__`](https://docs.python.org/3/library/pickle.html#object.__getnewargs__) (called *at pickling time*), [`__setstate__`](https://docs.python.org/3/library/pickle.html#object.__setstate__), and sometimes maybe also [`__getstate__`](https://docs.python.org/3/library/pickle.html#object.__getstate__). Note that unpickling skips `__init__`, and calls just `__new__` (with the "newargs") and then `__setstate__`. -I'm not completely sure if it's meaningful to provide a generic `Singleton` abstraction for Python, except for teaching purposes. Practical use cases may differ so much, and some of the implementation details of the specific singleton object (esp. related to pickling) may depend so closely on the implementation details of the singleton abstraction, that it may be easier to just roll your own singleton code when needed. If you're new to customizing this part of Python, the code we have here should at least demonstrate an approach for how to do this. +I am not completely sure if it is meaningful to provide a generic `Singleton` abstraction for Python, except for teaching purposes. Practical use cases may differ so much, and some of the implementation details of the specific singleton object (especially related to pickling) may depend so closely on the implementation details of the singleton abstraction, that it may be easier to just roll your own singleton code when needed. If you are new to customizing this part of Python, the code we have here should at least demonstrate how to do that. ## Control flow tools -Tools related to control flow. +Tools related to [control flow](https://en.wikipedia.org/wiki/Control_flow). 
-### ``trampolined``, ``jump``: tail call optimization (TCO) / explicit continuations +### `trampolined`, `jump`: tail call optimization (TCO) / explicit continuations -Express algorithms elegantly without blowing the call stack - with explicit, clear syntax. +*See also the `with tco` [macro](macros.md), which applies tail call optimization **automatically**.* -*Tail recursion*: +*Tail call optimization* is a technique to treat [tail calls](https://en.wikipedia.org/wiki/Tail_call) in such a way that they do not grow the call stack. It sometimes allows expressing algorithms very elegantly. Some functional programming patterns such as functional loops are based on tail calls. + +The factorial function is a classic example of *tail recursion*: ```python from unpythonic import trampolined, jump @@ -2147,62 +2776,94 @@ from unpythonic import trampolined, jump def fact(n, acc=1): if n == 0: return acc - else: - return jump(fact, n - 1, n * acc) + return jump(fact, n - 1, n * acc) print(fact(4)) # 24 +fact(5000) # no crash ``` -Functions that use TCO **must** be `@trampolined`. Calling a trampolined function normally starts the trampoline. +Functions that use TCO **must** be `@trampolined`. The decorator wraps the original function with a [trampoline](https://en.wikipedia.org/wiki/Trampoline_(computing)#High-level_programming). Calling a trampolined function normally starts the trampoline. Inside a trampolined function, a normal call `f(a, ..., kw=v, ...)` remains a normal call. -A tail call with target `f` is denoted `return jump(f, a, ..., kw=v, ...)`. This explicitly marks that it is indeed a tail call (due to the explicit ``return``). Note that `jump` is **a noun, not a verb**. The `jump(f, ...)` part just evaluates to a `jump` instance, which on its own does nothing. Returning it to the trampoline actually performs the tail call. +A tail call with target `f` is denoted `return jump(f, a, ..., kw=v, ...)`. 
This explicitly marks that it is indeed a tail call, due to the explicit `return`. Note that `jump` is **a noun, not a verb**. The `jump(f, ...)` part just evaluates to a `jump` instance, which on its own does nothing. Returning the `jump` instance to the trampoline actually performs the tail call. + +If the jump target has a trampoline, the trampoline implementation will automatically strip it and jump into the actual entry point. + +To return a final result, just `return` it normally. Returning anything but a `jump` shuts down the trampoline, and returns the given value from the initial call (to the `@trampolined` function) that originally started that trampoline. -If the jump target has a trampoline, don't worry; the trampoline implementation will automatically strip it and jump into the actual entrypoint. +**CAUTION**: Trying to `jump(...)` without the `return` does nothing useful, and will **usually** print an *unclaimed jump* warning. It does this by checking a flag in the `__del__` method of `jump`; any correctly used jump instance should have been claimed by a trampoline before it gets garbage-collected. It can only print a warning, not raise an exception or halt the program, due to the limitations of `__del__`. -Trying to ``jump(...)`` without the ``return`` does nothing useful, and will **usually** print an *unclaimed jump* warning. It does this by checking a flag in the ``__del__`` method of ``jump``; any correctly used jump instance should have been claimed by a trampoline before it gets garbage-collected. +Some *unclaimed jump* warnings may appear also if the process is terminated by Ctrl+C (`KeyboardInterrupt`). This is normal; it just means that the termination occurred after a jump object was instantiated but before it was claimed by a trampoline. -(Some *unclaimed jump* warnings may appear also if the process is terminated by Ctrl+C (``KeyboardInterrupt``). 
This is normal; it just means that the termination occurred after a jump object was instantiated but before it was claimed by the trampoline.) +For comparison, with the macro API, the example becomes: + +```python +from unpythonic.syntax import macros, tco + +with tco: + def fact(n, acc=1): + if n == 0: + return acc + return fact(n - 1, n * acc) +print(fact(4)) # 24 +fact(5000) # no crash +``` -The final result is just returned normally. This shuts down the trampoline, and returns the given value from the initial call (to a ``@trampolined`` function) that originally started that trampoline. +*The `with tco` macro implicitly inserts the `@trampolined` decorator, and converts any regular call that appears in tail position into a `jump`. It also transforms lambdas in a similar way.* +#### Tail recursion in a `lambda` -*Tail recursion in a lambda*: +To make a tail-recursive anonymous function, use `trampolined` together with `withself`. The `self` argument is declared explicitly, but passed implicitly, just like the `self` argument of a method: ```python +from unpythonic import trampolined, jump, withself + t = trampolined(withself(lambda self, n, acc=1: acc if n == 0 else jump(self, n - 1, n * acc))) print(t(4)) # 24 ``` -Here the jump is just `jump` instead of `return jump`, since lambda does not use the `return` syntax. +Here the jump is just `jump` instead of `return jump`, because `lambda` does not use the `return` syntax. + +For comparison, with the macro API, this becomes: -To denote tail recursion in an anonymous function, use ``unpythonic.fun.withself``. The ``self`` argument is declared explicitly, but passed implicitly, just like the ``self`` argument of a method. 
+```python +from unpythonic.syntax import macros, tco +from unpythonic import withself + +with tco: + t = withself(lambda self, n, acc=1: + acc if n == 0 else self(n - 1, n * acc)) +print(t(4)) # 24 +``` +#### Mutual recursion with TCO -*Mutual recursion with TCO*: +[Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion) is also supported. Just ask the trampoline to `jump` into the desired function: ```python +from unpythonic import trampolines,jump + @trampolined def even(n): if n == 0: return True - else: - return jump(odd, n - 1) + return jump(odd, n - 1) @trampolined def odd(n): if n == 0: return False - else: - return jump(even, n - 1) + return jump(even, n - 1) assert even(42) is True assert odd(4) is False assert even(10000) is True # no crash ``` -*Mutual recursion in `letrec` with TCO*: +#### Mutual recursion in `letrec` with TCO ```python +from unpythonic import letrec, trampolined, jump + letrec(evenp=lambda e: trampolined(lambda x: (x == 0) or jump(e.oddp, x - 1)), @@ -2213,6 +2874,18 @@ letrec(evenp=lambda e: e.evenp(10000)) ``` +For comparison, with the macro API of `letrec`, this becomes: + +```python +from unpythonic.syntax import macros, letrec +from unpythonic import trampolined, jump + +letrec[[evenp << trampolined(lambda x: + (x == 0) or jump(oddp, x - 1)), + oddp << trampolined(lambda x: + (x != 0) and jump(evenp, x - 1))] in + evenp(10000)] +``` #### Reinterpreting TCO as explicit continuations @@ -2255,22 +2928,25 @@ Clojure has [`(trampoline ...)`](https://clojuredocs.org/clojure.core/trampoline The `return jump(...)` solution is essentially the same there (the syntax is `#(...)`), but in Clojure, the trampoline must be explicitly enabled at the call site, instead of baking it into the function definition, as our decorator does. 
-Clojure's trampoline system is thus more explicit and simple than ours (the trampoline doesn't need to detect and strip the tail-call target's trampoline, if it has one - because with Clojure's solution, it never does), at some cost to convenience at each use site. We have chosen to emphasize use-site convenience. +Clojure's trampoline system is thus more explicit and simple than ours (the trampoline does not need to detect and strip the tail-call target's trampoline, if it has one - because with Clojure's solution, it never does), at some cost to convenience at each use site. We have chosen to emphasize use-site convenience. + +### `looped`, `looped_over`: loops in FP style (with TCO) -### ``looped``, ``looped_over``: loops in FP style (with TCO) +In functional programming, looping can be represented as recursion. The loop body is written as a recursive function. To loop, the function tail-calls itself, possibly with new argument values. Both `for` and `while` loops can be expressed in this way. -*Functional loop with automatic tail call optimization* (for calls re-invoking the loop body): +As a practical detail, tail-call optimization is important, to avoid growing the call stack at each iteration of the loop. + +Here is a functional loop using `unpythonic`, with automatic tail call optimization - no macros needed: ```python -from unpythonic import looped, looped_over +from unpythonic import looped @looped def s(loop, acc=0, i=0): if i == 10: return acc - else: - return loop(acc + i, i + 1) + return loop(acc + i, i + 1) print(s) # 45 ``` @@ -2287,32 +2963,39 @@ define s displayln s ; 45 ``` -The `@looped` decorator is essentially sugar. Behaviorally equivalent code: +In `@looped`, the function name of the loop body is the name of the final result, like in `@call`. To terminate the loop, just `return` the final result normally. This shuts down the loop and replaces the loop body definition (in the example, `s`) with the final result value. 
-```python -@trampolined -def s(acc=0, i=0): - if i == 10: - return acc - else: - return jump(s, acc + i, i + 1) -s = s() -print(s) # 45 -``` +The first parameter of the loop body is the magic parameter `loop`. It is *self-ish*, representing a jump back to the loop body itself, starting a new iteration. Just like Python's `self`, `loop` can have any name; it is passed positionally. + +Note that `loop` is **a noun, not a verb.** This is because the expression `loop(...)` is essentially the same as `jump(...)` to the loop body itself. However, it also arranges things so that the trampolined call inserts the magic parameter `loop`, which can only be set up via this mechanism. + +Additional arguments can be given to `loop(...)`. When the loop body is called, any additional positional arguments are appended to the implicit ones, and can be anything. Additional arguments can also be passed by name. The initial values of any additional arguments **must** be declared as defaults in the formal parameter list of the loop body. The loop is automatically started by `@looped`, by calling the body with the magic `loop` as the only argument. -In `@looped`, the function name of the loop body is the name of the final result, like in `@call`. The final result of the loop is just returned normally. +Any loop variables such as `i` in the above example are **in scope only in the loop body**; there is no `i` in the surrounding scope. Moreover, it is a fresh `i` at each iteration; nothing is mutated by the looping mechanism. -The first parameter of the loop body is the magic parameter ``loop``. It is *self-ish*, representing a jump back to the loop body itself, starting a new iteration. Just like Python's ``self``, ``loop`` can have any name; it is passed positionally. +**Be careful** if you use a mutable object instance as a loop variable: the loop body is just a function call like any other, so the usual rules apply. 
-Note that ``loop`` is **a noun, not a verb.** This is because the expression ``loop(...)`` is essentially the same as ``jump(...)`` to the loop body itself. However, it also inserts the magic parameter ``loop``, which can only be set up via this mechanism. +For another example of functional looping, here is a typical `while True` loop in FP style: -Additional arguments can be given to ``loop(...)``. When the loop body is called, any additional positional arguments are appended to the implicit ones, and can be anything. Additional arguments can also be passed by name. The initial values of any additional arguments **must** be declared as defaults in the formal parameter list of the loop body. The loop is automatically started by `@looped`, by calling the body with the magic ``loop`` as the only argument. +```python +from unpythonic import looped -Any loop variables such as ``i`` in the above example are **in scope only in the loop body**; there is no ``i`` in the surrounding scope. Moreover, it's a fresh ``i`` at each iteration; nothing is mutated by the looping mechanism. (But be careful if you use a mutable object instance as a loop variable. The loop body is just a function call like any other, so the usual rules apply.) +@looped +def _(loop): + print("Enter your name (or 'q' to quit): ", end='') + s = input() + if s.lower() == 'q': + return # ...the implicit None. In a "while True:", "break" here. + else: + print(f"Hello, {s}!") + return loop() +``` -FP loops don't have to be pure: +Functional loops do not have to be pure. Here is a functional loop with a side effect: ```python +from unpythonic import looped + out = [] @looped def _(loop, i=0): @@ -2323,59 +3006,73 @@ def _(loop, i=0): assert out == [0, 1, 2, 3] ``` -Keep in mind, though, that this pure-Python FP looping mechanism is slow, so it may make sense to use it only when "the FP-ness" (no mutation, scoping) is important. 
+**CAUTION**: This pure-Python FP looping mechanism is slow, so it may make sense to use it only when "the FP-ness" (no mutation, scoping) is important. -Also be aware that `@looped` is specifically neither a ``for`` loop nor a ``while`` loop; instead, it is a general looping mechanism that can express both kinds of loops. +#### Relation to the TCO system -*Typical `while True` loop in FP style*: +The `@looped` decorator is essentially sugar. If you read the section further above on TCO, you may have guessed how it is implemented: the `loop` function is actually a jump record in disguise, and `@looped` installs a trampoline. + +Indeed, the following code is behaviorally equivalent to the first example: ```python -@looped -def _(loop): - print("Enter your name (or 'q' to quit): ", end='') - s = input() - if s.lower() == 'q': - return # ...the implicit None. In a "while True:", "break" here. - else: - print(f"Hello, {s}!") - return loop() +from unpythonic import trampolined, jump + +@trampolined +def s(acc=0, i=0): + if i == 10: + return acc + return jump(s, acc + i, i + 1) +s = s() +print(s) # 45 ``` +However, the actual implementation of `@looped` slightly differs from what would be implied by this straightforward translation, because the feature uses no macros. + #### FP loop over an iterable -In Python, loops often run directly over the elements of an iterable, which markedly improves readability compared to dealing with indices. Enter ``@looped_over``: +In Python, loops often run directly over the elements of an iterable, which markedly improves readability compared to dealing with indices. + +For this use case, we provide `@looped_over`: ```python +from unpythonic import looped_over + @looped_over(range(10), acc=0) def s(loop, x, acc): return loop(acc + x) assert s == 45 ``` -The ``@looped_over`` decorator is essentially sugar. Behaviorally equivalent code: +The `@looped_over` decorator is essentially sugar. 
Behaviorally equivalent code: ```python +from unpythonic import call, looped + @call def s(iterable=range(10)): it = iter(iterable) @looped - def _tmp(loop, acc=0): + def tmp(loop, acc=0): try: x = next(it) - return loop(acc + x) + return loop(acc + x) # <-- the loop body except StopIteration: return acc - return _tmp + return tmp assert s == 45 ``` -In ``@looped_over``, the loop body takes three magic positional parameters. The first parameter ``loop`` works like in ``@looped``. The second parameter ``x`` is the current element. The third parameter ``acc`` is initialized to the ``acc`` value given to ``@looped_over``, and then (functionally) updated at each iteration, taking as the new value the first positional argument given to ``loop(...)``, if any positional arguments were given. Otherwise ``acc`` retains its last value. +In `@looped_over`, the loop body takes **three** magic positional parameters. The first parameter `loop` is similar to that in `@looped`. The second parameter `x` is the current element. The third parameter `acc` is initialized to the `acc` value given to `@looped_over`, and then (functionally) updated at each iteration. -If ``acc`` is a mutable object, mutating it is allowed. For example, if ``acc`` is a list, it is perfectly fine to ``acc.append(...)`` and then just ``loop()`` with no arguments, allowing ``acc`` to retain its last value. To be exact, keeping the last value means *the binding of the name ``acc`` does not change*, so when the next iteration starts, the name ``acc`` still points to the same object that was mutated. This strategy can be used to pythonically construct a list in an FP loop. +The new value of `acc` is the first positional argument given to `loop(...)`, if any positional arguments were given. Otherwise `acc` retains its last value. -Additional arguments can be given to ``loop(...)``. The same notes as above apply. For example, here we have the additional parameters ``fruit`` and ``number``. 
The first one is passed positionally, and the second one by name: +If `acc` is a mutable object, mutating it **is allowed**. For example, if `acc` is a list, it is perfectly fine to `acc.append(...)` and then just `loop()` with no arguments, allowing `acc` to retain its last value. To be exact, keeping the last value means *the binding of the name `acc` does not change*, so when the next iteration starts, the name `acc` still points to the same object that was mutated. This strategy can be used to pythonically construct a list in an FP loop. + +Additional arguments can be given to `loop(...)`. The same notes as above apply. For example, here we have the additional parameters `fruit` and `number`. The first one is passed positionally, and the second one by name: ```python +from unpythonic import looped_over + @looped_over(range(10), acc=0) def s(loop, x, acc, fruit="pear", number=23): print(fruit, number) @@ -2385,13 +3082,15 @@ def s(loop, x, acc, fruit="pear", number=23): assert s == 45 ``` -The loop body is called once for each element in the iterable. When the iterable runs out of elements, the last ``acc`` value that was given to ``loop(...)`` becomes the return value of the loop. If the iterable is empty, the body never runs; then the return value of the loop is the initial value of ``acc``. +The loop body is called once for each element in the iterable. When the iterable runs out of elements, the final value of `acc` becomes the return value of the loop. If the iterable is empty, the body never runs; then the return value of the loop is the initial value of `acc`. -To terminate the loop early, just ``return`` your final result normally, like in ``@looped``. (It can be anything, does not need to be ``acc``.) +To terminate the loop early, just `return` your final result normally, like in `@looped`. It can be anything, it does not need to be `acc`. 
-Multiple input iterables work somewhat like in Python's ``for``, except any sequence unpacking must be performed inside the body: +Multiple input iterables work somewhat like in Python's `for`, except any sequence unpacking must be performed inside the body: ```python +from unpythonic import looped_over + @looped_over(zip((1, 2, 3), ('a', 'b', 'c')), acc=()) def p(loop, item, acc): numb, lett = item @@ -2410,6 +3109,8 @@ This is because while *tuple parameter unpacking* was supported in Python 2.x, i FP loops can be nested (also those over iterables): ```python +from unpythonic import looped_over + @looped_over(range(1, 4), acc=()) def outer_result(outer_loop, y, outer_acc): @looped_over(range(1, 3), acc=()) @@ -2419,18 +3120,20 @@ def outer_result(outer_loop, y, outer_acc): assert outer_result == ((1, 2), (2, 4), (3, 6)) ``` -If you feel the trailing commas ruin the aesthetics, see ``unpythonic.misc.pack``. +If you feel the trailing commas ruin the aesthetics, see `unpythonic.pack`. #### Accumulator type and runtime cost As [the reference warns (note 6)](https://docs.python.org/3/library/stdtypes.html#common-sequence-operations), repeated concatenation of tuples has an O(n²) runtime cost, because each concatenation creates a new tuple, which needs to copy all of the already existing elements. To keep the runtime O(n), there are two options: - - *Pythonic solution*: Destructively modify a mutable sequence. Particularly, ``list`` is a dynamic array that has a low amortized cost for concatenation (most often O(1), with the occasional O(n) when the allocated storage grows). - - *Unpythonic solution*: ``cons`` a linked list, and reverse it at the end. Cons cells are immutable; consing a new element to the front costs O(1). Reversing the list costs O(n). + - *Pythonic solution*: Destructively modify a mutable sequence. 
In particular, `list` is a dynamic array that has a low amortized cost for appending an element (most often O(1), with the occasional O(n) when the allocated storage grows).
+The main way to exit an FP loop (also early) is, at any time, to just `return` the final result normally. If you want to exit the function *containing* the loop from inside the loop, see **escape continuations** below. -#### ``continue`` +#### `continue` -The main way to *continue* an FP loop is, at any time, to ``loop(...)`` with the appropriate arguments that will make it proceed to the next iteration. Or package the appropriate `loop(...)` expression into your own function ``cont``, and then use ``cont(...)``: +The main way to *continue* an FP loop is, at any time, to `loop(...)` with the appropriate arguments that will make the loop proceed to the next iteration. Or package the appropriate `loop(...)` expression into your own function `cont`, and then use `cont(...)`: ```python +from unpythonic import looped + @looped def s(loop, acc=0, i=0): cont = lambda newacc=acc: loop(newacc, i + 1) # always increase i; by default keep current value of acc @@ -2496,31 +3203,35 @@ print(s) # 35 This approach separates the computations of the new values for the iteration counter and the accumulator. -#### Prepackaged ``break`` and ``continue`` +#### Prepackaged `break` and `continue` -See ``@breakably_looped`` (offering `brk`) and ``@breakably_looped_over`` (offering `brk` and `cnt`). +See `@breakably_looped` (offering `brk`) and `@breakably_looped_over` (offering `brk` and `cnt`). -The point of `brk(value)` over just `return value` is that `brk` is first-class, so it can be passed on to functions called by the loop body (so that those functions then have the power to directly terminate the loop). +The point of `brk(value)` over just `return value` is that `brk` is first-class, so it can be passed on to functions called by the loop body - so that those functions then have the power to directly terminate the loop. -In ``@looped``, a library-provided ``cnt`` wouldn't make sense, since all parameters except ``loop`` are user-defined. 
*The client code itself defines what it means to proceed to the "next" iteration*. Really the only way in a construct with this degree of flexibility is for the client code to fill in all the arguments itself. +In `@looped`, a library-provided `cnt` would not make sense, since all parameters except `loop` are user-defined. *The client code itself defines what it means to proceed to the "next" iteration*. Really the only way in a construct with this degree of flexibility is for the client code to fill in all the arguments itself. -Because ``@looped_over`` is a more specific abstraction, there the concept of *continue* is much more clear-cut. We define `cnt` to mean *proceed to take the next element from the iterable, keeping the current value of `acc`*. Essentially `cnt` is a partially applied `loop(...)` with the first positional argument set to the current value of `acc`. +Because `@looped_over` is a more specific abstraction, there the concept of *continue* is much more clear-cut. We define `cnt` to mean *proceed to take the next element from the iterable, keeping the current value of `acc`*. Essentially `cnt` is a partially applied `loop(...)` with the first positional argument set to the current value of `acc`. #### FP loops using a lambda as body Just call the `looped()` decorator manually: ```python +from unpythonic import looped + s = looped(lambda loop, acc=0, i=0: loop(acc + i, i + 1) if i < 10 else acc) print(s) ``` -It's not just a decorator; in Lisps, a construct like this would likely be named ``call/looped``. +It's not just a decorator; in the Scheme family of Lisps, a construct like this would likely be named `call/looped`. 
-We can also use ``let`` to make local definitions: +We can also use `let` to make local definitions: ```python +from unpythonic import looped, let + s = looped(lambda loop, acc=0, i=0: let(cont=lambda newacc=acc: loop(newacc, i + 1), @@ -2532,31 +3243,58 @@ print(s) The `looped_over()` decorator also works, if we just keep in mind that parameterized decorators in Python are actually decorator factories: ```python +from unpythonic import looped_over + r10 = looped_over(range(10), acc=0) s = r10(lambda loop, x, acc: loop(acc + x)) assert s == 45 ``` -If you **really** need to make that into an expression, bind ``r10`` using ``let`` (if you use ``letrec``, keeping in mind it is a callable), or to make your code unreadable, just inline it. +If you **really** need to make that into an expression, bind `r10` using `let` (if you use `letrec`, keeping in mind it is a callable), or to make your code unreadable, just inline it. -With ``curry``, this is also a possible solution: +With `curry`, using its passthrough feature, this is also a possible solution: ```python +from unpythonic import curry, looped_over + s = curry(looped_over, range(10), 0, lambda loop, x, acc: loop(acc + x)) assert s == 45 ``` -### ``gtrampolined``: generators with TCO - -In ``unpythonic``, a generator can tail-chain into another generator. This is like invoking ``itertools.chain``, but as a tail call from inside the generator - so the generator itself can choose the next iterable in the chain. If the next iterable is a generator, it can again tail-chain into something else. If it is not a generator, it becomes the last iterable in the TCO chain. 
- -Python provides a convenient hook to build things like this, in the guise of ``return``: +As of v0.15.0, `curry` handles also named arguments, so we can make explicit what the `0` means: ```python -from unpythonic import gtco, take, last +from unpythonic import curry, looped_over + +s = curry(looped_over, range(10), acc=0, + body=(lambda loop, x, acc: + loop(acc + x))) +assert s == 45 +``` + +but because, due to syntactic limitations of Python, no positional arguments can be given *after* a named argument, you then have to know - in order to be able to provide the loop body - that the decorator returned by the factory `looped_over` calls it `body`. + +You can of course obtain such information by inspection (here shown in IPython running Python 3.8): + +```python +In [2]: looped_over(range(10), acc=0) +Out[2]: <function unpythonic.fploop.looped_over.<locals>.run(body)> +``` + +or by looking at [the source code](../unpythonic/fploop.py). + + +### `gtrampolined`: generators with TCO + +In `unpythonic`, a generator can tail-chain into another generator. This is like invoking `itertools.chain`, but as a tail call from inside the generator - so that the generator itself can choose the next iterable in the chain. If the next iterable is a generator, it can again tail-chain into something else. If it is not a generator, it becomes the last iterable in the TCO chain. + +Python provides a convenient hook to build things like this, in the guise of `return`: + +```python +from unpythonic import gtco, take, last def march(): yield 1 @@ -2566,7 +3304,7 @@ assert tuple(take(6, gtco(march()))) == (1, 2, 1, 2, 1, 2) last(take(10000, gtco(march()))) # no crash ``` -Note the calls to ``gtco`` at the use sites. For convenience, we provide ``@gtrampolined``, which automates that: +Note the calls to `gtco` at the use sites.
For convenience, we provide `@gtrampolined`, which automates that: ```python from unpythonic import gtrampolined, take, last @@ -2579,7 +3317,7 @@ assert tuple(take(10, ones())) == (1,) * 10 last(take(10000, ones())) # no crash ``` -It is safe to tail-chain into a ``@gtrampolined`` generator; the system strips the TCO target's trampoline if it has one. +It is safe to tail-chain into a `@gtrampolined` generator; the system strips the TCO target's trampoline if it has one. Like all tail calls, this works for any *iterative* process. In contrast, this **does not work**: @@ -2594,26 +3332,30 @@ def fibos(): # see numerics.py print(tuple(take(10, fibos()))) # --> (1, 1, 2), only 3 terms?! ``` -This sequence (technically iterable, but in the mathematical sense) is recursively defined, and the ``return`` shuts down the generator before it can yield more terms into ``scanl``. With ``yield from`` instead of ``return`` the second example works (but since it is recursive, it eventually blows the call stack). +This sequence (technically iterable, but in the mathematical sense) is recursively defined, and the `return` shuts down the generator before it can yield more terms into `scanl`. With `yield from` instead of `return` the second example works - but since it is recursive, it eventually blows the call stack. 
This particular example can be converted into a linear process with a different higher-order function, no TCO needed: ```python -from unpythonic import unfold, take, last +from unpythonic import unfold, take, last, Values def fibos(): def nextfibo(a, b): - return a, b, a + b # value, *newstates + return Values(a, a=b, b=a + b) return unfold(nextfibo, 1, 1) assert tuple(take(10, fibos())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) last(take(10000, fibos())) # no crash ``` -### ``catch``, ``throw``: escape continuations (ec) +### `catch`, `throw`: escape continuations (ec) + +**Changed in v0.15.0.** *The deprecated names have been removed.* -**Changed in v0.14.2.** *These constructs were previously named `setescape`, `escape`. The names have been changed to match the standard naming for this feature in several Lisps. Starting in 0.14.2, using the old names emits a `FutureWarning`, and the old names will be removed in 0.15.0.* +**Changed in v0.14.2.** *These constructs were previously named `setescape`, `escape`. The names have been changed to match the standard naming for this feature in several Lisps. The old names are now deprecated.* -Escape continuations can be used as a *multi-return*: +In a nutshell, an *escape continuation*, often abbreviated *ec*, transfers control outward on the call stack. Escape continuations are a generalization of `continue`, `break` and `return`. Those three constructs are essentially second-class ecs with a hard-coded escape point (respectively: end of iteration of loop; end of loop; end of function). A general escape continuation mechanism allows setting an escape point explicitly. + +For example, escape continuations can be used as a *multi-return*: ```python from unpythonic import catch, throw @@ -2628,13 +3370,11 @@ def f(): assert f() == "hello from g" ``` -**CAUTION**: The implementation is based on exceptions, so catch-all ``except:`` statements will intercept also throws, breaking the escape mechanism. 
As you already know, be specific in which exception types you catch in an `except` clause! - -In Lisp terms, `@catch` essentially captures the escape continuation (ec) of the function decorated with it. The nearest (dynamically) surrounding ec can then be invoked by `throw(value)`. When the `throw` is performed, the function decorated with `@catch` immediately terminates, returning ``value``. +In Lisp terms, `@catch` essentially captures the escape continuation (ec) of the function decorated with it. The nearest (dynamically) surrounding ec can then be invoked by `throw(value)`. When the `throw` is performed, the function decorated with `@catch` immediately terminates, returning `value`. -In Python terms, a throw means just raising a specific type of exception; the usual rules concerning ``try/except/else/finally`` and ``with`` blocks apply. It is a function call, so it works also in lambdas. +In Python terms, a throw (in the escape continuation sense) means just raising a specific type of exception; the usual rules concerning `try/except/else/finally` and `with` blocks apply. The `throw` is a function call, so it works also in lambdas. -Escaping the function surrounding an FP loop, from inside the loop: +For another example, here we return from the function surrounding an FP loop, from inside the loop: ```python @catch() @@ -2648,7 +3388,7 @@ def f(): f() # --> 15 ``` -For more control, both ``@catch`` points and ``throw`` instances can be tagged: +For more control, both `@catch` points and `throw` instances can be tagged: ```python @catch(tags="foo") # catch point tags can be single value or tuple (tuples OR'd, like isinstance()) @@ -2668,24 +3408,28 @@ def foo(): assert foo() == 15 ``` -For details on tagging, especially how untagged and tagged throw and catch points interact, and how to make one-to-one connections, see the docstring for ``@catch``. 
+For details on tagging, especially how untagged and tagged throw and catch points interact, and how to make one-to-one connections, see the docstring for `@catch`. See also `call_ec` (below), which is a compact syntax to make a one-to-one connection. + +**CAUTION**: The implementation is based on exceptions, so catch-all `except:` statements will intercept also throws, breaking the escape mechanism. As you already know, be specific in which exception types you catch in an `except` clause! **Etymology** -This feature is known as `catch`/`throw` in several Lisps, e.g. in Emacs Lisp and in Common Lisp (as well as some of its ancestors). This terminology is independent of the use of `throw`/`catch` in C++/Java for the exception handling mechanism. Common Lisp also provides a lexically scoped variant (`BLOCK`/`RETURN-FROM`) that is more idiomatic [according to Seibel](http://www.gigamonkeys.com/book/the-special-operators.html). +This feature is known as `catch`/`throw` in several Lisps, e.g. in Emacs Lisp and in Common Lisp (as well as some of its ancestors). This terminology is independent of the use of `throw`/`catch` in C++/Java for the exception handling mechanism. + +Common Lisp also provides a lexically scoped variant (`BLOCK`/`RETURN-FROM`) that is more idiomatic ([according to Seibel](http://www.gigamonkeys.com/book/the-special-operators.html)), but we currently provide only this dynamic variant. -#### ``call_ec``: first-class escape continuations +#### `call_ec`: first-class escape continuations -We provide ``call/ec`` (a.k.a. ``call-with-escape-continuation``), in Python spelled as ``call_ec``. It's a decorator that, like ``@call``, immediately runs the function and replaces the def'd name with the return value. The twist is that it internally sets up a catch point, and hands a **first-class escape continuation** to the callee. +We provide the function `call/ec` (a.k.a. 
[`call-with-escape-continuation`](https://docs.racket-lang.org/reference/cont.html#(def._((quote._~23~25kernel)._call-with-escape-continuation)))), in Python spelled as `call_ec`. It's a decorator that, like `@call`, immediately runs the function and replaces the def'd name with the return value. The twist is that it internally sets up a catch point, and hands a **first-class escape continuation** to the callee. -The function to be decorated **must** take one positional argument, the ec instance. +The function to be decorated **must** take one positional argument, the ec instance. The parameter is conventionally named `ec`. -The ec instance itself is another function, which takes one positional argument: the value to send to the catch point. The ec instance and the catch point are connected one-to-one. No other ``@catch`` point will catch the ec instance, and the catch point catches only this particular ec instance and nothing else. +The ec instance itself is another function, which takes one positional argument: the value to send to the catch point. That value can also be a `Values` object if you want to escape with multiple-return-values or named return values; the ec will send any argument given to it. -Any particular ec instance is only valid inside the dynamic extent of the ``call_ec`` invocation that created it. Attempting to call the ec later raises ``RuntimeError``. +The ec instance and the catch point are connected one-to-one. No other `@catch` point will catch the ec instance, and the catch point catches only the ec instances created by this invocation of `call_ec`, and nothing else. -This builds on ``@catch`` and ``throw``, so the caution about catch-all ``except:`` statements applies here, too. +Any particular ec instance is only valid inside the dynamic extent of the `call_ec` invocation that created it. Attempting to call the ec later raises `RuntimeError`. 
```python from unpythonic import call_ec @@ -2712,7 +3456,7 @@ def result(ec): assert result == 42 ``` -The ec doesn't have to be called from the lexical scope of the call_ec'd function, as long as the call occurs within the dynamic extent of the ``call_ec``. It's essentially a *return from me* for the original function: +The ec does not have to be called from the lexical scope of the `call_ec`'d function, as long as the call occurs *within the dynamic extent* of the `call_ec`. It's essentially a *return from me* for the original function: ```python def f(ec): @@ -2726,7 +3470,7 @@ def result(ec): assert result == 42 ``` -This also works with lambdas, by using ``call_ec()`` directly. No need for a trampoline: +This also works with lambdas, by using `call_ec()` directly. No need for a trampoline: ```python result = call_ec(lambda ec: @@ -2736,11 +3480,11 @@ result = call_ec(lambda ec: assert result == 42 ``` -Normally ``begin()`` would return the last value, but the ec overrides that; it is effectively a ``return`` for multi-expression lambdas! +Normally `begin()` would return the last value, but the ec overrides that; it is effectively a `return` for multi-expression lambdas! But wait, doesn't Python evaluate all the arguments of `begin(...)` before the `begin` itself has a chance to run? Why doesn't the example print also *never reached*? This is because escapes are implemented using exceptions. Evaluating the ec call raises an exception, preventing any further elements from being evaluated. -This usage is valid with named functions, too - ``call_ec`` is not only a decorator: +This usage is valid with named functions, too, so strictly speaking, `call_ec` is not only a decorator: ```python def f(ec): @@ -2754,31 +3498,35 @@ result = call_ec(f) assert result == 42 ``` +*If you use the macro API of `unpythonic`, be aware that the macros cannot analyze this last example properly, because there is no lexical clue that `f` will actually be called using `call_ec`. 
To be safe in situations like this, name your ec parameter `ec`; then it will be recognized as an escape continuation. Also `brk` (defined by `@looped_over`) and `throw` are recognized by name.* + +**CAUTION**: The `call_ec` mechanism builds on `@catch` and `throw`, so the caution about catch-all `except:` statements applies here, too. -### ``forall``: nondeterministic evaluation + +### `forall`: nondeterministic evaluation We provide a simple variant of nondeterministic evaluation. This is essentially a toy that has no more power than list comprehensions or nested for loops. See also the easy-to-use [macro](macros.md) version with natural syntax and a clean implementation. -An important feature of McCarthy's [`amb` operator](https://rosettacode.org/wiki/Amb) is its nonlocality - being able to jump back to a choice point, even after the dynamic extent of the function where that choice point resides. If that sounds a lot like ``call/cc``, that's because that's how ``amb`` is usually implemented. See examples [in Ruby](http://www.randomhacks.net/2005/10/11/amb-operator/) and [in Racket](http://www.cs.toronto.edu/~david/courses/csc324_w15/extra/choice.html). +An important feature of McCarthy's [`amb` operator](https://rosettacode.org/wiki/Amb) is its nonlocality - being able to jump back to a choice point, even after the dynamic extent of the function where that choice point resides. If that sounds a lot like `call/cc`, that is because that's how `amb` is usually implemented. See examples [in Ruby](http://www.randomhacks.net/2005/10/11/amb-operator/) and [in Racket](http://www.cs.toronto.edu/~david/courses/csc324_w15/extra/choice.html). -Python can't do that, short of transforming the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style), while applying TCO everywhere to prevent stack overflow. **If that's what you want**, see ``continuations`` in [the macros](macros.md). 
+Python cannot do that, short of transforming the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style), while applying TCO everywhere to prevent stack overflow. **If that is what you want**, see `continuations` in [the macros](macros.md). -This ``forall`` is essentially a tuple comprehension that: +This `forall` is essentially a tuple comprehension that: - Can have multiple body expressions (side effects also welcome!), by simply listing them in sequence. - Allows filters to be placed at any level of the nested looping. - Presents the source code in the same order as it actually runs. -The ``unpythonic.amb`` module defines four operators: +The module `unpythonic.amb` defines four operators: - - ``forall`` is the control structure, which marks a section with nondeterministic evaluation. - - ``choice`` binds a name: ``choice(x=range(3))`` essentially means ``for e.x in range(3):``. - - ``insist`` is a filter, which allows the remaining lines to run if the condition evaluates to truthy. - - ``deny`` is ``insist not``; it allows the remaining lines to run if the condition evaluates to falsey. + - `forall` is the control structure, which marks a section that uses nondeterministic evaluation. + - `choice` binds a name: `choice(x=range(3))` essentially means `for e.x in range(3):`. + - `insist` is a filter, which allows the remaining lines to run if the condition evaluates to truthy. + - `deny` is `insist not`; it allows the remaining lines to run if the condition evaluates to falsey. -Choice variables live in the environment, which is accessed via a ``lambda e: ...``, just like in ``letrec``. Lexical scoping is emulated. In the environment, each line only sees variables defined above it; trying to access a variable defined later raises ``AttributeError``. +Choice variables live in the environment, which is accessed via a `lambda e: ...`, just like in `letrec`. Lexical scoping is emulated. 
In the environment, each line only sees variables defined above it; trying to access a variable defined later raises `AttributeError`. -The last line in a ``forall`` describes one item of the output. The output items are collected into a tuple, which becomes the return value of the ``forall`` expression. +The last line in a `forall` describes one item of the output. The output items are collected into a tuple, which becomes the return value of the `forall` expression. ```python out = forall(choice(y=range(3)), @@ -2810,46 +3558,52 @@ assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), Beware: ```python -out = forall(range(2), # do the rest twice! +out = forall(range(2), # evaluate remaining items twice! choice(x=range(1, 4)), lambda e: e.x) assert out == (1, 2, 3, 1, 2, 3) ``` -The initial ``range(2)`` causes the remaining lines to run twice - because it yields two output values - regardless of whether we bind the result to a variable or not. In effect, each line, if it returns more than one output, introduces a new nested loop at that point. +The initial `range(2)` causes the remaining items to run twice - because it yields two output values - regardless of whether we bind the result to a variable or not. In effect, each line, if it returns more than one output, introduces a new nested loop at that point. -For more, see the docstring of ``forall``. +For more, see the docstring of `forall`. #### For haskellers The implementation is based on the List monad, and a bastardized variant of do-notation. 
Quick vocabulary: - - ``forall(...)`` = ``do ...`` (for a List monad) - - ``choice(x=foo)`` = ``x <- foo``, where ``foo`` is an iterable - - ``insist x`` = ``guard x`` - - ``deny x`` = ``guard (not x)`` - - Last line = implicit ``return ...`` + - `forall(...)` = `do ...` (for a List monad) + - `choice(x=foo)` = `x <- foo`, where `foo` is an iterable + - `insist x` = `guard x` + - `deny x` = `guard (not x)` + - Last line = implicit `return ...` -### ``handlers``, ``restarts``: conditions and restarts +### `handlers`, `restarts`: conditions and restarts -**Added in v0.14.2**. +**Changed in v0.15.0.** *Functions `resignal_in` and `resignal` added; these perform the same job for conditions as `reraise_in` and `reraise` do for exceptions, that is, they allow you to map library exception types to semantically appropriate application exception types, with minimum boilerplate.* + +*Upon an unhandled signal, `signal` now returns the canonized input `condition`, with a nice traceback attached. This feature is intended for implementing custom error protocols on top of `signal`; `error` already uses it to produce a nice-looking error report.* + +*The error-handling protocol that was used to send a signal is now available for inspection in the `__protocol__` attribute of the condition instance. It is the callable that sent the signal, such as `signal`, `error`, `cerror` or `warn`. It is the responsibility of each error-handling protocol (except the fundamental `signal` itself) to pass its own function to `signal` as the `protocol` argument; if not given, `protocol` defaults to `signal`. The protocol information is used by the `resignal` mechanism.* -**Changed in v0.14.3**. *Conditions can now inherit from `BaseException`, not only from `Exception.` `with handlers` catches also derived types, e.g. a handler for `Exception` now catches a signaled `ValueError`.* +**Changed in v0.14.3**. 
*Conditions can now inherit from `BaseException`, not only from `Exception`. Just like the `except` statement, `with handlers` catches also derived types, e.g. a handler for `Exception` now catches a signaled `ValueError`.* *When an unhandled `error` or `cerror` occurs, the original unhandled error is now available in the `__cause__` attribute of the `ControlError` exception that is raised in this situation.* *Signaling a class, as in `signal(SomeExceptionClass)`, now implicitly creates an instance with no arguments, just like the `raise` statement does. On Python 3.7+, `signal` now automatically equips the condition instance with a traceback, just like the `raise` statement does for an exception.* +**Added in v0.14.2**. + One of the killer features of Common Lisp are *conditions*, which are essentially **resumable exceptions**. -Following Peter Seibel ([Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), we define *errors* as the consequences of [Murphy's Law](https://en.wikipedia.org/wiki/Murphy%27s_law), i.e. situations where circumstances cause interaction between the program and the outside world to fail. An error is no bug, but failing to handle an error certainly is. +Following Peter Seibel ([Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), we define *errors* as the consequences of [Murphy's Law](https://en.wikipedia.org/wiki/Murphy%27s_law), i.e. situations where circumstances cause interaction between the program and the outside world to fail. An error is not a bug, but failing to handle an error certainly is. An exception system splits error-recovery responsibilities into two parts. In Python terms, we speak of *raising* and then *handling* an exception. In comparison, a condition system splits error-recovery responsibilities into **three parts**: *signaling*, *handling* and *restarting*.
-The result is improved modularity. Consider [separation of mechanism and policy](https://en.wikipedia.org/wiki/Separation_of_mechanism_and_policy). We place the actual error-recovery code (the mechanism) in *restarts*, at the inner level (of the call stack) - which has access to all the low-level technical details that are needed to actually perform the recovery. We can provide *several different* canned recovery strategies, which implement any appropriate ways to recover, in the context of each low- or middle-level function. We defer the decision of which one to use (the policy), *to an outer level*. The outer level knows about the big picture - *why* the inner levels are running in this particular case, i.e. what we are trying to accomplish and how. Hence, it is in the ideal position to choose which error-recovery strategy should be used *in its high-level context*. +The result is improved modularity and better [separation of mechanism and policy](https://en.wikipedia.org/wiki/Separation_of_mechanism_and_policy). The actual error-recovery code (the **mechanism**) lives in *restarts*, at the inner level (of the call stack) - which has access to all the low-level technical details that are needed to actually perform an error recovery. It is possible to provide *several different* canned recovery strategies, which implement any appropriate ways to recover, in the context of each low- or middle-level function. The decision of which strategy to use (the **policy**) in any particular situation is deferred *to an outer level* (of the call stack). The outer level knows the big picture - *why* the inner levels are running in this particular case, i.e., what we are trying to accomplish and how. Hence, it is the appropriate place to choose which error-recovery strategy should be used *in its high-level context*. -Practical Common Lisp explains conditions in the context of a log file parser. 
In contrast, let us explain them with some Theoretical Python: +Seibel's *Practical Common Lisp* explains conditions in the context of a log file parser. In contrast, let us explain them with some *Theoretical Python*: ```python from unpythonic import restarts, handlers, signal, invoke, unbox @@ -2892,19 +3646,21 @@ high3() #### Fundamental signaling protocol -Generally a condition system operates as follows. A *signal* is sent (outward on the call stack) from the actual location where the error was detected. A *handler* at any outer level may then respond to it, and execution resumes from the *restart* that is *invoked* by the handler. +Generally a conditions-and-restarts system operates as follows. A *signal* is sent, outward on the call stack, from the actual location where an error was detected. A *handler* at any outer level (of the call stack) may then respond to it, and execution resumes from the *restart* that is *invoked* by the handler. -The sequence of catching a signal and invoking a restart is termed *handling* the signal. Handlers are searched in order from innermost to outermost on the call stack. (Strictly speaking, the handlers live on a separate stack; we consider those handlers whose dynamic extent the point of execution is in, at the point of time when the signal is sent.) +The sequence of catching a signal and invoking a restart is termed *handling* the signal. Handlers are searched in order from innermost to outermost on the call stack. Strictly speaking, though, the handlers live on a separate stack; we consider those handler bindings whose dynamic extent the point of execution is in, at the point of time when the signal is sent. In general, it is allowed for a handler to fall through (return normally); then the next outer handler for the same signal type gets control. This allows the programmer to chain handlers to obtain their side effects, such as logging. This is referred to as *canceling*, since as a result, the signal remains unhandled. 
-Viewed with respect to the call stack, the restarts live between the (outer) level of the handler, and the (inner) level where the signal was sent from. The main difference to the exception model is that unlike raising an exception, **sending a signal does not unwind the call stack**. Although the handlers live further out on the call stack, the stack does not unwind that far. The handlers are just consulted for what to do. The call stack unwinds only when a restart is being invoked. Then, only the part of the call stack between the location that sent the signal, and the invoked restart, is unwound. +Viewed with respect to the call stack, the restarts live between the (outer) level of the handler, and the (inner) level where the signal was sent from. The main difference to the exception model is that unlike raising an exception, **sending a signal does not unwind the call stack**. (Let that sink in for a moment.) + +Although the handlers live further out on the call stack, the stack does not unwind that far. The handlers are just consulted for what to do. **The call stack unwinds only when a restart is invoked.** Then, only the part of the call stack between the location that sent the signal, and the invoked restart, is unwound. -Restarts, despite the name, are a mildly behaved, structured control construct. The block of code that encountered the error is actually not arbitrarily resumed; instead, the restart code runs instead of the rest of the block, and the return value of the restart replaces the normal return value. (But see `cerror`.) +Restarts, despite the name, are a mildly behaved, structured control construct. The block of code that encountered the error is actually not arbitrarily resumed; instead, the code of the invoked restart runs instead of the rest of the block, and the return value of the restart replaces the normal return value. (But see `cerror`.) 
#### API summary -Restarts are set up using the `with restarts` context manager (Common Lisp: `RESTART-CASE`). Restarts are defined by giving named arguments to the `restarts` form; the argument name sets the restart name. The restart name is distinct from the name (if any) of the function that is used as the restart. A restart can only be invoked from within the dynamic extent of its `with restarts` (the same rule is effect also in Common Lisp). A restart may take any args and kwargs; any that it expects must be provided when it is invoked. +Restarts are set up using the `with restarts` context manager (Common Lisp: `RESTART-CASE`). Restarts are defined by passing named arguments to the `restarts` form; the argument name sets the *restart name*. The restart name is distinct from the name (if any) of the function that is used as the restart. A restart can only be invoked from within the dynamic extent of its `with restarts` (the same rule is in effect also in Common Lisp). A restart may take any args and kwargs; any that it expects must be provided when it is invoked. *Note difference to the API of [python-cl-conditions](https://github.com/svetlyak40wt/python-cl-conditions/), which requires functions used as restarts to be named, and uses the function name as the restart name.* @@ -2914,21 +3670,21 @@ Signals are sent using `signal` (Common Lisp: `SIGNAL`). Any exception or warnin Handlers are established using the `with handlers` context manager (Common Lisp: `HANDLER-BIND`). Handlers are bound to exception types, or tuples of types, just like regular exception handlers in Python. The `handlers` form takes as its arguments any number of `(exc_spec, handler)` pairs. Here `exc_spec` specifies the exception types to catch (when sent via `signal`), and `handler` is a callable. When catching a signal, in case of multiple matches in the same `with handlers` form, the handler that appears earlier in the argument list wins.
-A handler catches signals of the types it is bound to. The code in the handler may invoke a restart by calling `invoke` (Common Lisp: `INVOKE-RESTART`), with the desired restart name as a string. In case of duplicate names, the most recently established restart (that is still in scope) with the given name wins. Any extra args and kwargs are passed through to the restart. The `invoke` function always transfers control, never returns normally. +A handler catches signals of the types it is bound to, and their subtypes. The code in the handler may invoke a restart by calling `invoke` (Common Lisp: `INVOKE-RESTART`), with the desired restart name as a string. In case of duplicate names, the most recently established restart (that is still in scope) with the given name wins. Any extra args and kwargs are passed through to the restart. The `invoke` function always transfers control, it never returns normally. -A handler **may** take one optional positional argument, the exception instance being signaled. Roughly, API-wise signal handlers are similar to exception handlers (`except` clauses). A handler that accepts an argument is like an `except ... as ...`, whereas one that does not is like `except ...`. **The main difference** to an exception handler is that a **signal handler should not try to recover from the error itself**; instead, **it should just choose** which strategy the lower-level code should use to recover from the error. Usually, the only thing a signal handler needs to do, is to invoke a particular restart. +A handler **may** take one optional positional argument, the exception instance being signaled. Roughly, API-wise signal handlers are similar to exception handlers (`except` clauses). A handler that accepts an argument is like an `except ... as ...`, whereas one that does not is like `except ...`. 
**The main difference** to an exception handler is that a **signal handler should not try to recover from the error by itself**; instead, **it should just choose** which strategy the lower-level code should use to recover from the error. Usually, the only thing a signal handler needs to do is to invoke a particular restart. To create a simple handler that does not take an argument, and just invokes a pre-specified restart, see `invoker`. If you instead want to create a function that you can call from a handler, in order to invoke a particular restart immediately (so to define a shorthand notation similar to `use_value`), use `functools.partial(invoke, "my_restart_name")`. -Following Common Lisp terminology, *a named function that invokes a specific restart* - whether it is intended to act as a handler or to be called from one - is termed a *restart function*. (This is somewhat confusing, as a *restart function* is not a function that implements a restart, but a function that *invokes* a specific one.) The `use_value` function mentioned above is an example. +Following Common Lisp terminology, *a named function that invokes a specific restart* - whether it is intended to act as a handler or to be called from one - is termed a *restart function*. This is somewhat confusing, as a *restart function* is not a function that implements a restart, but a function that *invokes* a specific one. The `use_value` function mentioned above is an example. -For a detailed API reference, see the module ``unpythonic.conditions``. +For a detailed API reference, see the module `unpythonic.conditions`. #### High-level signaling protocols We actually provide four signaling protocols: `signal` (i.e. the fundamental protocol), and three that build additional behavior on top of it: `error`, `cerror` and `warn`. Each of the three is modeled after its Common Lisp equivalent. -If no handler *handles* the signal, the `signal(...)` protocol just returns normally. 
In effect, with respect to control flow, unhandled signals are ignored by this protocol. (But any side effects of handlers that caught the signal but did not invoke a restart, still take place.) +If no handler *handles* the signal, the `signal(...)` protocol just returns normally. In effect, with respect to control flow, unhandled signals are ignored by this protocol. However, any side effects of handlers that caught the signal but did not invoke a restart, still take place. The `error(...)` protocol first delegates to `signal`, and if the signal was not handled by any handler, then **raises** `ControlError` as a regular exception. (Note the Common Lisp `ERROR` function would at this point drop you into the debugger.) The implementation of `error` itself is the only place in the condition system that *raises* an exception for the end user; everything else (including any error situations) uses the signaling mechanism. @@ -2938,17 +3694,19 @@ Finally, there is the `warn(...)` protocol, which is just a lispy interface to P The combination of `warn` and `muffle` (as well as `cerror` when a handler invokes its `proceed` restart) behaves somewhat like [`contextlib.suppress`](https://docs.python.org/3/library/contextlib.html#contextlib.suppress), except that execution continues normally from the next statement in the caller of `warn` (respectively `cerror`) instead of unwinding to the handler. -If the standard protocols don't cover what you need, you can also build your own high-level protocols on top of `signal`. See the source code of `error`, `cerror` and `warn` for examples (it's just a few lines in each case). +If the standard protocols do not cover what you need, you can also build your own high-level protocols on top of `signal`. See the source code of `error`, `cerror` and `warn` for examples (it's just a few lines in each case). ##### Notes The name `cerror` stands for *correctable error*, see e.g. 
[CERROR in the CL HyperSpec](http://clhs.lisp.se/Body/f_cerror.htm). What we call `proceed`, Common Lisp calls `CONTINUE`; the name is different because in Python the function naming convention is lowercase, and `continue` is a reserved word. -If you really want to emulate `ON ERROR RESUME NEXT`, just use `Exception` as the condition type for your handler, and all `cerror` calls within the block will return normally, provided that no other handler handles those conditions first. +If you really want to emulate `ON ERROR RESUME NEXT`, just use `Exception` as the condition type for your handler, and all `cerror` calls within the block will return normally, provided that no other handler (that appears in an inner position on the call stack) handles those conditions first. #### Conditions vs. exceptions -Using the condition system essentially requires eschewing exceptions, using only restarts and handlers instead. A regular `raise` will fly past a `with handlers` form uncaught. The form just maintains a stack of functions; it does not establish an *exception* handler. Similarly, a `try`/`except` cannot catch a signal, because no exception is raised yet at handler lookup time. Delaying the stack unwind, to achieve the three-way split of responsibilities, is the whole point of the condition system. Which of the two systems to use is a design decision that must be made consistently on a per-project basis. +Using the condition system essentially requires eschewing exceptions, using only restarts and handlers instead. A regular `raise` will fly past a `with handlers` form uncaught. The form just maintains a stack of functions; it does not establish an *exception* handler. Similarly, a `try`/`except` cannot catch a signal, because no exception is raised yet at handler lookup time. Delaying the stack unwind, to achieve the three-way split of responsibilities, is the whole point of the condition system. 
+ +Which of the two systems to use is a design decision that must be made consistently on a per-project basis. Even better would be to make it globally on a per-language basis. Python's standard library, as well as all existing libraries, use exceptions instead of conditions, so to obtain a truly seamless conditions-and-restarts user experience, one would have to wrap (or rewrite) at least all of the standard library, plus any other libraries a project needs, to be protected from sudden, unexpected unwinds of the call stack. (The nature of both conditions and exceptions is that, in principle, they may be triggered anywhere.) Be aware that error-recovery code in a Lisp-style signal handler is of a very different nature compared to error-recovery code in an exception handler. A signal handler usually only chooses a restart and invokes it; as was explained above, the code that actually performs the error recovery (i.e. the *restart*) lives further in on the call stack, and still has available (in its local variables) the state that is needed to perform the recovery. An exception handler, on the other hand, must respond by directly performing error recovery right where it is, without any help from inner levels - because the stack has already unwound when the exception handler gets control. @@ -2962,34 +3720,60 @@ If this `ControlError` signal is not handled, a `ControlError` will then be **ra #### Historical note -Conditions are one of the killer features of Common Lisp, so if you're new to conditions, [Peter Seibel: Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) is a good place to learn about them. There's also a relevant [discussion on Lambda the Ultimate](http://lambda-the-ultimate.org/node/1544). 
+Conditions are one of the killer features of Common Lisp, so if you are new to conditions, [Peter Seibel: Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) is a good place to learn about them. There is also a relevant [discussion on Lambda the Ultimate](http://lambda-the-ultimate.org/node/1544). For Python, conditions were first implemented in [python-cl-conditions](https://github.com/svetlyak40wt/python-cl-conditions/) by Alexander Artemenko (2016). -What we provide here is essentially a rewrite, based on studying that implementation. The main reasons for the rewrite are to give the condition system an API consistent with the style of `unpythonic`, to drop any and all historical baggage without needing to consider backward compatibility, and to allow interaction with (and customization taking into account) the other parts of `unpythonic`. If you specifically need a condition system, not a kitchen-sink language extension, then by all means go for `python-cl-conditions`! +What we provide here is essentially a rewrite, based on studying that implementation. The main reasons for the rewrite are to give the condition system an API consistent with the style of `unpythonic`, to drop any and all historical baggage without needing to consider backward compatibility, and to allow interaction with (and customization taking into account) the other parts of `unpythonic`. + +The core idea can be expressed in fewer than 100 lines of Python; ours is (as of v0.15.0) 199 lines, not counting docstrings, comments, or blank lines. The main reason our module is over 900 lines is the docstrings. 
+ + +### `generic`, `typed`, `isoftype`: multiple dispatch + +**Changed in v2.0.0.** *`isoftype` now supports many more `typing` features: `NoReturn`, `Never`, `Literal`, `Type`, `ClassVar`, `Final`, `DefaultDict`, `OrderedDict`, `Counter`, `ChainMap`, `IO`/`TextIO`/`BinaryIO`, `Pattern`/`Match`, `ContextManager`/`AsyncContextManager`, `Awaitable`/`Coroutine`, `AsyncIterable`/`AsyncIterator`, `Generator`/`AsyncGenerator`, `TypedDict`, `@runtime_checkable` `Protocol`, and parametric forms of abstract ABCs (`Iterable[T]`, `Collection[T]`, `Reversible[T]` with best-effort element checking; `Iterator[T]`, `Container[T]`). See the [`isoftype` section](#isoftype-the-big-sister-of-isinstance) for the full list.* + +**Changed in v0.15.0**. *The `dispatch` and `typecheck` modules providing this functionality are now considered stable (no longer experimental). Starting with this release, they receive the same semantic-versioning guarantees as the rest of `unpythonic`.* + +*Added the `@augment` parametric decorator that can register a new multimethod on an existing generic function originally defined in another lexical scope.* -The core idea can be expressed in fewer than 100 lines of Python; ours is (as of v0.14.2) 151 lines, not counting docstrings, comments, or blank lines. The main reason our module is over 700 lines are the docstrings. +*Added the function `methods`, which displays a list of multimethods of a generic function. This is especially useful in the REPL.* +*Docstrings of the multimethods are now automatically concatenated to make up the docstring of the generic function, so you can document each multimethod separately.* -### ``generic``, ``typed``, ``isoftype``: multiple dispatch +*`curry` now supports `@generic`. In the case where the **number** of positional arguments supplied so far matches at least one multimethod, but there is no match for the given combination of argument **types**, `curry` waits for more arguments (returning the curried function). 
See the manual section on `curry` for details.* + +*It is now possible to dispatch also on a homogeneous type of contents collected by a `**kwargs` parameter. In the type signature, use `typing.Dict[str, mytype]`. Note that in this use, the key type is always `str`.* + +**Changed in v0.14.3**. *The multiple-dispatch decorator `@generic` no longer takes a master definition. Multimethods are registered directly with `@generic`; the first multimethod definition implicitly creates the generic function.* + +*The `@generic` and `@typed` decorators can now decorate also instance methods, class methods and static methods (beside regular functions, as previously in 0.14.2).* **Added in v0.14.2**. -**Changed in v0.14.3**. *The multiple-dispatch decorator `@generic` no longer takes a master definition. Methods are registered directly with `@generic`; the first method definition implicitly creates the generic function.* +The `generic` decorator allows creating [multiple-dispatch](https://en.wikipedia.org/wiki/Multiple_dispatch) generic functions with type annotation syntax. We also provide some friendly utilities: `augment` adds a new multimethod to an existing generic function, `typed` creates a single-method generic with the same syntax (i.e. provides a compact notation for writing dynamic type-checking code), and `isoftype` (which powers the first three) is the big sister of `isinstance`, with support for many (but not all) features of the `typing` standard library module. + +This is a purely run-time implementation, so it does **not** give performance benefits, but it can make code more readable, and makes it modular to add support for new input types (or different call signatures) to an existing function later. + +The terminology is: + + - The function that supports multiple call signatures is a *generic function*. + - Each of its individual implementations is a *multimethod*. 
+ +The term *multimethod* distinguishes them from the OOP sense of *method*, already established in Python, and also serves as a reminder that multiple arguments participate in dispatching. -**Changed in v0.14.3**. *The `@generic` and `@typed` decorators can now decorate also instance methods, class methods and static methods (beside regular functions, as previously in 0.14.2).* +**CAUTION**: Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, because all arguments of each function call will be wrapped in a promise (`unpythonic.Lazy`) that carries no type information on its contents. -**Changed in v0.15.0**. *The `dispatch` and `typecheck` modules providing this functionality are now considered stable (no longer experimental). Added the `@generic_for` parametric decorator that can register a new method on an existing generic function originally defined in another lexical scope.* -The ``generic`` decorator allows creating multiple-dispatch generic functions with type annotation syntax. +#### `generic`: multiple dispatch with type annotation syntax -We also provide some friendly utilities: ``typed`` creates a single-method generic with the same syntax (i.e. provides a compact notation for writing dynamic type checking code), and ``isoftype`` (which powers the first two) is the big sister of ``isinstance``, with support for many (but unfortunately not all) features of the ``typing`` standard library module. +The `generic` decorator essentially allows replacing the `if`/`elif` dynamic type checking boilerplate of polymorphic functions with type annotations on the function parameters, with support for features from the `typing` stdlib module. This not only kills boilerplate, but makes the dispatch extensible, since the dispatcher is separate from the actual function definition, and has a mechanism to register new multimethods. 
-For what kind of things can be done with this, see particularly the [*holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/) example in [`unpythonic.test.test_dispatch`](../unpythonic/test/test_dispatch.py). +If several multimethods of the same generic function match the arguments given, the most recently registered multimethod wins. -#### ``generic``: multiple dispatch with type annotation syntax +To see what multimethods are registered on a given generic function `f`, call `methods(f)`. It will print a human-readable description to stdout. -The ``generic`` decorator essentially allows replacing the `if`/`elif` dynamic type checking boilerplate of polymorphic functions with type annotations on the function parameters, with support for features from the `typing` stdlib module. +**CAUTION**: The winning multimethod is chosen differently from Julia, where the most specific multimethod wins. Doing that requires a more careful type analysis than what we have here. The details are best explained by example: @@ -2997,10 +3781,10 @@ The details are best explained by example: import typing from unpythonic import generic -@generic # The first definition creates the generic function, and registers the first method. +@generic # The first definition creates the generic function, and registers the first multimethod. def zorblify(x: int, y: int): return "int, int" -@generic # noqa: F811, registered as a method of the same generic function. +@generic # noqa: F811, registered as a multimethod of the same generic function. def zorblify(x: str, y: int): return "str, int" @generic # noqa: F811 @@ -3008,7 +3792,7 @@ def zorblify(x: str, y: float): return "str, float" # Then we just call our function as usual. -# Note all arguments participate in dispatching (i.e. in choosing which method gets called). +# Note all arguments participate in dispatching (i.e. in choosing which multimethod gets called). 
assert zorblify(2, 3) == "int, int" assert zorblify("cat", 3) == "str, int" assert zorblify("cat", 3.14) == "str, float" @@ -3049,38 +3833,190 @@ assert gargle(1, 2, 3, 4, 5) == "int" assert gargle(2.71828, 3.14159) == "float" assert gargle(42, 6.022e23, "hello") == "int, float, str" assert gargle(1, 2, 3) == "int" # as many as in the [int, float, str] case. Still resolves correctly. + +# v0.15.0: dispatching on a homogeneous type inside **kwargs is also supported, via `typing.Dict` +@generic +def kittify(**kwargs: typing.Dict[str, int]): # all kwargs are ints + return "int" +@generic +def kittify(**kwargs: typing.Dict[str, float]): # all kwargs are floats # noqa: F811 + return "float" + +assert kittify(x=1, y=2) == "int" +assert kittify(x=1.0, y=2.0) == "float" ``` -See [the unit tests](../unpythonic/test/test_dispatch.py) for more. For which features of the ``typing`` stdlib module are supported, see ``isoftype`` below. +See [the unit tests](../unpythonic/tests/test_dispatch.py) for more. For which features of the `typing` stdlib module are supported, see [`isoftype`](#isoftype-the-big-sister-of-isinstance) below. + -Inspired by the [multi-methods of CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) (the Common Lisp Object System), and the [generic functions of Julia](https://docs.julialang.org/en/v1/manual/methods/). +##### `@generic` and OOP -##### ``@generic`` and OOP +Beginning with v0.14.3, `@generic` and `@typed` can decorate instance methods, class methods and static methods (beside regular functions as in v0.14.2). -As of version 0.14.3, `@generic` and `@typed` can decorate instance methods, class methods and static methods (beside regular functions as in 0.14.2). +When using both `@generic` or `@typed` and OOP, important things to know are: -When using both `@generic` or `@typed` and OOP: + - In case of `@generic`, consider first if that is what you really want. 
+ - The method access syntax already hides a single-dispatch mechanism behind the dot-access syntax: the syntax `x.op(...)` picks the definition of `op` based on the type of `x`. This behaves exactly like a single-dispatch function where the first argument is `x`, i.e., we could as well write `op(x, ...)`. + - So the question to ask is, is the use case best served by two overlapping dispatch mechanisms? + - If not, what are the alternative strategies? Would it be better, for example, to represent the operations as top-level `@generic` *functions*, and perform the dispatch there, dispatching to OOP methods as appropriate? + - `@typed` is fine to use with OOP, because semantically, it is not really a dispatch mechanism, but a run-time type-checking mechanism, even though it is implemented in terms of the multiple-dispatch machinery. - **`self` and `cls` parameters**. - The `self` and `cls` parameters do not participate in dispatching, and need no type annotation. - - Beside appearing as the first positional-or-keyword parameter, the self-like parameter **must be named** one of `self`, `this`, `cls`, or `klass` to be detected by the ignore mechanism. This limitation is due to implementation reasons; while a class body is being evaluated, the context needed to distinguish a method (OOP sense) from a regular function is not yet present. + - Beside appearing as the first positional-or-keyword parameter, the self-like parameter **must be named** one of `self`, `this`, `cls`, or `klass` to be detected by the ignore mechanism. + + This limitation is due to implementation reasons; while a class body is being evaluated, the context needed to distinguish a method (in the OOP sense) from a regular function is not yet present. In Python, OOP method binding is performed by the [descriptor](https://docs.python.org/3/howto/descriptor.html) that triggers when the method attribute is read on an instance. 
+ + If curious, try this (tested in Python 3.8): + + ```python + class Thing: + def f(self): + pass + + print(type(Thing.f)) # --> "function", i.e. the same type as a bare function + assert Thing.f is Thing.f # it's always the same function object + + thing = Thing() + print(type(thing.f)) # --> "method", i.e. a bound method of Thing instance at 0x... + assert thing.f is not thing.f # each read produces a **new** bound method object + + lst = [1, 2, 3] + print(type(lst.append)) # --> "builtin_function_or_method" + assert lst.append is not lst.append # this happens even for builtins + ``` - **OOP inheritance**. - - When `@generic` is installed on an OOP method (instance method, or `@classmethod`), then at call time, classes are tried in [MRO](https://en.wikipedia.org/wiki/C3_linearization) order. All generic-function methods of the OOP method defined in the class currently being looked up are tested for matches first, before moving on to the next class in the MRO. (This has subtle consequences, related to in which class in the hierarchy the various generic-function methods for a particular OOP method are defined.) + - When `@generic` is installed on a method (instance method, or `@classmethod`), then at call time, classes are tried in [MRO](https://en.wikipedia.org/wiki/C3_linearization) order. All multimethods of the method defined in the class currently being looked up are tested for matches first, before moving on to the next class in the MRO. This has subtle consequences, related to in which class in the hierarchy the various multimethods for a particular method are defined. - To work with OOP inheritance, `@generic` must be the outermost decorator (except `@classmethod` or `@staticmethod`, which are essentially compiler annotations). - - However, when installed on a `@staticmethod`, the `@generic` decorator does not support MRO lookup, because that would make no sense. 
See discussions on interaction between `@staticmethod` and `super` in Python: [[1]](https://bugs.python.org/issue31118) [[2]](https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879). + - However, when installed on a `@staticmethod`, the `@generic` decorator does not support MRO lookup, because that would make no sense. A static method is just a bare function that happens to be stored in a class namespace. See discussions on the interaction between `@staticmethod` and `super` in Python: [[1]](https://bugs.python.org/issue31118) [[2]](https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879). + - When inspecting an **instance method** that is `@generic`, be sure to call the `methods` function **on an instance**: -##### Notes + ```python + class Thing: + @generic + def f(self, x: int): + pass -*Terminology*: in both CLOS and in Julia, *function* is the generic entity, while *method* refers to its specialization to a particular combination of argument types. Note that *no object instance or class is needed*. Contrast with the classical OOP sense of *method*, i.e. a function that is associated with an object instance or class, with single dispatch based on the class (or in exotic cases, such as monkey-patched instances, on the instance). + @classmethod + @generic + def g(cls, x: int): + pass -Based on my own initial experiments with this feature, the machinery itself works well enough, but to really shine - just like resumable exceptions - multiple dispatch needs to be used everywhere, throughout the language's ecosystem. Python obviously doesn't do that. + thing = Thing() + methods(thing.f) + methods(Thing.g) + ``` -#### ``typed``: add run-time type checks with type annotation syntax + This allows seeing registered multimethods also from linked dispatchers in the MRO. -The ``typed`` decorator creates a one-method pony, which automatically enforces its argument types. 
Just like with ``generic``, the type specification may use features from the `typing` stdlib module. + If we instead call it as `methods(Thing.f)`, the `self` argument is not bound yet (because `Thing.f` is just a bare function), so the dispatch machinery cannot get a reference to the MRO. This is obviously not an issue when actually *using* `f`, since an instance method is pretty much always invoked on an instance. + + For class methods, `methods(Thing.g)` sees the MRO, because `cls` is already bound. + +For usage examples of `@generic` with OOP, see [the unit tests](../unpythonic/tests/test_dispatch.py). + + +#### `augment`: add a new multimethod to an existing generic function + +The `@augment` decorator adds a new multimethod to an existing generic function. With this system, it is possible to implement [*holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/): + +```python +import typing +from unpythonic import generic, augment + +class FunninessTrait: + pass +class IsFunny(FunninessTrait): + pass +class IsNotFunny(FunninessTrait): + pass + +@generic +def funny(x: typing.Any): # default + raise NotImplementedError(f"`funny` trait not registered for anything matching {type(x)}") + +@augment(funny) +def funny(x: str): # noqa: F811 + return IsFunny() +@augment(funny) +def funny(x: int): # noqa: F811 + return IsNotFunny() + +@generic +def laugh(x: typing.Any): + return laugh(funny(x), x) + +@augment(laugh) +def laugh(traitvalue: IsFunny, x: typing.Any): + return f"Ha ha ha, {x} is funny!" +@augment(laugh) +def laugh(traitvalue: IsNotFunny, x: typing.Any): + return f"{x} is not funny." + +assert laugh("that") == "Ha ha ha, that is funny!" +assert laugh(42) == "42 is not funny." +``` + +**CAUTION**: `@augment` can be dangerous to the readability of your codebase. Keep in mind that the multiple-dispatch table is global state. 
If you add a new multimethod for a generic function defined elsewhere, for types defined elsewhere, this may lead to [*spooky action at a distance*](https://lexi-lambda.github.io/blog/2016/02/18/simple-safe-multimethods-in-racket/) (as in [action at a distance](https://en.wikipedia.org/wiki/Action_at_a_distance_(computer_programming))), because it may change the meaning of existing code. In the Julia community, this is known as [*type piracy*](https://docs.julialang.org/en/v1/manual/style-guide/#Avoid-type-piracy). + +As Alexis King points out, no type piracy occurs if **at least one** of the following conditions holds: + + 1. At least one of the types in the call signature of the new multimethod is defined by you. + + 2. The generic function you are augmenting is defined by you. + + +##### How to augment a function that is not already `@generic` + +Given this: + +```python +# thirdparty.py +def op(x): + if isinstance(x, int): + return 2 * x + elif isinstance(x, float): + return 2.0 * x + raise TypeError(f"unsupported argument: {type(x)} with value {repr(x)}") +``` + +you do not have to change that code, but you will have to know which argument types the existing function supports (because that information is not available in an inspectable form at its interface), and then overwrite the original binding, with something like this: + +```python +# ours.py +import thirdparty + +original_op = thirdparty.op + +# Multimethod implementations for the types supported by the original `op`. +# We just re-dispatch here. +@generic +def op(x: int): + return original_op(x) +@generic +def op(x: float): + return original_op(x) + +thirdparty.op = op # unavoidable bit of monkey-patching +``` + +Then it can be augmented as usual: + +```python +@augment(op) +def op(x: str): # "ha" -> "ha, ha" + return ", ".join(x for _ in range(2)) +``` + +while preserving the meaning of all existing code that uses `thirdparty.op`. 
+ + +#### `typed`: add run-time type checks with type annotation syntax + +The `typed` decorator creates a one-multimethod pony, which automatically enforces its argument types. Just like with `generic`, the type specification may use features from the `typing` stdlib module. ```python import typing @@ -3096,23 +4032,52 @@ def jack(x: typing.Union[int, str]): assert blubnify(2, 21.0) == 42 blubnify(2, 3) # TypeError -assert not hasattr(blubnify, "register") # no more methods can be registered on this function +assert not hasattr(blubnify, "register") # no more multimethods can be registered on this function assert jack(42) == 42 assert jack("foo") == "foo" jack(3.14) # TypeError ``` -For which features of the ``typing`` stdlib module are supported, see ``isoftype`` below. +For which features of the `typing` stdlib module are supported, see [`isoftype`](#isoftype-the-big-sister-of-isinstance) below. + + +#### `isoftype`: the big sister of `isinstance` + +Type check object instances against type specifications at run time. This is the machinery that powers `generic` and `typed`. This goes beyond `isinstance` in that many (but not all) features of the `typing` standard library module are supported. -**CAUTION**: When using ``typed`` with ``curry``, the type checking (and hence ``TypeError``, if any) only occurs when the actual call triggers. Code using that combination may be hard to debug. +`isoftype` is a **non-destructive** runtime type checker. It never consumes iterators, calls functions, or enters context managers to inspect their types. This limits what it can check — for example, element types of iterators and argument/return types of callables cannot be verified — but it means `isoftype` is always safe to call, even in hot loops or dispatch logic. +Any checks on the type arguments of the meta-utilities defined in the `typing` stdlib module are performed recursively using `isoftype` itself, in order to allow compound specifications. 
-#### ``isoftype``: the big sister of ``isinstance`` +**Supported `typing` features:** -Type check object instances against type specifications at run time. This is the machinery that powers ``generic`` and ``typed``. This goes beyond ``isinstance`` in that many (but unfortunately not all) features of the ``typing`` standard library module are supported. +| Category | Supported types | +|----------|----------------| +| Basics | `Any`, `TypeVar`, `NewType`, `Union`, `Optional` | +| Bottom | `NoReturn`, `Never` (3.11+) | +| Values | `Literal[v1, v2, ...]` | +| Classes | `Type[X]` | +| Wrappers | `ClassVar[T]`, `Final[T]` (stripped, inner type checked) | +| Tuples | `Tuple`, `Tuple[T, ...]`, `Tuple[T1, T2, ..., TN]` | +| Sequences | `List[T]`, `Sequence[T]`, `MutableSequence[T]`, `Deque[T]` | +| Sets | `Set[T]`, `FrozenSet[T]`, `AbstractSet[T]`, `MutableSet[T]` | +| Mappings | `Dict[K, V]`, `DefaultDict[K, V]`, `OrderedDict[K, V]`, `Counter[T]`, `ChainMap[K, V]`, `Mapping[K, V]`, `MutableMapping[K, V]` | +| Views | `KeysView[K]`, `ValuesView[V]`, `ItemsView[K, V]` | +| IO | `IO`, `IO[str]`, `IO[bytes]`, `TextIO`, `BinaryIO` | +| Regex | `Pattern[T]`, `Match[T]` (string type checked) | +| Callables | `Callable` (arg/return types **not** checked) | +| Generators | `Generator`, `AsyncGenerator` (yield/send/return types **not** checked) | +| Async | `Awaitable`, `Coroutine`, `AsyncIterable`, `AsyncIterator` | +| Context managers | `ContextManager`, `AsyncContextManager` | +| Protocols | `SupportsInt`, `SupportsFloat`, `SupportsComplex`, `SupportsBytes`, `SupportsIndex`, `SupportsAbs`, `SupportsRound` | +| Protocol (user) | `@runtime_checkable` Protocol subclasses (structural subtyping via `isinstance`) | +| TypedDict | Structural check: required/optional keys, value types recursively checked | +| ABCs (best-effort) | `Iterable[T]`, `Collection[T]`, `Reversible[T]` (elements checked when value is `Sized`; ABC-only when not) | +| ABCs (type arg ignored) | `Iterator[T]`, 
`Container[T]` (parametric form accepted, type arg silently ignored) | +| ABCs (non-generic) | `Hashable`, `Sized` | -Any checks on the type arguments of the meta-utilities defined in the ``typing`` stdlib module are performed recursively using `isoftype` itself, in order to allow compound abstract specifications. +**Not supported:** `Generic`, `ForwardRef`. Specific `NamedTuple` subclasses work via the `isinstance` fallback. Non-`@runtime_checkable` Protocols raise `TypeError` with an actionable message. Some examples: @@ -3120,11 +4085,11 @@ Some examples: import typing from unpythonic import isoftype -# concrete types - uninteresting, we just delegate to `isinstance` +# concrete types — just delegates to isinstance assert isoftype(17, int) assert isoftype(lambda: ..., typing.Callable) -# typing.newType +# typing.NewType UserId = typing.NewType("UserId", int) assert isoftype(UserId(42), UserId) # Note limitation: since NewType types discard their type information at @@ -3168,31 +4133,269 @@ assert isoftype({1: "foo", 2: "bar"}, typing.MutableMapping[int, str]) assert isoftype((1, 2, 3), typing.Sequence[int]) assert isoftype({1, 2, 3}, typing.AbstractSet[int]) +# new in 2.0.0 +assert isoftype(200, typing.Literal[200, 404, 500]) +assert isoftype(int, typing.Type[int]) +assert isoftype(bool, typing.Type[int]) # bool is a subclass of int +import collections +assert isoftype(collections.Counter("hello"), typing.Counter[str]) +import re +assert isoftype(re.compile(r"\d+"), typing.Pattern[str]) +import io +assert isoftype(io.StringIO("hi"), typing.TextIO) +assert isoftype(io.BytesIO(b"hi"), typing.BinaryIO) + # one-trick ponies assert isoftype(3.14, typing.SupportsRound) assert isoftype([1, 2, 3], typing.Sized) + +# best-effort element checking for abstract iterables +assert isoftype([1, 2, 3], typing.Iterable[int]) # concrete → elements checked +assert not isoftype([1, 2, 3], typing.Iterable[str]) # wrong element type +assert isoftype(iter([1, 2, 3]), 
typing.Iterable[int]) # opaque iterator → ABC only + +# TypedDict — structural checking of keys and value types +class Point(typing.TypedDict): + x: float + y: float +assert isoftype({"x": 1.0, "y": 2.0}, Point) +assert not isoftype({"x": 1.0}, Point) # missing required key + +# Protocol (must be @runtime_checkable) +@typing.runtime_checkable +class Drawable(typing.Protocol): + def draw(self) -> None: ... +class Circle: + def draw(self): + pass +assert isoftype(Circle(), Drawable) ``` -See [the unit tests](../unpythonic/test/test_typecheck.py) for more. +See [the unit tests](../unpythonic/tests/test_typecheck.py) for the full set of supported features. -**CAUTION**: Callables are just checked for being callable; no further analysis is done. Type-checking callables properly requires a much more complex type checker. +**CAUTION**: For types where the type parameters describe behavior rather than stored data — `Callable`, `Generator`, `AsyncGenerator`, `ContextManager`, `AsyncContextManager`, `Awaitable`, `Coroutine`, `AsyncIterable`, `AsyncIterator`, `Iterator`, `Container` — only the ABC is checked. The type parameters are silently ignored, because checking them would require consuming or invoking the value. -**CAUTION**: The `isoftype` function is one big hack. As of Python 3.6, there is no consistent way to handle a type specification at run time. We must access some private attributes of the ``typing`` meta-utilities, because that seems to be the only way to get what we need to do this. +For `Iterable[T]`, `Collection[T]`, and `Reversible[T]`, element types are checked **best-effort**: if the value is `Sized` (a concrete collection like `list`, `set`, etc.), elements are checked; if it's an opaque iterator, only the ABC is checked. Empty concrete collections reject parametric specs (consistent with `List[T]`, `Sequence[T]`, etc.). 
-If you need a run-time type checker for serious general use, consider the [`typeguard`](https://github.com/agronholm/typeguard) library, which focuses on that. +#### Notes -## Other +The multiple-dispatch subsystem of `unpythonic` was inspired by the [multi-methods of CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) (the Common Lisp Object System), and the [generic functions of Julia](https://docs.julialang.org/en/v1/manual/methods/). -Stuff that didn't fit elsewhere. +In both CLOS and in Julia, *function* is the generic entity, while *method* refers to its specialization to a particular combination of argument types. Note that *no object instance or class is needed*. Contrast with the classical OOP sense of *method*, i.e. a function that is associated with an object instance or class, with single dispatch based on the class (or in exotic cases, such as monkey-patched instances, on the instance). + +Based on my own initial experiments with this feature in Python, the machinery itself works well enough, but to really shine - just like conditions and restarts - multiple dispatch needs to be used everywhere, throughout the language's ecosystem. Julia is impressive here. Python obviously does not do that. + +Our machinery is missing some advanced features, such as matching the most specific multimethod candidate instead of the most recently defined one; an `issubclass` equivalent that understands `typing` type specifications; and a mechanism to remove previously declared multimethods. + +*If you need multiple dispatch, but not the other features of `unpythonic`, see the [multipledispatch](https://github.com/mrocklin/multipledispatch) library, which likely runs faster.* + +*If you need a run-time type checker, but not the other features of `unpythonic`, see the [`typeguard`](https://github.com/agronholm/typeguard) library. 
If you are fine with a separate static type checker (which is the step where type checking arguably belongs), just use [`Mypy`](http://mypy-lang.org/).* + + +## Exception tools + +Utilities for dealing with exceptions. + +### `raisef`, `tryf`: `raise` and `try` as functions + +**Changed in v0.15.0.** *Deprecated parameters for `raisef` removed.* + +**Changed in v0.14.3**. *Now we have also `tryf`.* + +**Changed in v0.14.2**. *The parameters of `raisef` now more closely match what would be passed to `raise`. See examples below. Old-style parameters are now deprecated.* + +The `raisef` function allows raising an exception from an expression position: + +```python +from unpythonic import raisef + +# plain `raise ...` +f = lambda x: raisef(RuntimeError("I'm in ur lambda raising exceptions")) + +# `raise ... from ...` +exc = TypeError("oof") +g = lambda x: raisef(RuntimeError("I'm in ur lambda raising exceptions"), cause=exc) +``` + +The `tryf` function is a `try`/`except`/`else`/`finally` construct for an expression position: + +```python +from unpythonic import raisef, tryf + +raise_instance = lambda: raisef(ValueError("all ok")) +test[tryf(lambda: raise_instance(), + (ValueError, lambda err: f"got a ValueError: '{err.args[0]}'")) == "got a ValueError: 'all ok'"] +``` + +The exception handler is a function. It may optionally accept one argument, the exception instance. Just like in an `except` clause, the exception specification can be either an exception type, or a `tuple` of exception types. + +Functions can also be specified to represent the `else` and `finally` blocks; the keyword parameters to do this are `elsef` and `finallyf`. Each of them is a thunk (a 0-argument function). See the docstring of `unpythonic.tryf` for details. + +Examples can be found in [the unit tests](../unpythonic/tests/test_excutil.py). + + +### `equip_with_traceback` + +**Added in v0.14.3**. 
+ +In Python 3.7 and later, the `equip_with_traceback` function equips a manually created exception instance with a traceback. This is useful mainly in special cases, where `raise` cannot be used for some reason. (The `signal` function in the conditions-and-restarts system uses this.) + +```python +e = SomeException(...) +e = equip_with_traceback(e) +``` + +The traceback is automatically extracted from the call stack of the calling thread. + +Optionally, you can cull a number of the topmost frames by passing the optional argument `stacklevel=...`. Typically, for direct use of this function `stacklevel` should be the default `1` (so it excludes `equip_with_traceback` itself, but shows all stack levels from your code), and for use in a utility function that itself is called from your code, it should be `2` (so it excludes the utility function, too). If the utility function itself calls a separate low-level utility, `3` can be useful (see [the source code](../unpythonic/conditions.py) of the conditions-and-restarts system for an example). + + +### `async_raise`: inject an exception to another thread + +**Added in v0.14.2**. + +**CAUTION**: *Currently this is supported by CPython only, because as of June 2021, PyPy3 does not expose the required functionality to the Python level, nor do there seem to be any plans to do so.* + +Usually injecting an exception into an unsuspecting thread makes absolutely no sense. But there are special cases, notably `KeyboardInterrupt`. Especially, a REPL server may need to send a `KeyboardInterrupt` into a REPL session thread that is happily stuck waiting for input inside [`InteractiveConsole.interact`](https://docs.python.org/3/library/code.html#code.InteractiveConsole.interact) - while the client that receives the actual `Ctrl+C` is running in a separate process, possibly even on a different machine. This and similar awkward situations in network programming are pretty much the only use case for this feature. 
+ +The function is named `async_raise`, because it injects an *asynchronous exception*. This has nothing to do with `async`/`await`. Synchronous vs. asynchronous exceptions [mean something different](https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity). + +In a nutshell, a *synchronous* exception (which is the usual kind of exception) has an explicit `raise` somewhere in the code that the thread that encountered the exception is running. In contrast, an *asynchronous* exception **does not**, it just suddenly magically materializes from the outside. As such, it can in principle happen *anywhere*, with absolutely no hint about it in any obvious place in the code. + +Obviously, this can be very confusing, so this feature should be used sparingly, if at all. **We only provide it because the REPL server needs it**, and it would be silly to have such a feature but not make it public. + +Here is an example: + +```python +from unpythonic import async_raise, box, unbox + +out = box() +def worker(): + try: + for j in range(10): + sleep(0.1) + except KeyboardInterrupt: # normally, KeyboardInterrupt is only raised in the main thread + pass + out << j +t = threading.Thread(target=worker) +t.start() +sleep(0.1) # make sure the worker has entered the loop +async_raise(t, KeyboardInterrupt) # CPython only! This will gracefully error out on PyPy. +t.join() +assert unbox(out) < 9 # thread terminated early due to the injected KeyboardInterrupt +``` + +#### Is this how KeyboardInterrupt works under the hood? + +**No, it is not.** The way `KeyboardInterrupt` usually works is, the OS sends a [SIGINT](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGINT), which is then trapped by an [OS signal handler](https://docs.python.org/3/library/signal.html) that runs in the main thread. + +Note that it is an OS signal, in the *nix sense; which is unrelated to the Lisp/`unpythonic` sense, as in conditions-and-restarts. 
+ +At that point the magic has already happened: the control of the main thread is now inside the signal handler, as if the signal handler was called from the otherwise currently innermost point on the call stack. All the handler needs to do is to perform a regular `raise`, and the exception will propagate correctly. + +#### History + +Original detective work by [Federico Ficarelli](https://gist.github.com/nazavode/84d1371e023bccd2301e) and [LIU Wei](https://gist.github.com/liuw/2407154). + +Raising async exceptions is a [documented feature of Python's public C API](https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc), but it was never meant to be invoked from within pure Python code. But then the CPython devs gave us [ctypes.pythonapi](https://docs.python.org/3/library/ctypes.html#accessing-values-exported-from-dlls), which allows access to CPython's C API from within Python. Combining the two gives `async_raise` without the need to compile a C extension. + +(If you think `ctypes.pythonapi` is too quirky, the [pycapi](https://pypi.org/project/pycapi/) PyPI package smooths over the rough edges.) + +Unfortunately PyPy does **not** currently (June 2021) implement this function in its CPython C API emulation layer, `cpyext`. See `unpythonic` issue [#58](https://github.com/Technologicat/unpythonic/issues/58). + + +### `reraise_in`, `reraise`: automatically convert exception types + +**Added in v0.15.0.** + +Sometimes it is useful to semantically convert exception types from one problem domain to another, particularly across the different levels of abstraction in an application. 
We provide `reraise_in` and `reraise` to do this with minimum boilerplate: + +```python +from unpythonic import reraise_in, reraise, raisef + +class LibraryException(Exception): + pass +class MoreSophisticatedLibraryException(LibraryException): + pass + +class UnrelatedException(Exception): + pass + +class ApplicationException(Exception): + pass + +# reraise_in: expr form +# The mapping is {in0: out0, ...} +try: + # reraise_in(thunk, mapping) + reraise_in(lambda: raisef(LibraryException), + {LibraryException: ApplicationException}) +except ApplicationException: # note the type! + print("all ok!") + +try: + # subclasses are converted, too + reraise_in(lambda: raisef(MoreSophisticatedLibraryException), + {LibraryException: ApplicationException}) +except ApplicationException: + print("all ok!") + +try: + # tuples of types are accepted, like in `except` clauses + reraise_in(lambda: raisef(UnrelatedException), + {(LibraryException, UnrelatedException): + ApplicationException}) +except ApplicationException: + print("all ok!") + +# reraise: block form +# The mapping is {in0: out0, ...} +try: + with reraise({LibraryException: ApplicationException}): + raise LibraryException +except ApplicationException: + print("all ok!") + +try: + with reraise({LibraryException: ApplicationException}): + raise MoreSophisticatedLibraryException +except ApplicationException: + print("all ok!") + +try: + with reraise({(LibraryException, UnrelatedException): + ApplicationException}): + raise LibraryException +except ApplicationException: + print("all ok!") + +``` + +If that does not seem much shorter than a hand-written `try`/`except`/`raise from`, consider that you can create the mapping once and then use it from a variable - this shortens it to just `with reraise(my_mapping)`. + +Any exceptions that do not match anything in the mapping are passed through. When no exception occurs, `reraise_in` passes the return value of `thunk` through, and `reraise` does nothing. 
+ +Full details in docstrings. -### ``def`` as a code block: ``@call`` +If you use the conditions-and-restarts system, see also `resignal_in`, `resignal`, which perform the same job for conditions. The new signal is sent using the same error handling protocol as the original signal, so e.g. an `error` will remain an `error` even if re-signaling changes its type. -Fuel for different thinking. Compare `call-with-something` in Lisps - but without parameters, so just `call`. A `def` is really just a new lexical scope to hold code to run later... or right now! +Examples can be found in [the unit tests](../unpythonic/tests/test_excutil.py). -At the top level of a module, this is seldom useful, but keep in mind that Python allows nested function definitions. Used with an inner ``def``, this becomes a versatile tool. -*Make temporaries fall out of scope as soon as no longer needed*: +## Function call and return value tools + +### `def` as a code block: `@call` + +Fuel for different thinking. Compare `call-with-something` in Lisps - but without parameters, so just `call`. A `def` is really just a new lexical scope to hold code to run later... or as `@call` does, right now! + +At the top level of a module, this is seldom useful, but keep in mind that Python allows nested function definitions. Used with an inner `def`, this becomes a versatile tool. + +Note that beside use as a decorator, `call` can also be used as a normal function: `call(f, *a, **kw)` is the same as `f(*a, **kw)`. This is occasionally useful. + +Let us consider some example use cases of `@call`. + +#### Make temporaries fall out of scope as soon as no longer needed ```python from unpythonic import call @@ -3206,9 +4409,13 @@ def x(): print(x) # 30 ``` -*Multi-break out of nested loops* - `continue`, `break` and `return` are really just second-class [ec](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._call%2Fec%29%29)s. 
So `def` to make `return` escape to exactly where you want: +#### Multi-break out of nested loops + +As was noted in the section on escape continuations, `continue`, `break` and `return` are really just second-class [ec](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._call%2Fec%29%29)s. So use a `def` to make `return` escape to exactly where you want: ```python +from unpythonic import call + @call def result(): for x in range(10): @@ -3218,7 +4425,7 @@ def result(): print(result) # (6, 7) ``` -(But see ``@catch``, ``throw``, and ``call_ec``.) +But if you need a *multi-return*, see `@catch`, `throw`, and `call_ec`. Compare the sweet-exp Racket: @@ -3233,9 +4440,11 @@ define result displayln result ; (6 7) ``` -Noting [what ``let/ec`` does](https://docs.racket-lang.org/reference/cont.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2Fec%29%29), using ``call_ec`` we can make the Python even closer to the Racket: +Noting [what `let/ec` does](https://docs.racket-lang.org/reference/cont.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2Fec%29%29), using `call_ec` we can make the Python even closer to the Racket: ```python +from unpythonic import call_ec + @call_ec def result(rtn): for x in range(10): @@ -3245,20 +4454,24 @@ def result(rtn): print(result) # (6, 7) ``` -*Twist the meaning of `def` into a "let statement"*: +#### Twist the meaning of `def` into a "let statement" ```python +from unpythonic import call + @call def result(x=1, y=2, z=3): return x * y * z print(result) # 6 ``` -(But see `blet`, `bletrec` if you want an `env` instance.) +If you want an `env` instance, see `blet` and `bletrec`. 
-*Letrec without `letrec`*, when it doesn't have to be an expression: +#### Letrec without `letrec`, when a statement is acceptable ```python +from unpythonic import call + @call def t(): def evenp(x): return x == 0 or oddp(x - 1) @@ -3267,22 +4480,22 @@ def t(): print(t) # True ``` -Essentially the implementation is just `def call(thunk): return thunk()`. The point is to: +#### Notes - - Make it explicit right at the definition site that this block is *going to be called now* (in contrast to an explicit call and assignment *after* the definition). Centralize the related information. Align the presentation order with the thought process. +Essentially the implementation is just `def call(thunk): return thunk()`. The point of this seemingly trivial construct is to: - - Help eliminate errors, in the same way as the habit of typing parentheses only in pairs. No risk of forgetting to call the block after writing the definition. + - Make it explicit right at the definition site that this block is *going to be called now*, in contrast to an explicit call and assignment *after* the definition. This centralizes the related information, and aligns the presentation order with the thought process. - - Document that the block is going to be used only once. Tell the reader there's no need to remember this definition. + - Help eliminate errors, in the same way as the habit of typing parentheses only in pairs (or using a tool like Emacs's `smartparens-mode` to enforce that). With `@call`, there is no risk of forgetting to call the block after writing the definition. -Note [the grammar](https://docs.python.org/3/reference/grammar.html) requires a newline after a decorator. + - Document that the block is going to be used only once. Tell your readers there is no need to remember this definition. -**NOTE**: ``call`` can also be used as a normal function: ``call(f, *a, **kw)`` is the same as ``f(*a, **kw)``. This is occasionally useful. 
+Note [the grammar](https://docs.python.org/3/reference/grammar.html) requires a newline after a decorator. -### ``@callwith``: freeze arguments, choose function later +### `@callwith`: freeze arguments, choose function later -If you need to pass arguments when using ``@call`` as a decorator, use its cousin ``@callwith``: +If you need to pass arguments when using `@call` as a decorator, use its sister `@callwith`: ```python from unpythonic import callwith @@ -3293,9 +4506,11 @@ def result(x): assert result == 9 ``` -Like ``call``, it can also be called normally. It's essentially an argument freezer: +Like `call`, beside use as a decorator, `callwith` can also be called normally. It is essentially an argument freezer: ```python +from unpythonic import callwith + def myadd(a, b): return a + b def mymul(a, b): @@ -3305,16 +4520,17 @@ assert apply23(myadd) == 5 assert apply23(mymul) == 6 ``` -When called normally, the two-step application is mandatory. The first step stores the given arguments. It returns a function ``f(callable)``. When ``f`` is called, it calls its ``callable`` argument, passing in the arguments stored in the first step. +When `callwith` is called normally, the two-step application is mandatory. The first step stores the given arguments. It then returns a function `f(callable)`. When `f` is called, it calls its `callable` argument, passing in the arguments stored in the first step. -In other words, ``callwith`` is similar to ``functools.partial``, but without specializing to any particular function. The function to be called is given later, in the second step. +In other words, `callwith` is similar to `functools.partial`, but without specializing to any particular function. The function to be called is given later, in the second step. -Hence, ``callwith(2, 3)(myadd)`` means "make a function that passes in two positional arguments, with values ``2`` and ``3``. Then call this function for the callable ``myadd``". 
But if we instead write``callwith(2, 3, myadd)``, it means "make a function that passes in three positional arguments, with values ``2``, ``3`` and ``myadd`` - not what we want in the above example. +Hence, `callwith(2, 3)(myadd)` means *make a function that passes in two positional arguments, with values `2` and `3`. Then call this function for the callable `myadd`*. But if we instead write `callwith(2, 3, myadd)`, it means *make a function that passes in three positional arguments, with values `2`, `3` and `myadd`* - not what we want in the above example. -If you want to specialize some arguments now and some later, combine with ``partial``: +If you want to specialize some arguments now and some later, combine `callwith` with `partial`: ```python from functools import partial +from unpythonic import callwith p1 = partial(callwith, 2) p2 = partial(p1, 3) @@ -3330,20 +4546,22 @@ assert apply234(mul3) == 24 If the code above feels weird, it should. Arguments are gathered first, and the function to which they will be passed is chosen in the last step. 
-Another use case of ``callwith`` is ``map``, if we want to vary the function instead of the data: +Another use case of `callwith` is `map`, if we want to vary the function instead of the data: ```python +from unpythonic import callwith + m = map(callwith(3), [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) assert tuple(m) == (6, 9, 3**(1/2)) ``` -If you use the quick lambda macro `f[]` (underscore notation for Python), this combines nicely: +If you use the quick lambda macro `fn[]` (underscore notation for Python), these features combine nicely: ```python -from unpythonic.syntax import macros, f +from unpythonic.syntax import macros, fn from unpythonic import callwith -m = map(callwith(3), [f[2 * _], f[_**2], f[_**(1/2)]]) +m = map(callwith(3), [fn[2 * _], fn[_**2], fn[_**(1/2)]]) assert tuple(m) == (6, 9, 3**(1/2)) ``` @@ -3375,73 +4593,268 @@ assert tuple(m) == (6, 9, 3**(1/2)) Inspired by *Function application with $* in [LYAH: Higher Order Functions](http://learnyouahaskell.com/higher-order-functions). -### ``raisef``, ``tryf``: ``raise`` and ``try`` as functions +### `Values`: multiple and named return values -**Changed in v0.14.3**. *Now we have also `tryf`.* +**Added in v0.15.0.** + +`Values` is a structured multiple-return-values type. + +With `Values`, you can return multiple values positionally, and **return values by name**. This completes the symmetry between passing function arguments and returning values from a function. Python itself allows passing arguments by name, but has no concept of returning values by name. This class adds that concept. + +Having a `Values` type separate from `tuple` helps with semantic accuracy. In `unpythonic` 0.15.0 and later, a `tuple` return value means just that - one value that is a `tuple`. It is distinct from a `Values` that contains several positional return values (that are meant to be treated separately e.g. by a function composition utility). 
+ +Inspired by the [`values`](https://docs.racket-lang.org/reference/values.html) form of Racket. + +#### When to use `Values` -**Changed in v0.14.2**. *The parameters of `raisef` now more closely match what would be passed to `raise`. See examples below. Old-style parameters are now deprecated, and support for them will be dropped in v0.15.0.* +Most of the time, returning a tuple to denote multiple-return-values and unpacking it is just fine, and that is exactly what `unpythonic` does internally in many places. -Raise an exception from an expression position: +But the distinction is critically important in function composition, so that positional return values can be automatically mapped into positional arguments to the next function in the chain, and named return values into named arguments. + +Accordingly, various parts of `unpythonic` that deal with function composition use the `Values` abstraction; particularly `curry`, `unfold`, `iterate`, the `compose` and `pipe` families, and the `with continuations` macro. + +#### Behavior + +`Values` is a duck-type with some features of both sequences and mappings, but not the full [`collections.abc`](https://docs.python.org/3/library/collections.abc.html) API of either. + +If there are no named return values in a `Values` object, it can be unpacked like a tuple. This covers the common use case of multiple positional return values with a minimum of fuss. + +Each operation that obviously and without ambiguity makes sense only for the positional or named part, accesses that part. + +The only exception is `__getitem__` (subscripting), which makes sense for both parts, unambiguously, because the key types differ. If the index expression is an `int` or a `slice`, it is an index/slice for the positional part. If it is an `str`, it is a key for the named part. + +If you need to explicitly access either part (and its full API), use the `rets` and `kwrets` attributes. The names are in analogy with `args` and `kwargs`. 
+ +`rets` is a `tuple`, and `kwrets` is an `unpythonic.frozendict`. + +`Values` objects can be compared for equality. Two `Values` objects are equal if both their `rets` and `kwrets` (respectively) are. + +See the docstrings, [the source code](../unpythonic/funutil.py), and [the unit tests](../unpythonic/tests/test_funutil.py) for full details. + +Examples: ```python -from unpythonic import raisef +from unpythonic import Values -# plain `raise ...` -f = lambda x: raisef(RuntimeError("I'm in ur lambda raising exceptions")) +def f(): + return Values(1, 2, 3) +result = f() +assert isinstance(result, Values) +assert result.rets == (1, 2, 3) +assert not result.kwrets +assert result[0] == 1 +assert result[:-1] == (1, 2) +a, b, c = result # if no kwrets, can be unpacked like a tuple +a, b, c = f() -# `raise ... from ...` -exc = TypeError("oof") -g = lambda x: raisef(RuntimeError("I'm in ur lambda raising exceptions"), cause=exc) +def g(): + return Values(x=3) # named return value +result = g() +assert isinstance(result, Values) +assert not result.rets +assert result.kwrets == {"x": 3} # actually a `frozendict` +assert "x" in result # `in` looks in the named part +assert result["x"] == 3 +assert result.get("x", None) == 3 +assert result.get("y", None) is None +assert tuple(result.keys()) == ("x",) # also `values()`, `items()` + +def h(): + return Values(1, 2, x=3) +result = h() +assert isinstance(result, Values) +assert result.rets == (1, 2) +assert result.kwrets == {"x": 3} +a, b = result.rets # positionals can always be unpacked explicitly +assert result[0] == 1 +assert "x" in result +assert result["x"] == 3 + +def silly_but_legal(): + return Values(42) +result = silly_but_legal() +assert result.rets[0] == 42 +assert result.ret == 42 # shorthand for single-value case ``` -Catch an exception in an expression position: +The last example is silly, but legal, because it is preferable to just omit the `Values` if it is known that there is only one return value. 
This also applies when that value is a `tuple`, when the intent is to return it as a single `tuple`, in contexts where this distinction matters. + + +### `valuify` + +The `valuify` decorator converts the pythonic tuple-as-multiple-return-values idiom into `Values`, to easily use existing code with our function composition utilities. + +It converts a `tuple` return value, exactly; no subclasses. + +Demonstrating only the conversion: ```python -from unpythonic import raisef, tryf +from unpythonic import valuify, Values -raise_instance = lambda: raisef(ValueError("all ok")) -test[tryf(lambda: raise_instance(), - (ValueError, lambda err: f"got a ValueError: '{err.args[0]}'")) == "got a ValueError: 'all ok'"] +@valuify +def f(x, y, z): + return x, y, z + +assert isinstance(f(1, 2, 3), Values) +assert f(1, 2, 3) == Values(1, 2, 3) ``` -The exception handler is a function. It may optionally accept one argument, the exception instance. -Functions can also be specified for the `else` and `finally` behavior; see the docstring of `unpythonic.misc.tryf` for details. +## Numerical tools +We briefly introduce the functions below. More details and examples can be found in the docstrings and in [the unit tests](../unpythonic/tests/test_numutil.py). -### ``equip_with_traceback`` +**CAUTION** for anyone new to numerics: + +When working with floating-point numbers, keep in mind that they are, very roughly speaking, a finite-precision logarithmic representation of [ℝ](https://en.wikipedia.org/wiki/Real_line). They are, necessarily, actually a subset of [ℚ](https://en.wikipedia.org/wiki/Rational_number), that is not even [dense](https://en.wikipedia.org/wiki/Dense_set). The spacing between adjacent floats depends on where you are on the real line; see `ulp` below. 
+ +For finer points concerning the behavior of floating-point numbers, see [David Goldberg (1991): What every computer scientist should know about floating-point arithmetic](https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html), or for a [tl;dr](http://catplanet.org/tldr-cat-meme/) version, [the floating point guide](https://floating-point-gui.de/). + +Or you could look at [my lecture slides from 2018](https://github.com/Technologicat/python-3-scicomp-intro/tree/master/lecture_slides); particularly, [lecture 7](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/lecture_slides/lectures_tut_2018_7.pdf) covers the floating-point representation. It collects the most important details in a few slides, and contains some more links to further reading. + + +### `almosteq`: floating-point almost-equality + +Test floating-point numbers for near-equality. Beside the built-in `float`, we support also the arbitrary-precision software-implemented floating-point type `mpf` from `SymPy`'s `mpmath` package. + +Anything else, for example `SymPy` expressions, strings, and containers (regardless of content), is tested for exact equality. + +For `mpmath.mpf`, we just delegate to `mpmath.almosteq`, with the given tolerance. + +For `float`, we use the strategy suggested in [the floating point guide](https://floating-point-gui.de/errors/comparison/), because naive absolute and relative comparisons against a tolerance fail in commonly encountered situations. -**Added in v0.14.3**. -Equip a manually created exception instance with a traceback. This is useful mainly in special cases, where `raise` cannot be used for some reason. (The `signal` function in the conditions-and-restarts system uses this.) 
+### `fixpoint`: arithmetic fixed-point finder + +**Added in v0.14.2.** + +*Not to be confused with the logical fixed point with respect to the definedness ordering, which is what Haskell's `fix` function relates to.* + +Compute the (arithmetic) fixed point of a function, starting from a given initial guess. The fixed point must be attractive for this to work. See the [Banach fixed point theorem](https://en.wikipedia.org/wiki/Banach_fixed-point_theorem). + +If the fixed point is attractive, and the values are represented in floating point (hence finite precision), the computation should eventually converge down to the last bit (barring roundoff or catastrophic cancellation in the final few steps). Hence the default tolerance is zero; but any desired tolerance can be passed as an argument. + +**CAUTION**: an arbitrary function from ℝ to ℝ **does not** necessarily have a fixed point. Limit cycles and chaotic behavior of the function will cause non-termination. Keep in mind the classic example, [the logistic map](https://en.wikipedia.org/wiki/Logistic_map). + +Examples: ```python -e = SomeException(...) -e = equip_with_traceback(e) +from math import cos, sqrt +from unpythonic import fixpoint, ulp + +c = fixpoint(cos, x0=1) + +# Actually "Newton's" algorithm for the square root was already known to the +# ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) +# Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy +def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) +assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) ``` -The traceback is automatically extracted from the call stack of the calling thread. -Optionally, you can cull a number of the topmost frames by passing the optional argument `stacklevel=...`. 
Typically, for direct use of this function `stacklevel` should be the default `1` (so it excludes `equip_with_traceback` itself, but shows all stack levels from your code), and for use in a utility function that itself is called from your code, it should be `2` (so it excludes the utility function, too). +### `partition_int`: partition integers +**Changed in v0.15.0.** *Added `partition_int_triangular` and `partition_int_custom`.* -### ``callsite_filename`` +**Added in v0.14.2.** + +*Not to be confused with `unpythonic.partition`, which partitions an iterable based on a predicate.* + +The `partition_int` function [partitions](https://en.wikipedia.org/wiki/Partition_(number_theory)) a small positive integer, i.e., splits it in all possible ways, into smaller integers that sum to it. This is useful e.g. to determine the number of letters to allocate for each component of an anagram that may consist of several words. + +The `partition_int_triangular` function is like `partition_int`, but accepts only triangular numbers (1, 3, 6, 10, ...) as components of the partition. This function answers a timeless question: if I have `n` stackable plushies, what are the possible stack configurations? + +The `partition_int_custom` function is like `partition_int`, but lets you specify which numbers are acceptable as components of the partition. 
+ +Examples: + +```python +from itertools import count, takewhile +from unpythonic import partition_int, partition_int_custom, partition_int_triangular, rev + +assert tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1)) +assert tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3)) +assert tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3)) + +assert (frozenset(tuple(sorted(c)) for c in partition_int_triangular(78, lower=10)) == + frozenset({(10, 10, 10, 10, 10, 28), + (10, 10, 15, 15, 28), + (15, 21, 21, 21), + (21, 21, 36), + (78,)})) + +evens_upto_n = lambda n: takewhile(lambda m: m <= n, count(start=2, step=2)) +assert tuple(partition_int_custom(6, rev(evens_upto_n(6)))) == ((6,), (4, 2), (2, 4), (2, 2, 2)) +``` + +As the first example demonstrates, most of the splits are a ravioli consisting mostly of ones. It is much faster to not generate such splits than to filter them out from the result. Use the `lower` parameter to set the smallest acceptable value for one component of the split; the default value `lower=1` generates all splits. Similarly, the `upper` parameter sets the largest acceptable value for one component of the split. The default `upper=None` sets no upper limit, so in effect the upper limit becomes `n`. + +In `partition_int_triangular`, the `lower` and `upper` parameters work exactly the same. The only difference to `partition_int` is that each component of the split must be a triangular number. + +In `partition_int_custom`, the components are given as an iterable, which is immediately forced (so if it is consumable, it will be completely consumed; and if it is infinite, the function will use up all available RAM and not terminate). Each component `x` must be an integer that satisfies `1 <= x <= n`. 
+ +**CAUTION**: The number of possible partitions grows very quickly with `n`, so in practice these functions are only useful for small numbers, or when the smallest allowed component is not too much smaller than `n / 2`. + + +### `ulp`: unit in last place + +**Added in v0.14.2.** + +Given a floating point number `x`, return the value of the *unit in the last place* (the "least significant bit"). This is the local size of a "tick", i.e. the difference between `x` and the *next larger* float. At `x = 1.0`, this is the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon), by definition of the machine epsilon. + +The float format is [IEEE-754](https://en.wikipedia.org/wiki/IEEE_754), i.e. standard Python `float`. + +This is just a small convenience function that was missing from the `math` standard library before Python 3.9, which added [`math.ulp`](https://docs.python.org/3/library/math.html#math.ulp). + +```python +from unpythonic import ulp + +# in IEEE-754, exponent changes at integer powers of two +print([ulp(x) for x in (0.25, 0.5, 1.0, 2.0, 4.0)]) +# --> [5.551115123125783e-17, +# 1.1102230246251565e-16, +# 2.220446049250313e-16, # x = 1.0, so this is sys.float_info.epsilon +# 4.440892098500626e-16, +# 8.881784197001252e-16] +print(ulp(1e10)) +# --> 1.9073486328125e-06 +print(ulp(1e100)) +# --> 1.942668892225729e+84 +print(ulp(2**52)) +# --> 1.0 # yes, exactly 1 +``` + +When `x` is a round number in base-10, the ULP is not, because the usual kind of floats use base-2. + + +## Other + +Stuff that didn't fit elsewhere. + +### `callsite_filename` + +**Changed in v0.15.0.** *This utility now ignores `unpythonic`'s call helpers, and gives the filename from the deepest stack frame that does not match one of our helpers. This allows the testing framework to report the source code filename correctly when testing code using macros that make use of these helpers (e.g. `autocurry`, `lazify`).* **Added in v0.14.3**. Return the filename from which this function is being called. Useful as a building block for debug utilities and similar. 
-### ``safeissubclass`` +### `safeissubclass` **Added in v0.14.3**. Convenience function. Like `issubclass(cls)`, but if `cls` is not a class, swallow the `TypeError` and return `False`. -### ``pack``: multi-arg constructor for tuple +### `pack`: multi-arg constructor for tuple + +The default `tuple` constructor accepts a single iterable. But sometimes one needs to pass in the elements separately. Most often a literal tuple such as `(1, 2, 3)` is then the right solution, but there are situations that do not admit a literal tuple. -The default ``tuple`` constructor accepts a single iterable. But sometimes one needs to pass in the elements separately. Most often a literal tuple such as ``(1, 2, 3)`` is then the right solution, but there are situations that do not admit a literal tuple. Enter ``pack``: +In such cases it is possible to use `pack`: ```python from unpythonic import pack @@ -3452,13 +4865,13 @@ assert tuple(myzip(lol)) == ((1, 3, 5), (2, 4, 6)) ``` -### ``namelambda``: rename a function +### `namelambda`: rename a function -Rename any function object (including lambdas). The return value of ``namelambda`` is a modified copy; the original function object is not mutated. The input can be any function object (``isinstance(f, (types.LambdaType, types.FunctionType))``). It will be renamed even if it already has a name. +Rename any function object, even a lambda. The return value of `namelambda` is a modified copy; the original function object is not mutated. The input can be any function object (`isinstance(f, (types.LambdaType, types.FunctionType))`). It will be renamed even if it already has a name. This is mainly useful in those situations where you return a lambda as a closure, call it much later, and it happens to crash - so you can tell from the stack trace *which* of the *N* lambdas in your codebase it is. -For technical reasons, ``namelambda`` conforms to the parametric decorator API. Usage: +`namelambda` conforms to the parametric decorator API. 
Usage: ```python from unpythonic import namelambda @@ -3472,7 +4885,7 @@ kaboom() # --> stack trace, showing the function name "kaboom" The first call returns a *foo-renamer*, which takes a function object and returns a copy that has its name changed to *foo*. -Technically, this updates ``__name__`` (the obvious place), ``__qualname__`` (used by ``repr()``), and ``__code__.co_name`` (used by stack traces). +Technically, this updates `__name__` (the obvious place), `__qualname__` (used by `repr()`), and `__code__.co_name` (used by stack traces). **CAUTION**: There is one pitfall: @@ -3484,10 +4897,12 @@ print(nested.__qualname__) # "outer" print(nested().__qualname__) # "..inner" ``` -The inner lambda does not see the outer's new name; the parent scope names are baked into a function's ``__qualname__`` too early for the outer rename to be in effect at that time. +The inner lambda does not see the outer's new name; the parent scope names are baked into a function's `__qualname__` too early for the outer rename to be in effect at that time. + +### `timer`: a context manager for performance testing -### ``timer``: a context manager for performance testing +This is a small convenience utility, used as follows: ```python from unpythonic import timer @@ -3502,10 +4917,43 @@ with timer(p=True): # if p, auto-print result pass ``` -The auto-print mode is a convenience feature to minimize bureaucracy if you just want to see the *Δt*. To instead access the *Δt* programmatically, name the timer instance using the ``with ... as ...`` syntax. After the context exits, the *Δt* is available in its ``dt`` attribute. +The auto-print mode is a convenience feature to minimize bureaucracy if you just want to see the *Δt*. To instead access the *Δt* programmatically, name the timer instance using the `with ... as ...` syntax. After the context exits, the *Δt* is available in its `dt` attribute. The timer instance itself stays alive due to Python's scoping rules. 
-### ``getattrrec``, ``setattrrec``: access underlying data in an onion of wrappers +### `format_human_time`: seconds to days, hours, minutes, seconds + +**Added in v0.15.1.** + +Convert a duration from seconds (`float` or `int`) to a human-readable string of days, hours, minutes and seconds. + +```python +assert format_human_time(30) == "30 seconds" +assert format_human_time(90) == "01:30" # mm:ss +assert format_human_time(3690) == "01:01:30" # hh:mm:ss +assert format_human_time(86400 + 3690) == "1 day 01:01:30" +assert format_human_time(2 * 86400 + 3690) == "2 days 01:01:30" +``` + + +### `ETAEstimator`: estimate the time of completion of a long-running task + +**Added in v0.15.1.** + +Simple but useful: + +```python +n = 1000 +est = ETAEstimator(total=n, keep_last=10) +for k in range(n): + print(f"Processing item {k + 1} out of {n}, {est.formatted_eta}") + ... # do something + est.tick() +``` + +The ETA estimate is automatically formatted using `format_human_time` (see above) to maximize readability. + + +### `getattrrec`, `setattrrec`: access underlying data in an onion of wrappers ```python from unpythonic import getattrrec, setattrrec @@ -3526,13 +4974,17 @@ assert getattrrec(w, "x") == 23 ``` -### ``arities``, ``kwargs``, ``resolve_bindings``: Function signature inspection utilities +### `arities`, `kwargs`, `resolve_bindings`: Function signature inspection utilities + +**Changed in v0.15.0.** *Now `resolve_bindings` is a thin wrapper on top of `inspect.Signature.bind`, which was added in Python 3.5. In `unpythonic` 0.14.2 and 0.14.3, we used to have our own implementation of the parameter binding algorithm (that ran also on Python 3.4), but it is no longer needed, since now we support only Python 3.6 and later. Now `resolve_bindings` returns an `inspect.BoundArguments` object.* -**Added in v0.14.2**: `resolve_bindings`. *Get the parameter bindings a given callable would establish if it was called with the given args and kwargs. 
This is mainly of interest for implementing memoizers, since this allows them to see (e.g.) `f(1)` and `f(a=1)` as the same thing for `def f(a): pass`.* +*Now `tuplify_bindings` accepts an `inspect.BoundArguments` object instead of its previous input format. The function is only ever intended to be used to postprocess the output of `resolve_bindings`, so this change shouldn't affect your own code.* -Convenience functions providing an easy-to-use API for inspecting a function's signature. The heavy lifting is done by ``inspect``. +**Added in v0.14.2**: `resolve_bindings`. *Get the parameter bindings a given callable would establish if it was called with the given args and kwargs. This is mainly of interest for implementing memoizers, since this allows them to see (e.g.) `f(1)` and `f(a=1)` as the same thing for `def f(a): pass`. Thanks to Graham Dumpleton, the author of the [`wrapt`](https://pypi.org/project/wrapt/) library, for [noticing and documenting this gotcha](https://wrapt.readthedocs.io/en/latest/decorators.html#processing-function-arguments).* -Methods on objects and classes are treated specially, so that the reported arity matches what the programmer actually needs to supply when calling the method (i.e., implicit ``self`` and ``cls`` are ignored). +These are convenience functions providing an easy-to-use API for inspecting a function's signature. The heavy lifting is done by `inspect`. + +Methods on objects and classes are treated specially, so that the reported arity matches what the programmer actually needs to supply when calling the method (i.e., implicit `self` and `cls` are ignored). 
```python from unpythonic import (arities, arity_includes, UnknownArity, @@ -3587,16 +5039,16 @@ assert tuple(resolve_bindings(f, 1, c=3, b=2).items()) == (("a", 1), ("b", 2), ( assert tuple(resolve_bindings(f, c=3, b=2, a=1).items()) == (("a", 1), ("b", 2), ("c", 3)) ``` -We special-case the builtin functions that either fail to return any arity (are uninspectable) or report incorrect arity information, so that also their arities are reported correctly. Note we **do not** special-case the *methods* of any builtin classes, so e.g. ``list.append`` remains uninspectable. This limitation might or might not be lifted in a future version. +We special-case the builtin functions that either fail to return any arity (are uninspectable) or report incorrect arity information, so that also their arities are reported correctly. Note we **do not** special-case the *methods* of any builtin classes, so e.g. `list.append` remains uninspectable. This limitation might or might not be lifted in a future version. -If the arity cannot be inspected, and the function is not one of the special-cased builtins, the ``UnknownArity`` exception is raised. +If the arity cannot be inspected, and the function is not one of the special-cased builtins, the `UnknownArity` exception is raised. -These functions are internally used in various places in unpythonic, particularly ``curry``. The ``let`` and FP looping constructs also use these to emit a meaningful error message if the signature of user-provided function does not match what is expected. +Up to v0.14.3, various places in `unpythonic` used to internally use `arities`; particularly `curry`, `fix`, and `@generic`. As of v0.15.0, we have started to prefer `resolve_bindings`, because often what matters are the parameter bindings established, and performing the binding covers all possible ways to pass arguments. 
The `let` and FP looping constructs still use `arities` to emit a meaningful error message if the signature of user-provided function does not match what is expected. -Inspired by various Racket functions such as ``(arity-includes?)`` and ``(procedure-keywords)``. +Inspired by various Racket functions such as `(arity-includes?)` and `(procedure-keywords)`. -### ``Popper``: a pop-while iterator +### `Popper`: a pop-while iterator Consider this highly artificial example: @@ -3612,7 +5064,7 @@ assert inp == deque([]) assert out == list(range(5)) ``` -``Popper`` condenses the ``while`` and ``pop`` into a ``for``, while allowing the loop body to mutate the input iterable in arbitrary ways (we never actually ``iter()`` it): +`Popper` condenses the `while` and `pop` into a `for`, while allowing the loop body to mutate the input iterable in arbitrary ways (we never actually `iter()` it): ```python from collections import deque @@ -3635,7 +5087,7 @@ assert inp == deque([]) assert out == [0, 10, 1, 11, 2, 12] ``` -``Popper`` comboes with other iterable utilities, such as ``window``: +`Popper` comboes with other iterable utilities, such as `window`: ```python from collections import deque @@ -3643,7 +5095,7 @@ from unpythonic import Popper, window inp = deque(range(3)) out = [] -for a, b in window(Popper(inp)): +for a, b in window(2, Popper(inp)): out.append((a, b)) if a < 10: inp.append(a + 10) @@ -3651,95 +5103,14 @@ assert inp == deque([]) assert out == [(0, 1), (1, 2), (2, 10), (10, 11), (11, 12)] ``` -(Although ``window`` invokes ``iter()`` on the ``Popper``, this works because the ``Popper`` never invokes ``iter()`` on the underlying container. Any mutations to the input container performed by the loop body will be understood by ``Popper`` and thus also seen by the ``window``. The first ``n`` elements, though, are read before the loop body gets control, because the window needs them to initialize itself.) 
+Although `window` invokes `iter()` on the `Popper` instance, this works because the `Popper` never invokes `iter()` on the underlying container. Any mutations to the input container performed by the loop body will be understood by `Popper` and thus also seen by the `window`. The first `n` elements, though, are read before the loop body gets control, because the window needs them to initialize itself. -One possible real use case for ``Popper`` is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously ``True`` or ``False``. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later. +One possible real use case for `Popper` is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously `True` or `False`. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later. -The argument to ``Popper`` (here ``lst``) contains the **remaining** items. Each iteration pops an element **from the left**. The loop terminates when ``lst`` is empty. +The argument to `Popper` contains the **remaining** items. Each iteration pops an element **from the left**. The loop terminates when, at the start of an iteration, there are no more items remaining. -The input container must support either ``popleft()`` or ``pop(0)``. This is fully duck-typed. At least ``collections.deque`` and any ``collections.abc.MutableSequence`` (including ``list``) are fine. +The input container must support either `popleft()` or `pop(0)`. This is fully duck-typed. At least `collections.deque` and any [`collections.abc.MutableSequence`](https://docs.python.org/3/library/collections.abc.html) (including `list`) are fine. 
-Per-iteration efficiency is O(1) for ``collections.deque``, and O(n) for a ``list``. +Per-iteration efficiency is O(1) for `collections.deque`, and O(n) for a `list`. Named after [Karl Popper](https://en.wikipedia.org/wiki/Karl_Popper). - - -### ``ulp``: unit in last place - -**Added in v0.14.2.** - -Given a floating point number `x`, return the value of the *unit in the last place* (the "least significant bit"). This is the local size of a "tick", i.e. the difference between `x` and the next larger float. At `x = 1.0`, this is the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon), by definition of the machine epsilon. - -The float format is [IEEE-754](https://en.wikipedia.org/wiki/IEEE_754), i.e. standard Python `float`. - -This is just a small convenience function that is for some reason missing from the `math` standard library. - -```python -from unpythonic import ulp - -# in IEEE-754, exponent changes at integer powers of two -print([ulp(x) for x in (0.25, 0.5, 1.0, 2.0, 4.0)]) -# --> [5.551115123125783e-17, -# 1.1102230246251565e-16, -# 2.220446049250313e-16, # x = 1.0, so this is sys.float_info.epsilon -# 4.440892098500626e-16, -# 8.881784197001252e-16] -print(ulp(1e10)) -# --> 1.9073486328125e-06 -print(ulp(1e100)) -# --> 1.942668892225729e+84 -print(ulp(2**52)) -# --> 1.0 # yes, exactly 1 -``` - -When `x` is a round number in base-10, the ULP is not, because the usual kind of floats use base-2. - -For more reading, see [David Goldberg (1991): What every computer scientist should know about floating-point arithmetic](https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html), or for a [tl;dr](http://catplanet.org/tldr-cat-meme/) version, [the floating point guide](https://floating-point-gui.de/). - - -### ``async_raise``: inject an exception to another thread - -**Added in v0.14.2**. 
- -*Currently CPython only, because as of this writing (March 2020) PyPy3 does not expose the required functionality to the Python level, nor there seem to be any plans to do so.* - -Usually injecting an exception into an unsuspecting thread makes absolutely no sense. But there are special cases, such as a REPL server which needs to send a `KeyboardInterrupt` into a REPL session thread that's happily stuck waiting for input at [`InteractiveConsole.interact()`](https://docs.python.org/3/library/code.html#code.InteractiveConsole.interact) - while the client that receives the actual `Ctrl+C` is running in a separate process. This and similar awkward situations in network programming are pretty much the only legitimate use case for this feature. - -The name is `async_raise`, because it injects an *asynchronous exception*. This has nothing to do with `async`/`await`. Synchronous vs. asynchronous exceptions [mean something different](https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity). - -In a nutshell, a *synchronous* exception (which is the usual kind of exception) has an explicit `raise` somewhere in the code that the thread that encountered the exception is running. In contrast, an *asynchronous* exception **doesn't**, it just suddenly magically materializes from the outside. As such, it can in principle happen *anywhere*, with absolutely no hint about it in any obvious place in the code. - -Needless to say this can be very confusing, so this feature should be used sparingly, if at all. 
**We only have it because the REPL server needs it.** - -```python -from unpythonic import async_raise, box - -out = box() -def worker(): - try: - for j in range(10): - sleep(0.1) - except KeyboardInterrupt: # normally, KeyboardInterrupt is only raised in the main thread - pass - out << j -t = threading.Thread(target=worker) -t.start() -sleep(0.1) # make sure the worker has entered the loop -async_raise(t, KeyboardInterrupt) -t.join() -assert unbox(out) < 9 # thread terminated early due to the injected KeyboardInterrupt -``` - -#### So this is how KeyboardInterrupt works under the hood? - -No, this is **not** how `KeyboardInterrupt` usually works. Rather, the OS sends a [SIGINT](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGINT), which is then trapped by an [OS signal handler](https://docs.python.org/3/library/signal.html) that runs in the main thread. - -At that point the magic has already happened: the control of the main thread is now inside the signal handler, as if the signal handler was called from the otherwise currently innermost point on the call stack. All the handler needs to do is to perform a regular `raise`, and the exception will propagate correctly. - -#### History - -Original detective work by [Federico Ficarelli](https://gist.github.com/nazavode/84d1371e023bccd2301e) and [LIU Wei](https://gist.github.com/liuw/2407154). - -Raising async exceptions is a [documented feature of Python's public C API](https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc), but it was never meant to be invoked from within pure Python code. But then the CPython devs gave us [ctypes.pythonapi](https://docs.python.org/3/library/ctypes.html#accessing-values-exported-from-dlls), which allows access to Python's C API from within Python. (If you think ctypes.pythonapi is too quirky, the [pycapi](https://pypi.org/project/pycapi/) PyPI package smooths over the rough edges.) Combining the two gives `async_raise` without the need to compile a C extension. 
- -Unfortunately PyPy doesn't currently (March 2020) implement this function in its CPython C API emulation layer, `cpyext`. See `unpythonic` issue [#58](https://github.com/Technologicat/unpythonic/issues/58). diff --git a/doc/macros.md b/doc/macros.md index 0c32281d..d1b8ca0f 100644 --- a/doc/macros.md +++ b/doc/macros.md @@ -1,66 +1,82 @@ -# Language extensions using ``unpythonic.syntax`` +**Navigation** -Our extensions to the Python language are built on [``mcpyrate``](https://github.com/Technologicat/mcpyrate), from the PyPI package [``mcpyrate``](https://pypi.org/project/mcpyrate/). +- [README](../README.md) +- [Pure-Python feature set](features.md) +- **Syntactic macro feature set** +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) -Because in Python macro expansion occurs *at import time*, Python programs whose main module uses macros, such as [our unit tests that contain usage examples](../unpythonic/syntax/test/), cannot be run directly. Instead, run them via `macropython`, included in `mcpyrate`. +# Language extensions using `unpythonic.syntax` + +Our extensions to the Python language are built on [`mcpyrate`](https://github.com/Technologicat/mcpyrate), from the PyPI package [`mcpyrate`](https://pypi.org/project/mcpyrate/). + +Because in Python macro expansion occurs *at import time*, Python programs whose main module uses macros, such as [our unit tests that contain usage examples](../unpythonic/syntax/tests/), cannot be run directly by `python3`. Instead, run them via the `macropython` bootstrapper, included in `mcpyrate`. **Our macros expect a from-import style** for detecting uses of `unpythonic` constructs, *even when those constructs are regular functions*. For example, the function `curry` is detected from its bare name. 
So if you intend to use these macros, then, for regular imports from `unpythonic`, use `from unpythonic import ...` and avoid renaming (`as`). -*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out of date at places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests say - and optionally file an issue on GitHub so that the documentation can be fixed.* +*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out of date at places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests do - and optionally file an issue on GitHub so that the documentation can be fixed.* -**Changed in 0.15.** *To run macro-enabled programs, use the [`macropython`](https://github.com/Technologicat/mcpyrate/blob/master/doc/repl.md#macropython-the-universal-bootstrapper) bootstrapper from [`mcpyrate`](https://github.com/Technologicat/mcpyrate).* +**Changed in v0.15.0.** *To run macro-enabled programs, use the [`macropython`](https://github.com/Technologicat/mcpyrate/blob/master/doc/repl.md#macropython-the-universal-bootstrapper) bootstrapper from [`mcpyrate`](https://github.com/Technologicat/mcpyrate).* -**This document is up-to-date for v0.14.3.** +**This document is up-to-date for v0.15.0.** ### Features [**Bindings**](#bindings) -- [``let``, ``letseq``, ``letrec`` as macros](#let-letseq-letrec-as-macros); proper lexical scoping, no boilerplate. 
-- [``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec``: decorator versions](#dlet-dletseq-dletrec-blet-bletseq-bletrec-decorator-versions) -- [``let_syntax``, ``abbrev``: syntactic local bindings](#let_syntax-abbrev-syntactic-local-bindings); splice code at macro expansion time. -- [Bonus: barebones ``let``](#bonus-barebones-let): pure AST transformation of ``let`` into a ``lambda``. +- [`let`, `letseq`, `letrec` as macros](#let-letseq-letrec-as-macros); proper lexical scoping, no boilerplate. +- [`dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec`: decorator versions](#dlet-dletseq-dletrec-blet-bletseq-bletrec-decorator-versions) +- [Caution on name resolution and scoping](#caution-on-name-resolution-and-scoping) +- [`let_syntax`, `abbrev`: syntactic local bindings](#let_syntax-abbrev-syntactic-local-bindings); splice code at macro expansion time. +- [Bonus: barebones `let`](#bonus-barebones-let): pure AST transformation of `let` into a `lambda`. [**Sequencing**](#sequencing) -- [``do`` as a macro: stuff imperative code into an expression, *with style*](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style) +- [`do` as a macro: stuff imperative code into an expression, *with style*](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style) [**Tools for lambdas**](#tools-for-lambdas) -- [``multilambda``: supercharge your lambdas](#multilambda-supercharge-your-lambdas); multiple expressions, local variables. -- [``namedlambda``: auto-name your lambdas](#namedlambda-auto-name-your-lambdas) by assignment. 
-- [``f``: underscore notation (quick lambdas) for Python](#f-underscore-notation-quick-lambdas-for-python) -- [``quicklambda``: expand quick lambdas first](#quicklambda-expand-quick-lambdas-first) -- [``envify``: make formal parameters live in an unpythonic ``env``](#envify-make-formal-parameters-live-in-an-unpythonic-env) +- [`multilambda`: supercharge your lambdas](#multilambda-supercharge-your-lambdas); multiple expressions, local variables. +- [`namedlambda`: auto-name your lambdas](#namedlambda-auto-name-your-lambdas) by assignment. +- [`fn`: underscore notation (quick lambdas) for Python](#fn-underscore-notation-quick-lambdas-for-python) +- [`quicklambda`: expand quick lambdas first](#quicklambda-expand-quick-lambdas-first) +- [`envify`: make formal parameters live in an unpythonic `env`](#envify-make-formal-parameters-live-in-an-unpythonic-env) [**Language features**](#language-features) -- [``curry``: automatic currying for Python](#curry-automatic-currying-for-python) -- [``lazify``: call-by-need for Python](#lazify-call-by-need-for-python) +- [`autocurry`: automatic currying for Python](#autocurry-automatic-currying-for-python) +- [`lazify`: call-by-need for Python](#lazify-call-by-need-for-python) + - [`lazy[]` and `lazyrec[]` macros](#lazy-and-lazyrec-macros) - [Forcing promises manually](#forcing-promises-manually) - [Binding constructs and auto-lazification](#binding-constructs-and-auto-lazification) - [Note about TCO](#note-about-tco) -- [``tco``: automatic tail call optimization for Python](#tco-automatic-tail-call-optimization-for-python) +- [`tco`: automatic tail call optimization for Python](#tco-automatic-tail-call-optimization-for-python) - [TCO and continuations](#tco-and-continuations) -- [``continuations``: call/cc for Python](#continuations-callcc-for-python) +- [`continuations`: call/cc for Python](#continuations-callcc-for-python) - [General remarks on continuations](#general-remarks-on-continuations) - - [Differences between ``call/cc`` 
and certain other language features](#differences-between-callcc-and-certain-other-language-features) (generators, exceptions) - - [``call_cc`` API reference](#call_cc-api-reference) + - [Differences between `call/cc` and certain other language features](#differences-between-callcc-and-certain-other-language-features) (generators, exceptions) + - [`call_cc` API reference](#call_cc-api-reference) - [Combo notes](#combo-notes) - [Continuations as an escape mechanism](#continuations-as-an-escape-mechanism) - [What can be used as a continuation?](#what-can-be-used-as-a-continuation) - - [This isn't ``call/cc``!](#this-isnt-callcc) + - [This isn't `call/cc`!](#this-isnt-callcc) - [Why this syntax?](#why-this-syntax) -- [``prefix``: prefix function call syntax for Python](#prefix-prefix-function-call-syntax-for-python) -- [``autoreturn``: implicit ``return`` in tail position](#autoreturn-implicit-return-in-tail-position), like in Lisps. -- [``forall``: nondeterministic evaluation](#forall-nondeterministic-evaluation) with monadic do-notation for Python. +- [`prefix`: prefix function call syntax for Python](#prefix-prefix-function-call-syntax-for-python) +- [`autoreturn`: implicit `return` in tail position](#autoreturn-implicit-return-in-tail-position), like in Lisps. +- [`forall`: nondeterministic evaluation](#forall-nondeterministic-evaluation) with monadic do-notation for Python. [**Convenience features**](#convenience-features) -- [``cond``: the missing ``elif`` for ``a if p else b``](#cond-the-missing-elif-for-a-if-p-else-b) -- [``aif``: anaphoric if](#aif-anaphoric-if), the test result is ``it``. -- [``autoref``: implicitly reference attributes of an object](#autoref-implicitly-reference-attributes-of-an-object) +- [`cond`: the missing `elif` for `a if p else b`](#cond-the-missing-elif-for-a-if-p-else-b) +- [`aif`: anaphoric if](#aif-anaphoric-if), the test result is `it`. 
+- [`autoref`: implicitly reference attributes of an object](#autoref-implicitly-reference-attributes-of-an-object) [**Testing and debugging**](#testing-and-debugging) -- [``unpythonic.test.fixtures``: a test framework for macro-enabled Python](#unpythonic-test-fixtures-a-test-framework-for-macro-enabled-python) +- [`unpythonic.test.fixtures`: a test framework for macro-enabled Python](#unpythonic-test-fixtures-a-test-framework-for-macro-enabled-python) - [Overview](#overview) - [Testing syntax quick reference](#testing-syntax-quick-reference) + - [Expansion order](#expansion-order) - [`with test`: test blocks](#with-test-test-blocks) - [`the`: capture the value of interesting subexpressions](#the-capture-the-value-of-interesting-subexpressions) - [Test sessions and testsets](#test-sessions-and-testsets) @@ -68,10 +84,10 @@ Because in Python macro expansion occurs *at import time*, Python programs whose - [Advanced: building a custom test framework](#advanced-building-a-custom-test-framework) - [Why another test framework?](#why-another-test-framework) - [Etymology and roots](#etymology-and-roots) -- [``dbg``: debug-print expressions with source code](#dbg-debug-print-expressions-with-source-code) +- [`dbg`: debug-print expressions with source code](#dbg-debug-print-expressions-with-source-code) [**Other**](#other) -- [``nb``: silly ultralight math notebook](#nb-silly-ultralight-math-notebook) +- [`nb`: silly ultralight math notebook](#nb-silly-ultralight-math-notebook) [**Meta**](#meta) - [The xmas tree combo](#the-xmas-tree-combo): notes on the macros working together. @@ -82,154 +98,222 @@ Because in Python macro expansion occurs *at import time*, Python programs whose Macros that introduce new ways to bind identifiers. -### ``let``, ``letseq``, ``letrec`` as macros +### `let`, `letseq`, `letrec` as macros + +**Changed in v0.15.3.** *Added support for the walrus operator `:=` for env-assignment. This is the new preferred syntax to establish let-bindings. 
All old syntaxes are still supported for backward compatibility.* + +**Changed in v0.15.0.** *Added support for env-assignment syntax in the bindings subform. For consistency with other env-assignments, this is now the preferred syntax to establish let-bindings. Additionally, the old lispy syntax now accepts also brackets, for consistency with the use of brackets for macro invocations.* -Properly lexically scoped ``let`` constructs, no boilerplate: +These macros provide properly lexically scoped `let` constructs, no boilerplate: ```python from unpythonic.syntax import macros, let, letseq, letrec -let[(x, 17), # parallel binding, i.e. bindings don't see each other - (y, 23)][ +let[x := 17, # parallel binding, i.e. bindings don't see each other + y := 23][ print(x, y)] -letseq[(x, 1), # sequential binding, i.e. Scheme/Racket let* - (y, x+1)][ +letseq[x := 1, # sequential binding, i.e. Scheme/Racket let* + y := x + 1][ print(x, y)] -letrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), # mutually recursive binding, sequentially evaluated - (oddp, lambda x: (x != 0) and evenp(x - 1))][ +letrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), # mutually recursive binding, sequentially evaluated + oddp := (lambda x: (x != 0) and evenp(x - 1))][ print(evenp(42))] ``` -As seen in the examples, the syntax is similar to [``unpythonic.lispylet``](../doc/features.md#lispylet-alternative-syntax). Assignment to variables in the environment is supported via the left-shift syntax ``x << 42``. +Even with just one binding, the syntax remains the same: + +```python +let[x := 21][2 * x] +``` + +There must be at least one binding; `let[][...]` is a syntax error, since Python's parser rejects an empty subscript slice. + +Bindings are established using standard assignment expression syntax, `name := value`. The let-bindings can be rebound in the body with the same syntax, e.g. `x := 42`. 
+ +The old `unpythonic` env-assignment syntax, `name << value`, is also supported for backward compatibility. This was the preferred syntax in v0.15.0 to v0.15.2. + +**NOTE**: All let-bindings must be established in the bindings subform. If you absolutely need to establish more bindings in the body, see the sequencing construct `do[]` and its syntax `local[x := 42]`. + +**NOTE**: Language support for using an assignment expression inside a subscript *without parenthesizing it* was [added in Python 3.10](https://docs.python.org/3/whatsnew/3.10.html#other-language-changes). The syntax accepted when running on Python 3.8 or 3.9 is: + +```python +let[(x := 17), + (y := 23)][ + print(x, y)] +``` + +That is, Python 3.8 and 3.9 require parentheses around each let binding if you use the new `:=` syntax, because syntactically, the bindings subform looks like a subscript. The unit tests use this syntax so that they work on 3.8 and 3.9. But for new code using Python 3.10 or later, it is preferable to omit the parentheses to improve readability. + +The same syntax for the bindings subform is used by: + +- `let`, `letseq`, `letrec` (expressions) +- `dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec` (decorators) + - As of v0.15.0, it is possible to use `@dlet(...)` instead of `@dlet[...]` in Python 3.8 and earlier. +- `let_syntax`, `abbrev` (expression mode) -The bindings are given as macro arguments as ``((name, value), ...)``, the body goes into the ``[...]``. 
-#### Alternate syntaxes +#### Haskelly let-in, let-where -The following Haskell-inspired, perhaps more pythonic alternate syntaxes are also available: +The following Haskell-inspired, perhaps more pythonic alternative syntaxes are also available: ```python -let[((x, 21), - (y, 17), - (z, 4)) in +let[[x := 21, + y := 17, + z := 4] in x + y + z] let[x + y + z, - where((x, 21), - (y, 17), - (z, 4))] + where[x := 21, + y := 17, + z := 4]] + +let[[x := 21] in 2 * x] +let[2 * x, where[x := 21]] ``` -These syntaxes take no macro arguments; both the let-body and the bindings are placed inside the same ``[...]``. +These syntaxes take no macro arguments; both the let-body and the bindings are placed inside the `...` in `let[...]`. + +Note the bindings subform is always enclosed by brackets. + +The `where` operator, if used, must be macro-imported. It may only appear at the top level of the let-where form, separating the body and the bindings subforms. In any invalid position, `where` is considered a syntax error at macro expansion time.
Semantically, these do the exact same thing as the original lispy syntax: >The bindings are evaluated first, and then the body is evaluated with the bindings in place. The purpose of the second variant (the *let-where*) is just readability; sometimes it looks clearer to place the body expression first, and only then explain what the symbols in it mean. > ->These syntaxes are valid for all **expression forms** of ``let``, namely: ``let[]``, ``letseq[]``, ``letrec[]``, ``let_syntax[]`` and ``abbrev[]``. The decorator variants (``dlet`` et al., ``blet`` et al.) and the block variants (``with let_syntax``, ``with abbrev``) support only the original lispy syntax, because there the body is in any case placed differently. +>These syntaxes are valid for all **expression forms** of `let`, namely: `let[]`, `letseq[]`, `letrec[]`, `let_syntax[]` and `abbrev[]`. The decorator variants (`dlet` et al., `blet` et al.) and the block variants (`with let_syntax`, `with abbrev`) support only the formats where the bindings subform is given in the macro arguments part, because there the body is in any case placed differently (it's the body of the function being decorated). > ->In the first variant above (the *let-in*), note the bindings block still needs the outer parentheses. This is due to Python's precedence rules; ``in`` binds more strongly than the comma (which makes sense almost everywhere else), so to make it refer to all of the bindings, the bindings block must be parenthesized. If the ``let`` expander complains your code does not look like a ``let`` form and you have used *let-in*, check your parentheses. +>In the first variant above (the *let-in*), note that even there, the bindings block needs the brackets. This is due to Python's precedence rules; `in` binds more strongly than the comma (which makes sense almost everywhere else), so to make the `in` refer to all of the bindings, the bindings block must be bracketed. 
If the `let` expander complains your code does not look like a `let` form and you have used *let-in*, check your brackets. > ->In the second variant (the *let-where*), note the comma between the body and ``where``; it is compulsory to make the expression into syntactically valid Python. (It's however semi-easyish to remember, since also English requires the comma for a where-expression.) +>In the second variant (the *let-where*), note the comma between the body and `where`; it is compulsory to make the expression into syntactically valid Python. (It's however semi-easyish to remember, since also English requires the comma for a where-expression. It's not only syntactically valid Python, it is also syntactically valid English, at least for mathematicians.)
-#### Special syntax for one binding +#### Alternative syntaxes for the bindings subform + +**Changed in v0.15.0.** -If there is only one binding, to make the syntax more pythonic, the outer parentheses may be omitted in the bindings block of the **expr forms** of: +Beginning with v0.15.0, the env-assignment syntax presented above is the preferred syntax to establish let-bindings, for consistency with other env-assignments. This serves as a reminder that let variables live in an `env`, which is created by the `let` form. -- ``let``, ``letseq``, ``letrec`` -- ``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec`` -- ``let_syntax``, ``abbrev`` +There is also an alternative, lispy notation for the bindings subform, where each name-value pair is given using brackets: ```python -let[x, 21][2*x] -let[(x, 21) in 2*x] -let[2*x, where(x, 21)] +let[[x, 42], [y, 9001]][...] +let[[[x, 42], [y, 9001]] in ...] +let[..., where[[x, 42], [y, 9001]]] + +# one-binding special case: outer brackets not needed +let[x, 42][...] +let[[x, 42] in ...] +let[..., where[x, 42]] ``` -This is valid also in the *let-in* variant, because there is still one set of parentheses enclosing the bindings block. +This is similar in spirit to the notation used in v0.14.3 and earlier. + +Actually, for backwards compatibility, we still support some use of parentheses instead of brackets in the bindings subform. The following formats, used in versions of `unpythonic` up to v0.14.3, are still accepted: + +```python +let((x, 42), (y, 9001))[...] +let[((x, 42), (y, 9001)) in ...] +let[..., where((x, 42), (y, 9001))] + +# one-binding special case: outer parentheses not needed +let(x, 42)[...] +let[(x, 42) in ...] +let[..., where(x, 42)] +``` + +Even though an expr macro invocation itself is always denoted using brackets, as of `unpythonic` v0.15.0 parentheses can still be used *to pass macro arguments*, hence `let(...)[...]` is still accepted. 
The code that interprets the AST for the let-bindings accepts both lists and tuples for each key-value pair, and the top-level container for the bindings subform in a let-in or let-where can be either list or tuple, so whether brackets or parentheses are used does not matter there, either. + +Still, brackets are now the preferred delimiter, for consistency between the bindings and body subforms. + +We plan to drop support for parentheses to pass macro arguments in the future, when Python 3.9 becomes the minimum Python version supported. The reason we will wait that long is that up to Python 3.8, decorators cannot be subscripted. Up to Python 3.8, `@dlet[x, 42]` is rejected by Python's parser, whereas `@dlet(x, 42)` is accepted. + +The issue has been fixed in Python 3.9. If you already only use 3.9 and later, please prefer brackets to pass macro arguments. -This is essentially special-cased in the ``let`` expander. (If interested in the technical details, look at ``unpythonic.syntax.letdoutil.UnexpandedLetView``, which performs the destructuring. See also ``unpythonic.syntax.__init__.let``; the macro expander itself already destructures the original lispy syntax when the macro is invoked.) #### Multiple expressions in body -The `let` constructs can now use a multiple-expression body. The syntax to activate multiple expression mode is an extra set of brackets around the body ([like in `multilambda`](#multilambda-supercharge-your-lambdas)): +The `let` constructs can use a multiple-expression body. 
The syntax to activate multiple expression mode is an extra set of brackets around the body ([like in `multilambda`](#multilambda-supercharge-your-lambdas)): ```python -let[(x, 1), - (y, 2)][[ # note extra [ - y << x + y, +let[x := 1, + y := 2][[ # note extra [ + y := x + y, print(y)]] -let[((x, 1), # v0.12.0+ - (y, 2)) in - [y << x + y, # body starts here +let[[x := 1, + y := 2] in + [y := x + y, # body starts here print(y)]] -let[[y << x + y, # v0.12.0+ +let[[y := x + y, print(y)], # body ends here - where((x, 1), - (y, 2))] + where[x := 1, + y := 2]] ``` -The let macros implement this by inserting a ``do[...]`` (see below). In a multiple-expression body, also an internal definition context exists for local variables that are not part of the ``let``; see [``do`` for details](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style). +The let macros implement this by inserting a `do[...]` (see below). In a multiple-expression body, a separate internal definition context exists for local variables that are not part of the `let`; see [the `do` macro for details](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style). -Only the outermost set of extra brackets is interpreted as a multiple-expression body. The rest are interpreted as usual, as lists. If you need to return a literal list from a ``let`` form with only one body expression, use three sets of brackets: +Only the outermost set of extra brackets is interpreted as a multiple-expression body. The rest are interpreted as usual, as lists. 
If you need to return a literal list from a `let` form with only one body expression, double the brackets on the *body* part: ```python -let[(x, 1), - (y, 2)][[ +let[x := 1, + y := 2][[ [x, y]]] -let[((x, 1), # v0.12.0+ - (y, 2)) in +let[[x := 1, + y := 2] in [[x, y]]] -let[[[x, y]], # v0.12.0+ - where((x, 1), - (y, 2))] +let[[[x, y]], + where[x := 1, + y := 2]] ``` -The outermost brackets delimit the ``let`` form, the middle ones activate multiple-expression mode, and the innermost ones denote a list. +The outermost brackets delimit the `let` form itself, the middle ones activate multiple-expression mode, and the innermost ones denote a list. Only brackets are affected; parentheses are interpreted as usual, so returning a literal tuple works as expected: ```python -let[(x, 1), - (y, 2)][ +let[x := 1, + y := 2][ (x, y)] -let[((x, 1), # v0.12.0+ - (y, 2)) in +let[[x := 1, + y := 2] in (x, y)] -let[(x, y), # v0.12.0+ - where((x, 1), - (y, 2))] +let[(x, y), + where[x := 1, + y := 2]] ``` #### Notes -``let`` and ``letrec`` expand into the ``unpythonic.lispylet`` constructs, implicitly inserting the necessary boilerplate: the ``lambda e: ...`` wrappers, quoting variable names in definitions, and transforming ``x`` to ``e.x`` for all ``x`` declared in the bindings. Assignment syntax ``x << 42`` transforms to ``e.set('x', 42)``. The implicit environment parameter ``e`` is actually named using a gensym, so lexically outer environments automatically show through. ``letseq`` expands into a chain of nested ``let`` expressions. +The main difference of the `let` family to Python's own named expressions (a.k.a. the walrus operator, added in Python 3.8) is that `x := 42` does not create a scope, but `let[x := 42][...]` does. The walrus operator assigns to the name `x` in the scope it appears in, whereas in the `let` expression, the `x` only exists in that expression. 
+ +As of v0.15.3, this is somewhat complicated by the fact that now the syntax `x := 42` can be used to rebind let variables. See the unit test examples for `@dlet` below, in the section on the decorator versions. -Nesting utilizes an inside-out macro expansion order: +`let` and `letrec` expand into the `unpythonic.lispylet` constructs, implicitly inserting the necessary boilerplate: the `lambda e: ...` wrappers, quoting variable names in definitions, and transforming `x` to `e.x` for all `x` declared in the bindings. Assignment syntax `x := 42` transforms to `e.set('x', 42)`. The implicit environment parameter `e` is actually named using a gensym, so lexically outer environments automatically show through. `letseq` expands into a chain of nested `let` expressions. + +All the `let` macros respect lexical scope, so this works as expected: ```python -letrec[(z, 1)][[ +letrec[z := 1][[ print(z), - letrec[(z, 2)][ + letrec[z := 2][ print(z)]]] ``` -Hence the ``z`` in the inner scope expands to the inner environment's ``z``, which makes the outer expansion leave it alone. (This works by transforming only ``ast.Name`` nodes, stopping recursion when an ``ast.Attribute`` is encountered.) +The `z` in the inner `letrec` expands to the inner environment's `z`, and the `z` in the outer `letrec` to the outer environment's `z`. -### ``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec``: decorator versions +### `dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec`: decorator versions -Similar to ``let``, ``letseq``, ``letrec``, these sugar the corresponding ``unpythonic.lispylet`` constructs, with the ``dletseq`` and ``bletseq`` constructs existing only as macros (expanding to nested ``dlet`` or ``blet``, respectively). +Similar to `let`, `letseq`, `letrec`, these macros sugar the corresponding `unpythonic.lispylet` constructs, with the `dletseq` and `bletseq` constructs existing only as macros. They expand to nested `dlet` or `blet`, respectively. 
Lexical scoping is respected; each environment is internally named using a gensym. Nesting is allowed. @@ -238,83 +322,109 @@ Examples: ```python from unpythonic.syntax import macros, dlet, dletseq, dletrec, blet, bletseq, bletrec -@dlet[(x, 0)] +@dlet[x := 0] # up to Python 3.8, use `@dlet(x := 0)` instead (decorator subscripting was added in 3.9) def count(): - x << x + 1 + (x := x + 1) # update `x` in let env return x assert count() == 1 assert count() == 2 -@dletrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), - (oddp, lambda x: (x != 0) and evenp(x - 1))] +@dletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def f(x): return evenp(x) assert f(42) is True assert f(23) is False -@dletseq[(x, 1), - (x, x+1), - (x, x+2)] +@dletseq[x := 1, + x := x + 1, + x := x + 2] def g(a): return a + x assert g(10) == 14 # block versions: the def takes no arguments, runs immediately, and is replaced by the return value. -@blet[(x, 21)] +@blet[x := 21] def result(): return 2*x assert result == 42 -@bletrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), - (oddp, lambda x: (x != 0) and evenp(x - 1))] +@bletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def result(): return evenp(42) assert result is True -@bletseq[(x, 1), - (x, x+1), - (x, x+2)] +@bletseq[x := 1, + x := x + 1, + x := x + 2] def result(): return x assert result == 4 ``` -**CAUTION**: assignment to the let environment uses the syntax ``name << value``, as always with ``unpythonic`` environments. The standard Python syntax ``name = value`` creates a local variable, as usual - *shadowing any variable with the same name from the ``let``*. +**CAUTION**: assignment to the let environment uses the assignment expression syntax `name := value`. The assignment statement `name = value` creates a local variable, as usual - *shadowing any variable with the same name from the `let`*. 
-The write of a ``name << value`` always occurs to the lexically innermost environment (as seen from the write site) that has that ``name``. If no lexically surrounding environment has that ``name``, *then* the expression remains untransformed, and means a left-shift (if ``name`` happens to be otherwise defined). +The write of a `name := value` always occurs to the lexically innermost environment (as seen from the write site) that has that `name`. If no lexically surrounding environment has that `name`, *then* the expression remains untransformed, and means binding a new lexical variable in the nearest enclosing scope, as per Python's standard rules. -**CAUTION**: formal parameters of a function definition, local variables, and any names declared as ``global`` or ``nonlocal`` in a given lexical scope shadow names from the ``let`` environment. Mostly, this applies *to the entirety of that lexical scope*. This is modeled after Python's standard scoping rules. +**CAUTION**: formal parameters of a function definition, local variables, and any names declared as `global` or `nonlocal` in a given lexical scope shadow names from an enclosing `let` environment. Mostly, this applies *to the entirety of that lexical scope*. This is modeled after Python's standard scoping rules. -As an exception to the rule, for the purposes of the scope analysis performed by ``unpythonic.syntax``, creations and deletions *of lexical local variables* take effect from the next statement, and remain in effect for the **lexically** remaining part of the current scope. This allows ``x = ...`` to see the old bindings on the RHS, as well as allows the client code to restore access to a surrounding env's ``x`` (by deleting a local ``x`` shadowing it) when desired. 
+As an exception to the rule, for the purposes of the scope analysis performed by `unpythonic.syntax`, creations and deletions *of lexical local variables* take effect from the next statement, and remain in effect for the **lexically** remaining part of the current scope. This allows `x = ...` to see the old bindings on the RHS, as well as allows the client code to restore access to a surrounding env's `x` (by deleting a local `x` shadowing it) when desired. -To clarify, here's a sampling from the unit tests: +To clarify, here is a sampling from [the unit tests](../unpythonic/syntax/tests/test_letdo.py): ```python -@dlet[(x, "the env x")] +@dlet[x := "the env x"] def f(): - return x + return x # No lexical variable `x` exists; this refers to the env `x`. assert f() == "the env x" -@dlet[(x, "the env x")] +@dlet[x := "the env x"] def f(): - x = "the local x" + x = "the local x" # The lexical variable shadows the env `x`. return x assert f() == "the local x" -@dlet[(x, "the env x")] +@dlet[x := "the env x"] def f(): return x - x = "the unused local x" + x = "the unused local x" # This appears *lexically after* the read access on the previous line. assert f() == "the env x" +@dlet[x := "the env x"] +def test15(): + def inner(): + (x := "updated env x") # noqa: F841, this writes to the let env since there is no `x` in an intervening scope, according to Python's standard rules. + inner() + return x +assert test15() == "updated env x" + +@dlet[x := "the env x"] +def test16(): + def inner(): + x = "the inner x" # noqa: F841, unused on purpose, for testing. An assignment *statement* does NOT write to the let env. + inner() + return x +assert test16() == "the env x" + +@dlet[x := "the env x"] +def test17(): + x = "the local x" # This lexical variable shadows the env x. + def inner(): + # The env x is shadowed. Since we don't say `nonlocal x`, this creates a new lexical variable scoped to `inner`. + (x := "the inner x") # noqa: F841, unused on purpose, for testing. 
+ inner() + return x +assert test17() == "the local x" + x = "the global x" -@dlet[(x, "the env x")] +@dlet[x := "the env x"] def f(): global x return x assert f() == "the global x" -@dlet[(x, "the env x")] +@dlet[x := "the env x"] def f(): x = "the local x" del x # deleting a local, ok! @@ -323,7 +433,7 @@ assert f() == "the env x" try: x = "the global x" - @dlet[(x, "the env x")] + @dlet[x := "the env x"] def f(): global x del x # ignored by unpythonic's scope analysis, deletion of globals is too dynamic @@ -336,11 +446,60 @@ else: ``` -### ``let_syntax``, ``abbrev``: syntactic local bindings +### Caution on name resolution and scoping + +The name resolution behavior described above **does not fully make sense**, because to define things this way is to conflate static (lexical) and dynamic (run-time) concepts. This feature unfortunately got built before I understood the matter clearly. + +Python itself performs name resolution purely lexically, which is arguably the right thing to do. In any given lexical scope, an identifier such as `x` always refers to the same variable. Whether that variable has been initialized, or has already been deleted, is another matter, which has to wait until run time - but `del x` will **not** cause the identifier `x` to point to a different variable for the remainder of the same scope, like `delete[x]` **does** in the body of an `unpythonic` `let[]` or `do[]`. + +#### Aside: Names and variables + +To be technically correct, in Python, an identifier `x` refers to a *name*, not to a "variable". Python, like Lisp, has [*names and values*](https://nedbatchelder.com/text/names.html). + +Roughly, an *identifier* is a certain kind of token in the source code text - something that everyday English calls a "name". However, in programming, a *name* is technically the *key* component of a key-value pair that is stored in a particular *environment*. 
-Locally splice code at macro expansion time (it's almost like inlining functions): +Very roughly speaking, an *environment* is just a place to store such pairs, for the purposes of "the variables subsystem" of the language. There are important details, such as that each *activation* of a function (think: "a particular call of the function") will create a new environment instance, to hold the local variables of that activation; this detail allows [lexical closures](https://en.wikipedia.org/wiki/Closure_(computer_programming)) to work. The piece of bookkeeping for this is termed an *activation record*. But the important point here is, an environment stores name-value pairs. -#### ``let_syntax`` +An identifier *refers to* a name. Scoping rules concern themselves with the details of mapping identifiers to names. In *lexical scoping* (like in Python), the position of the identifier in the source code text determines the search order of environments for the target name, when resolving a particular instance of an identifier in the source code text. Python uses the LEGB ordering (local, enclosing, global, builtin). + +Finally, *values* are the run-time things names point to. They are the *value* component of the key-value pair. + +In this simple example: + +```python +def outer(): + x = 17 + def inner(): + x = 23 +``` + + - The piece of source code text `x` is an *identifier*. + - *The outer `x`* and *the inner `x`* are *names*, both of which have the textual representation `x`. + - *Which one of these the identifier `x` refers to depends on where it appears.* + - The integers `17` and `23` are *values*. + +Note that classically, names have no type; values do. + +Nowadays, a name may have a type annotation, which reminds the programmer about the type of *value* that is safe to bind to that particular name. In other words, the code that defines that name (e.g. 
as a function parameter) promises (in the sense of a contract) that the code knows how to behave if a value of that type is bound to that name (e.g. by passing such a value as a function argument that will be bound to that name). + +Here *type* may be a concrete [nominal type](https://en.wikipedia.org/wiki/Nominal_type_system) such as `int`, or for example, it may represent a particular interface (such as the types in [`collections.abc`](https://docs.python.org/3/library/collections.abc.html)), or it may allow multiple mutually exclusive options (a *union*). + +By default, Python treats type annotations as a form of comments; to actually statically type-check Python, [Mypy](http://mypy-lang.org/) can be used. + +Compare the *name*/*value* concept to the concept of a *variable* in the classical sense, such as in C, or `cdef` in Cython. In such *low-level* [HLLs](https://en.wikipedia.org/wiki/High-level_programming_language), a *variable* is a named, fixed memory location, with a static data type determining how to interpret the bits at that memory location. The contents of the memory location can be changed, hence "variable" is an apt description. + + +### `let_syntax`, `abbrev`: syntactic local bindings + +**Note v0.15.0.** *Now that we use `mcpyrate` as the macro expander, `let_syntax` and `abbrev` are not really needed. We are keeping them mostly for backwards compatibility, and because they exercise a different feature set in the macro expander, making the existence of these constructs particularly useful for system testing.* + +*To define macros in the same module that uses them, see [multi-phase compilation](https://github.com/Technologicat/mcpyrate/blob/master/doc/compiler.md#multi-phase-compilation) in the [compiler documentation](https://github.com/Technologicat/mcpyrate/blob/master/doc/compiler.md). 
Using [run-time compiler access](https://github.com/Technologicat/mcpyrate/blob/master/doc/compiler.md#invoking-the-compiler-at-run-time), you can even create a macro definition module at run time (e.g. from a [quasiquoted](https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md) block) and inject it to `sys.modules`, allowing other code to import and use those macros. See the [compiler tests](https://github.com/Technologicat/mcpyrate/blob/master/mcpyrate/test/test_compiler.py) for examples.* + +*To rename existing macros, you can as-import them. As of `unpythonic` v0.15.0, doing so for `unpythonic.syntax` constructs is not recommended, though, because there is still a lot of old analysis code in the macro implementations that may scan for the original name. This may or may not be fixed in a future release.* + +These constructs allow you to locally splice code at macro expansion time. It is almost like inlining functions. + +#### `let_syntax` ```python from unpythonic.syntax import macros, let_syntax, block, expr @@ -349,71 +508,76 @@ def verylongfunctionname(x=1): return x # works as an expr macro -y = let_syntax[(f, verylongfunctionname)][[ # extra brackets: implicit do in body +y = let_syntax[f := verylongfunctionname][[ # extra brackets: implicit do in body print(f()), f(5)]] assert y == 5 -y = let_syntax[(f(a), verylongfunctionname(2*a))][[ # template with formal parameter "a" - print(f(2)), - f(3)]] +y = let_syntax[f[a] := verylongfunctionname(2*a)][[ # template with formal parameter "a" + print(f[2]), + f[3]]] assert y == 6 -# v0.12.0+ -y = let_syntax[((f, verylongfunctionname)) in +y = let_syntax[[f := verylongfunctionname] in [print(f()), f(5)]] y = let_syntax[[print(f()), f(5)], - where((f, verylongfunctionname))] -y = let_syntax[((f(a), verylongfunctionname(2*a))) in - [print(f(2)), - f(3)]] -y = let_syntax[[print(f(2)), - f(3)], - where((f(a), verylongfunctionname(2*a)))] + where[f := verylongfunctionname]] +y = let_syntax[[f[a] := 
verylongfunctionname(2*a)] in + [print(f[2]), + f[3]]] +y = let_syntax[[print(f[2]), + f[3]], + where[f[a] := verylongfunctionname(2*a)]] # works as a block macro with let_syntax: - with block(a, b, c) as makeabc: # capture a block of statements + # with block as name: + # with block[a0, ...] as name: + with block[a, b, c] as makeabc: # capture a block of statements lst = [a, b, c] makeabc(3 + 4, 2**3, 3 * 3) assert lst == [7, 8, 9] - with expr(n) as nth: # capture a single expression + # with expr as name: + # with expr[a0, ...] as name: + with expr[n] as nth: # capture a single expression lst[n] assert nth(2) == 9 with let_syntax: - with block(a) as twice: + with block[a] as twice: a a - with block(x, y, z) as appendxyz: + with block[x, y, z] as appendxyz: lst += [x, y, z] lst = [] twice(appendxyz(7, 8, 9)) assert lst == [7, 8, 9]*2 ``` -After macro expansion completes, ``let_syntax`` has zero runtime overhead; it completely disappears in macro expansion. +After macro expansion completes, `let_syntax` has zero runtime overhead; it completely disappears in macro expansion. + +The `expr` and `block` operators, if used, must be macro-imported. They may only appear in `with expr` and `with block` subforms at the top level of a `with let_syntax` or `with abbrev`. In any invalid position, `expr` and `block` are both considered a syntax error at macro expansion time.
There are two kinds of substitutions: ->*Bare name* and *template*. A bare name substitution has no parameters. A template substitution has positional parameters. (Named parameters, ``*args``, ``**kwargs`` and default values are currently **not** supported.) +>*Bare name* and *template*. A bare name substitution has no parameters. A template substitution has positional parameters. (Named parameters, `*args`, `**kwargs` and default values are **not** supported.) > ->When used as an expr macro, the formal parameter declaration is placed where it belongs; on the name side (LHS) of the binding. In the above example, ``f(a)`` is a template with a formal parameter ``a``. But when used as a block macro, the formal parameters are declared on the ``block`` or ``expr`` "context manager" due to syntactic limitations of Python. To define a bare name substitution, just use ``with block as ...:`` or ``with expr as ...:`` with no arguments. +>When used as an expr macro, the formal parameter declaration is placed where it belongs; on the name side (LHS) of the binding. In the above example, `f[a]` is a template with a formal parameter `a`. But when used as a block macro, the formal parameters are declared on the `block` or `expr` "context manager" due to syntactic limitations of Python. To define a bare name substitution, just use `with block as ...:` or `with expr as ...:` with no macro arguments. > ->In the body of ``let_syntax``, a bare name substitution is invoked by name (just like a variable). A template substitution is invoked like a function call. Just like in an actual function call, when the template is substituted, any instances of its formal parameters in the definition get replaced by the argument values from the "call" site; but ``let_syntax`` performs this at macro-expansion time, and the "value" is a snippet of code. +>In the body of `let_syntax`, a bare name substitution is invoked by name (just like a variable). 
A template substitution is invoked like an expr macro. Any instances of the formal parameters of the template get replaced by the argument values from the use site, at macro expansion time. > ->Note each instance of the same formal parameter (in the definition) gets a fresh copy of the corresponding argument value. In other words, in the example above, each ``a`` in the body of ``twice`` separately expands to a copy of whatever code was given as the positional argument ``a``. +>Note each instance of the same formal parameter (in the definition) gets a fresh copy of the corresponding argument value. In other words, in the example above, each `a` in the body of `twice` separately expands to a copy of whatever code was given as the macro argument `a`. > ->When used as a block macro, there are furthermore two capture modes: *block of statements*, and *single expression*. (The single expression can be an explicit ``do[]`` if multiple expressions are needed.) When invoking substitutions, keep in mind Python's usual rules regarding where statements or expressions may appear. +>When used as a block macro, there are furthermore two capture modes: *block of statements*, and *single expression*. The single expression can be an explicit `do[]`, if multiple expressions are needed. When invoking substitutions, keep in mind Python's usual rules regarding where statements or expressions may appear. > ->(If you know about Python ASTs, don't worry about the ``ast.Expr`` wrapper needed to place an expression in a statement position; this is handled automatically.) +>(If you know about Python ASTs, do not worry about the `ast.Expr` wrapper needed to place an expression in a statement position; this is handled automatically.)

-**HINT**: If you get a compiler error that some sort of statement was encountered where an expression was expected, check your uses of ``let_syntax``. The most likely reason is that a substitution is trying to splice a block of statements into an expression position. +**HINT**: If you get a compiler error that some sort of statement was encountered where an expression was expected, check your uses of `let_syntax`. The most likely reason is that a substitution is trying to splice a block of statements into an expression position.

Expansion of this macro is a two-step process: @@ -425,114 +589,132 @@ After macro expansion completes, ``let_syntax`` has zero runtime overhead; it co > >Within each step, the substitutions are applied **in definition order**: > -> - If the bindings are ``((x, y), (y, z))``, then an ``x`` at the use site transforms to ``z``. So does a ``y`` at the use site. -> - But if the bindings are ``((y, z), (x, y))``, then an ``x`` at the use site transforms to ``y``, and only an explicit ``y`` at the use site transforms to ``z``. +> - If the bindings are `[x := y, y := z]`, then an `x` at the use site transforms to `z`. So does a `y` at the use site. +> - But if the bindings are `[y := z, x := y]`, then an `x` at the use site transforms to `y`, and only an explicit `y` at the use site transforms to `z`. > ->Even in block templates, arguments are always expressions, because invoking a template uses the function-call syntax. But names and calls are expressions, so a previously defined substitution (whether bare name or an invocation of a template) can be passed as an argument just fine. Definition order is then important; consult the rules above. +>Even in block templates, arguments are always expressions, because invoking a template uses the subscript syntax. But names and calls are expressions, so a previously defined substitution (whether bare name or an invocation of a template) can be passed as an argument just fine. Definition order is then important; consult the rules above.

-Nesting ``let_syntax`` is allowed. Lexical scoping is supported (inner definitions of substitutions shadow outer ones). +Nesting `let_syntax` is allowed. Lexical scoping is respected. Inner definitions of substitutions shadow outer ones. -When used as an expr macro, all bindings are registered first, and then the body is evaluated. When used as a block macro, a new binding (substitution declaration) takes effect from the next statement onward, and remains active for the lexically remaining part of the ``with let_syntax:`` block. +When used as an expr macro, all bindings are registered first, and then the body is evaluated. When used as a block macro, a new binding (substitution declaration) takes effect from the next statement onward, and remains active for the lexically remaining part of the `with let_syntax` block. #### `abbrev` -The ``abbrev`` macro is otherwise exactly like ``let_syntax``, but it expands in the first pass (outside in). Hence, no lexically scoped nesting, but it has the power to locally rename also macros, because the ``abbrev`` itself expands before any macros invoked in its body. This allows things like: +The `abbrev` macro is otherwise exactly like `let_syntax`, but it expands outside-in. Hence, it has no lexically scoped nesting support, but it has the power to locally rename also macros, because the `abbrev` itself expands before any macros invoked in its body. This allows things like: ```python -abbrev[(a, ast_literal)][ - a[tree1] if a[tree2] else a[tree3]] - -# v0.12.0+ -abbrev[((a, ast_literal)) in - a[tree1] if a[tree2] else a[tree3]] -abbrev[a[tree1] if a[tree2] else a[tree3], - where((a, ast_literal))] +abbrev[m := macrowithverylongname][ + m[tree1] if m[tree2] else m[tree3]] +abbrev[[m := macrowithverylongname] in + m[tree1] if m[tree2] else m[tree3]] +abbrev[m[tree1] if m[tree2] else m[tree3], + where[m := macrowithverylongname]] ``` -which can be useful when writing macros. +which is sometimes useful when writing macros. 
But using `mcpyrate`, note that you can just as-import a macro if you need to rename it. + +**CAUTION**: `let_syntax` is essentially a toy macro system within the real macro system. The usual caveats of macro systems apply. Especially, `let_syntax` and `abbrev` support absolutely no form of hygiene. Be very, very careful to avoid name conflicts. -**CAUTION**: ``let_syntax`` is essentially a toy macro system within the real macro system. The usual caveats of macro systems apply. Especially, we support absolutely no form of hygiene. Be very, very careful to avoid name conflicts. +The `let_syntax` macro is meant for simple local substitutions where the elimination of repetition can shorten the code and improve its readability, in cases where the final "unrolled" code should be written out at compile time. If you need to do something complex (or indeed save a definition and reuse it somewhere else, non-locally), write a real macro directly in `mcpyrate`. -The ``let_syntax`` macro is meant for simple local substitutions where the elimination of repetition can shorten the code and improve its readability. If you need to do something complex (or indeed save a definition and reuse it somewhere else, non-locally), write a real macro directly in `mcpyrate`. +This was inspired by Racket's [`let-syntax`](https://docs.racket-lang.org/reference/let.html) and [`with-syntax`](https://docs.racket-lang.org/reference/stx-patterns.html) forms. -This was inspired by Racket's [``let-syntax``](https://docs.racket-lang.org/reference/let.html) and [``with-syntax``](https://docs.racket-lang.org/reference/stx-patterns.html). +### Bonus: barebones `let` -### Bonus: barebones ``let`` +As a bonus, we provide classical simple `let` and `letseq`, wholly implemented as AST transformations, providing true lexical variables, but no multi-expression body support. 
Just like in some Lisps, this version of `letseq` (Scheme/[Racket `let*`](https://docs.racket-lang.org/reference/let.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2A%29%29)) expands into a chain of nested `let` expressions, which expand to lambdas. -As a bonus, we provide classical simple ``let`` and ``letseq``, wholly implemented as AST transformations, providing true lexical variables but no assignment support (because in Python, assignment is a statement) or multi-expression body support. Just like in Lisps, this version of ``letseq`` (Scheme/Racket ``let*``) expands into a chain of nested ``let`` expressions, which expand to lambdas. +These are provided in the separate module `unpythonic.syntax.simplelet`, and are not part of the `unpythonic.syntax` macro API. For simplicity, they support only the lispy list syntax in the bindings subform (using brackets, specifically!), and no haskelly syntax at all: -These are provided in the separate module ``unpythonic.syntax.simplelet``, import them with the line: +```python +from unpythonic.syntax.simplelet import macros, let, letseq + +let[[x, 42], [y, 23]][...] +let[[x, 42]][...] +letseq[[x, 1], [x, x + 1]][...] +letseq[[x, 1]][...] +``` + +Starting with Python 3.8, assignment (rebinding) is possible also in these barebones `let` constructs via the walrus operator. For example: + +```python +assert let[[x, 42]][x] == 42 +assert let[[x, 42]][(x := 5)] == 5 +``` + +However, this only works for variables created by the innermost `let` (viewed from the point where the assignment happens), because `nonlocal` is a statement and so cannot be used in expressions. -``from unpythonic.syntax.simplelet import macros, let, letseq``. ## Sequencing Macros that run multiple expressions, in sequence, in place of one expression. 
-### ``do`` as a macro: stuff imperative code into an expression, *with style* +### `do` as a macro: stuff imperative code into an expression, *with style* + +**Changed in v0.15.3.** *Env-assignments now use the walrus syntax `x := 42`. The old syntax `x << 42` is still supported for backward compatibility.* -We provide an ``expr`` macro wrapper for ``unpythonic.seq.do``, with some extra features. +We provide an `expr` macro wrapper for `unpythonic.do` and `unpythonic.do0`, with some extra features. -This essentially allows writing imperative code in any expression position. For an `if-elif-else` conditional, [see `cond`](#cond-the-missing-elif-for-a-if-p-else-b); for loops, see [the functions in `unpythonic.fploop`](../unpythonic/fploop.py) (esp. `looped`). +This essentially allows writing imperative code in any expression position. For an `if-elif-else` conditional, [see `cond`](#cond-the-missing-elif-for-a-if-p-else-b); for loops, see the functions in the module [`unpythonic.fploop`](../unpythonic/fploop.py) (`looped` and `looped_over`). ```python from unpythonic.syntax import macros, do, local, delete -y = do[local[x << 17], +y = do[local[x := 17], print(x), - x << 23, + x := 23, x] print(y) # --> 23 a = 5 -y = do[local[a << 17], +y = do[local[a := 17], print(a), # --> 17 delete[a], print(a), # --> 5 True] ``` -Local variables are declared and initialized with ``local[var << value]``, where ``var`` is a bare name. To explicitly denote "no value", just use ``None``. ``delete[...]`` allows deleting a ``local[...]`` binding. This uses ``env.pop()`` internally, so a ``delete[...]`` returns the value the deleted local variable had at the time of deletion. (So if you manually use the ``do()`` function in some code without macros, feel free to ``env.pop()`` in a do-item if needed.) +Local variables are declared and initialized with `local[var := value]`, where `var` is a bare name. To explicitly denote "no value", just use `None`. 
The syntax `delete[...]` allows deleting a `local[...]` binding. This uses `env.pop()` internally, so a `delete[...]` returns the value the deleted local variable had at the time of deletion. (This also means that if you manually use the `do()` function in some code without macros, you can `env.pop(...)` in a do-item if needed.) -A ``local`` declaration comes into effect in the expression following the one where it appears, capturing the declared name as a local variable for the **lexically** remaining part of the ``do``. In a ``local``, the RHS still sees the previous bindings, so this is valid (although maybe not readable): +The `local[]` and `delete[]` declarations may only appear at the top level of a `do[]`, `do0[]`, or implicit `do` (extra bracket syntax, e.g. for the body of a `let` form). In any invalid position, `local[]` and `delete[]` are considered a syntax error at macro expansion time. + +A `local` declaration comes into effect in the expression following the one where it appears, capturing the declared name as a local variable for the **lexically** remaining part of the `do`. In a `local`, the RHS still sees the previous bindings, so this is valid (although maybe not readable): ```python result = [] -let[(lst, [])][[result.append(lst), # the let "lst" - local[lst << lst + [1]], # LHS: do "lst", RHS: let "lst" +let[lst := []][[result.append(lst), # the let "lst" + local[lst := lst + [1]], # LHS: do "lst", RHS: let "lst" result.append(lst)]] # the do "lst" assert result == [[], [1]] ``` -Already declared local variables are updated with ``var << value``. Updating variables in lexically outer environments (e.g. a ``let`` surrounding a ``do``) uses the same syntax. +Already declared local variables are updated with `var := value`. Updating variables in lexically outer environments (e.g. a `let` surrounding a `do`) uses the same syntax.

The reason we require local variables to be declared is to allow write access to lexically outer environments. ->Assignments are recognized anywhere inside the ``do``; but note that any ``let`` constructs nested *inside* the ``do``, that define variables of the same name, will (inside the ``let``) shadow those of the ``do`` - as expected of lexical scoping. -> ->The necessary boilerplate (notably the ``lambda e: ...`` wrappers) is inserted automatically, so the expressions in a ``do[]`` are only evaluated when the underlying ``seq.do`` actually runs. +>Assignments are recognized anywhere inside the `do`; but note that any `let` constructs nested *inside* the `do`, that define variables of the same name, will (inside the `let`) shadow those of the `do` - as expected of lexical scoping. > ->When running, ``do`` behaves like ``letseq``; assignments **above** the current line are in effect (and have been performed in the order presented). Re-assigning to the same name later overwrites (this is afterall an imperative tool). +>The boilerplate needed by the underlying `unpythonic.do` form (notably the `lambda e: ...` wrappers) is inserted automatically. The expressions in a `do[]` are only evaluated when the underlying `unpythonic.do` actually runs. > ->We also provide a ``do0`` macro, which returns the value of the first expression, instead of the last. +>When running, `do` behaves like `letseq`; assignments **above** the current line are in effect (and have been performed in the order presented). Re-assigning to the same name later overwrites.

-**CAUTION**: ``do[]`` supports local variable deletion, but the ``let[]`` constructs don't, by design. When ``do[]`` is used implicitly with the extra bracket syntax, any ``delete[]`` refers to the scope of the implicit ``do[]``, not any surrounding ``let[]`` scope. +**CAUTION**: `do[]` supports local variable deletion, but the `let[]` constructs do **not**, by design. When `do[]` is used implicitly with the extra bracket syntax, any `delete[]` refers to the scope of the implicit `do[]`, not any surrounding `let[]` scope. + ## Tools for lambdas Macros that introduce additional features for Python's lambdas. -### ``multilambda``: supercharge your lambdas +### `multilambda`: supercharge your lambdas -**Multiple expressions**: use ``[...]`` to denote a multiple-expression body. The macro implements this by inserting a ``do``. +**Multiple expressions**: use `[...]` to denote a multiple-expression body. The macro implements this by inserting a `do`. -**Local variables**: available in a multiple-expression body. For details on usage, see ``do``. +**Local variables**: available in a multiple-expression body. For details on usage, see `do`. 
```python from unpythonic.syntax import macros, multilambda, let @@ -541,21 +723,21 @@ with multilambda: echo = lambda x: [print(x), x] assert echo("hi there") == "hi there" - count = let[(x, 0)][ - lambda: [x << x + 1, # x belongs to the surrounding let + count = let[x := 0][ + lambda: [x := x + 1, # x belongs to the surrounding let x]] assert count() == 1 assert count() == 2 - test = let[(x, 0)][ - lambda: [x << x + 1, - local[y << 42], # y is local to the implicit do + test = let[x := 0][ + lambda: [x := x + 1, + local[y := 42], # y is local to the implicit do (x, y)]] assert test() == (1, 42) assert test() == (2, 42) myadd = lambda x, y: [print("myadding", x, y), - local[tmp << x + y], + local[tmp := x + y], print("result is", tmp), tmp] assert myadd(2, 3) == 5 @@ -565,10 +747,12 @@ with multilambda: assert t() == [1, 2] ``` -In the second example, returning ``x`` separately is redundant, because the assignment to the let environment already returns the new value, but it demonstrates the usage of multiple expressions in a lambda. +In the second example, returning `x` separately is redundant, because the assignment to the let environment already returns the new value, but it demonstrates the usage of multiple expressions in a lambda. + +### `namedlambda`: auto-name your lambdas -### ``namedlambda``: auto-name your lambdas +**Changed in v0.15.0.** *When `namedlambda` encounters a lambda definition it cannot infer a name for, it instead injects source location info into the name, provided that the AST node for that particular `lambda` has a line number for it. The result looks like `<lambda at some/path/mymod.py:201>`.* Who said lambdas have to be anonymous? 
@@ -578,14 +762,14 @@ from unpythonic.syntax import macros, namedlambda with namedlambda: f = lambda x: x**3 # assignment: name as "f" assert f.__name__ == "f" - gn, hn = let[(x, 42), (g, None), (h, None)][[ - g << (lambda x: x**2), # env-assignment: name as "g" - h << f, # still "f" (no literal lambda on RHS) + gn, hn = let[x := 42, g := None, h := None][[ + g := (lambda x: x**2), # env-assignment: name as "g" + h := f, # still "f" (no literal lambda on RHS) (g.__name__, h.__name__)]] assert gn == "g" assert hn == "f" - foo = let[(f7, lambda x: x) in f7] # let-binding: name as "f7" + foo = let[[f7 := (lambda x: x)] in f7] # let-binding: name as "f7" def foo(func1, func2): assert func1.__name__ == "func1" @@ -600,82 +784,99 @@ with namedlambda: assert d["g"].__name__ == "g" ``` -Lexically inside a ``with namedlambda`` block, any literal ``lambda`` that is assigned to a name using one of the supported assignment forms is named to have the name of the LHS of the assignment. The name is captured at macro expansion time. +Lexically inside a `with namedlambda` block, any literal `lambda` that is assigned to a name using one of the supported assignment forms is named to have the name of the LHS of the assignment. The name is captured at macro expansion time. -Decorated lambdas are also supported, as is a ``curry`` (manual or auto) where the last argument is a lambda. The latter is a convenience feature, mainly for applying parametric decorators to lambdas. See [the unit tests](../unpythonic/syntax/test/test_lambdatools.py) for detailed examples. +Decorated lambdas are also supported, as is a `curry` (manual or auto) where the last argument is a lambda. The latter is a convenience feature, mainly for applying parametric decorators to lambdas. See [the unit tests](../unpythonic/syntax/tests/test_lambdatools.py) for detailed examples. 
-The naming is performed using the function ``unpythonic.misc.namelambda``, which will return a modified copy with its ``__name__``, ``__qualname__`` and ``__code__.co_name`` changed. The original function object is not mutated. +The naming is performed using the function `unpythonic.namelambda`, which will return a modified copy with its `__name__`, `__qualname__` and `__code__.co_name` changed. The original function object is not mutated. **Supported assignment forms**: - - Single-item assignment to a local name, ``f = lambda ...: ...`` + - Single-item assignment to a local name, `f = lambda ...: ...` - - Expression-assignment to an unpythonic environment, ``f << (lambda ...: ...)`` - - Env-assignments are processed lexically, just like regular assignments. + - Named expressions (a.k.a. walrus operator, Python 3.8+), `f := lambda ...: ...`. **Added in v0.15.0.** - - Let bindings, ``let[(f, (lambda ...: ...)) in ...]``, using any let syntax supported by unpythonic (here using the haskelly let-in just as an example). + - Expression-assignment to an unpythonic environment, `f := (lambda ...: ...)`, and the old syntax `f << (lambda ...: ...)`. + - Env-assignments are processed lexically, just like regular assignments. This should not cause problems, because left-shifting by a literal lambda most often makes no sense (whence, that syntax is *almost* guaranteed to mean an env-assignment). - - **Added in v0.14.2**: Named argument in a function call, as in ``foo(f=lambda ...: ...)``. + - Let-bindings, `let[[f := (lambda ...: ...)] in ...]`, using any let syntax supported by unpythonic (here using the haskelly let-in with env-assign style bindings just as an example). - - **Added in v0.14.2**: In a dictionary literal ``{...}``, an item with a literal string key, as in ``{"f": lambda ...: ...}``. + - Named argument in a function call, as in `foo(f=lambda ...: ...)`. **Added in v0.14.2.** -Support for other forms of assignment may or may not be added in a future version. 
+ - In a dictionary literal `{...}`, an item with a literal string key, as in `{"f": lambda ...: ...}`. **Added in v0.14.2.** -### ``f``: underscore notation (quick lambdas) for Python. +Support for other forms of assignment may or may not be added in a future version. We will maintain a list here; but if you want the gritty details, see the `_namedlambda` syntax transformer in [`unpythonic.syntax.lambdatools`](../unpythonic/syntax/lambdatools.py). -**Changed in 0.15.** *Up to 0.14.x, the `f[]` macro used to be provided by `macropy`, but now that we use `mcpyrate`, we provide this ourselves. The underscore `_` is no longer a macro on its own. The `f` macro treats the underscore magically, as before, but anywhere else the underscore is available to be used as a regular variable. If you use `f[]`, change your import of this macro to `from unpythonic.syntax import macros, f`.* +### `fn`: underscore notation (quick lambdas) for Python -The syntax ``f[...]`` creates a lambda, where each underscore in the ``...`` part introduces a new parameter. The macro does not descend into any nested ``f[]``. +**Changed in v0.15.0.** *Up to 0.14.x, the `f[]` macro used to be provided by `macropy`, but now that we use `mcpyrate`, we provide this ourselves. Note that the name of the construct is now `fn[]`.* -Example: +The syntax `fn[...]` creates a lambda, where each underscore `_` in the `...` part introduces a new parameter: ```python -func = f[_ * _] # --> func = lambda x, y: x * y +from unpythonic.syntax import macros, fn +from unpythonic.syntax import _ # optional, makes IDEs happy + +double = fn[_ * 2] # --> double = lambda x: x * 2 +mul = fn[_ * _] # --> mul = lambda x, y: x * y ``` -Since in `mcpyrate`, macros can be as-imported, you can rename `f` at import time to have any name you want. The `quicklambda` block macro (see below) respects the as-import. 
Now you **must** import also the macro `f` when you import the macro `quicklambda`, because `quicklambda` internally queries the expander to determine the name(s) the macro `f` is currently bound to. +The macro does not descend into any nested `fn[]`, to allow the macro expander itself to expand those separately. + +We have named the construct `fn`, because `f` is often used as a function name in code examples, local temporaries, and similar. Also, `fn[]` is a less ambiguous abbreviation for a syntactic construct that means *function*, while remaining shorter than the equivalent `lambda`. + +The underscore `_` itself is not a macro. The `fn` macro treats the underscore magically, just like MacroPy's `f`, but anywhere else the underscore is available to be used as a regular variable. + +The underscore does not need to be imported for `fn[]` to recognize it, but if you want to make your IDE happy, there is a symbol named `_` in `unpythonic.syntax` you can import to silence any "undefined name" errors regarding the use of `_`. It is a regular run-time object, not a macro. It is available in `unpythonic.syntax` (not at the top level of `unpythonic`) because it is basically an auxiliary syntactic construct, with no meaningful run-time functionality of its own. -### ``quicklambda``: expand quick lambdas first +(It *could* be made into a `@namemacro` that triggers a syntax error when it appears in an improper context, like starting with v0.15.0, many auxiliary constructs in similar roles already do. But it was decided that in this particular case, it is more valuable to have the name `_` available for other uses in other contexts, because it is a standard dummy name in Python. The lambdas created using `fn[]` are likely short enough that not automatically detecting misplaced underscores does not cause problems in practice.) -To be able to transform correctly, the block macros in ``unpythonic.syntax`` that transform lambdas (e.g. 
``multilambda``, ``tco``) need to see all ``lambda`` definitions written with Python's standard ``lambda``. +Because in `mcpyrate`, macros can be as-imported, you can rename `fn` at import time to have any name you want. The `quicklambda` block macro (see below) respects the as-import. You **must** import also the macro `fn` if you use `quicklambda`, because `quicklambda` internally queries the expander to determine the name(s) the macro `fn` is currently bound to. If the `fn` macro is not bound to any name, `quicklambda` will do nothing. -However, the ``f`` macro uses the syntax ``f[...]``, which (to the analyzer) does not look like a lambda definition. This macro changes the expansion order, forcing any ``f[...]`` lexically inside the block to expand before any other macros do. +It is sufficient that `fn` has been macro-imported by the time when the `with quicklambda` expands. So it is possible, for example, for a dialect template to macro-import just `quicklambda` and inject an invocation for it, and leave macro-importing `fn` to the user code. The `Lispy` variant of the [Lispython dialect](dialects/lispython.md) does exactly this. -Any expression of the form ``f[...]``, where ``f`` is any name bound in the current macro expander to the macro `unpythonic.syntax.f`, is understood as a quick lambda. (In plain English, this respects as-imports of the macro ``f``.) +### `quicklambda`: expand quick lambdas first + +To be able to transform correctly, the block macros in `unpythonic.syntax` that transform lambdas (e.g. `multilambda`, `tco`) need to see all `lambda` definitions written with Python's standard `lambda`. + +However, the `fn` macro uses the syntax `fn[...]`, which (to the analyzer) does not look like a lambda definition. The `quicklambda` block macro changes the expansion order, forcing any `fn[...]` lexically inside the block to expand before any other macros do. 
+ +Any expression of the form `fn[...]`, where `fn` is any name bound in the current macro expander to the macro `unpythonic.syntax.fn`, is understood as a quick lambda. (In plain English, this respects as-imports of the macro `fn`.) Example - a quick multilambda: ```python -from unpythonic.syntax import macros, multilambda, quicklambda, f, local +from unpythonic.syntax import macros, multilambda, quicklambda, fn, local +from unpythonic.syntax import _ # optional, makes IDEs happy with quicklambda, multilambda: - func = f[[local[x << _], - local[y << _], - x + y]] + func = fn[[local[x := _], + local[y := _], + x + y]] assert func(1, 2) == 3 ``` -This is of course rather silly, as an unnamed formal parameter can only be mentioned once. If we're giving names to them, a regular ``lambda`` is shorter to write. A more realistic combo is: +This is of course rather silly, as an unnamed formal parameter can only be mentioned once. If we are giving names to them, a regular `lambda` is shorter to write. A more realistic combo is: ```python with quicklambda, tco: def g(x): - return 2*x - func1 = f[g(3*_)] # tail call + return 2 * x + func1 = fn[g(3 * _)] # tail call assert func1(10) == 60 - func2 = f[3*g(_)] # no tail call + func2 = fn[3 * g(_)] # no tail call assert func2(10) == 60 ``` -### ``envify``: make formal parameters live in an unpythonic ``env`` +### `envify`: make formal parameters live in an unpythonic `env` -When a function whose definition (``def`` or ``lambda``) is lexically inside a ``with envify`` block is entered, it copies references to its arguments into an unpythonic ``env``. At macro expansion time, all references to the formal parameters are redirected to that environment. This allows rebinding, from an expression position, names that were originally the formal parameters. +When a function whose definition (`def` or `lambda`) is lexically inside a `with envify` block is entered, it copies references to its arguments into an unpythonic `env`. 
At macro expansion time, all references to the formal parameters are redirected to that environment. This allows rebinding, from an expression position, names that were originally the formal parameters. Wherever could *that* be useful? For an illustrative caricature, consider [PG's accumulator puzzle](http://paulgraham.com/icad.html). -The modern pythonic solution: +The Python 3 solution: ```python def foo(n): @@ -686,43 +887,60 @@ def foo(n): return accumulate ``` -This avoids allocating an extra place to store the accumulator ``n``. If you want optimal bytecode, this is the best solution in Python 3. +This avoids allocating an extra place to store the accumulator `n`. The Python 3.8+ solution, using the new walrus operator, is one line shorter: -But what if, instead, we consider the readability of the unexpanded source code? The definition of ``accumulate`` requires many lines for something that simple. What if we wanted to make it a lambda? Because all forms of assignment are statements in Python, the above solution is not admissible for a lambda, even with macros. +```python +def foo(n): + def accumulate(i): + nonlocal n + return (n := n + i) + return accumulate +``` + +This is rather clean, but still needs the `nonlocal` declaration, which is available as a statement only. + +If you want optimal bytecode, these two are the best solutions of the puzzle in Python. -So if we want to use a lambda, we have to create an ``env``, so that we can write into it. Let's use the let-over-lambda idiom: +But what if we want to shorten the source code even more, for readability? We could make `accumulate` a lambda. But then, to rebind the `n` that lives in an enclosing scope - because Python does not support doing that from an expression position - we must make it live in an `unpythonic` `env`. 
+ +Let's use the let-over-lambda idiom: ```python def foo(n0): - return let[(n, n0) in - (lambda i: n << n + i)] + return let[[n := n0] in + (lambda i: (n := n + i))] ``` -Already better, but the ``let`` is used only for (in effect) altering the passed-in value of ``n0``; we don't place any other variables into the ``let`` environment. Considering the source text already introduces an ``n0`` which is just used to initialize ``n``, that's an extra element that could be eliminated. +This is already shorter, but the `let` is used only for (in effect) storing the passed-in value of `n0`; we do not place any other variables into the `let` environment. Considering the source text already introduces a name `n0` which is just used to initialize `n`, that's an extra element that could be eliminated. -Enter the ``envify`` macro, which automates this: +Enter the `envify` macro, which automates this: ```python with envify: def foo(n): - return lambda i: n << n + i + return lambda i: (n := n + i) ``` -Combining with ``autoreturn`` yields the fewest-elements optimal solution to the accumulator puzzle: +Note this does not work without `envify`, because then the assignment expression will create a local variable (local to the lambda) instead of rebinding the outer existing `n`. + +Combining with `autoreturn` yields the fewest-source-code-elements optimal solution to the accumulator puzzle: ```python with autoreturn, envify: def foo(n): - lambda i: n << n + i + lambda i: (n := n + i) ``` -The ``with`` block adds a few elements, but if desired, it can be refactored into the definition of a custom dialect in [Pydialect](https://github.com/Technologicat/pydialect). +The `with` block adds a few elements, but if desired, it can be refactored into the definition of a custom dialect using `mcpyrate`. See [dialect examples](dialects.md). + ## Language features To boldly go where Python without macros just won't. Changing the rules by code-walking and making significant rewrites. 
-### ``curry``: automatic currying for Python +### `autocurry`: automatic currying for Python + +**Changed in v0.15.0.** *The macro is now named `autocurry`, to avoid shadowing the `curry` function.* ```python from unpythonic.syntax import macros, autocurry @@ -744,25 +962,27 @@ with autocurry: assert add3(1)(2)(3) == 6 ``` -*Lexically* inside a ``with autocurry`` block: +*Lexically* inside a `with autocurry` block: - - All **function calls** and **function definitions** (``def``, ``lambda``) are automatically curried, somewhat like in Haskell, or in ``#lang`` [``spicy``](https://github.com/Technologicat/spicy). + - All **function calls** and **function definitions** (`def`, `lambda`) are automatically curried, somewhat like in Haskell, or in `#lang` [`spicy`](https://github.com/Technologicat/spicy). - - Function calls are autocurried, and run ``unpythonic.fun.curry`` in a special mode that no-ops on uninspectable functions (triggering a standard function call with the given args immediately) instead of raising ``TypeError`` as usual. + - Function calls are autocurried, and run `unpythonic.curry` in a special mode that no-ops on uninspectable functions (triggering a standard function call with the given args immediately) instead of raising `TypeError` as usual. -**CAUTION**: Some built-ins are uninspectable or may report their arities incorrectly; in those cases, ``curry`` may fail, occasionally in mysterious ways. The function ``unpythonic.arity.arities``, which ``unpythonic.fun.curry`` internally uses, has a workaround for the inspectability problems of all built-ins in the top-level namespace (as of Python 3.7), but e.g. methods of built-in types are not handled. +**CAUTION**: Some built-ins are uninspectable or may report their call signature incorrectly; in those cases, `curry` may fail, occasionally in mysterious ways. When inspection fails, `curry` raises `ValueError`, like `inspect.signature` does.
Manual uses of the `curry` decorator (on both `def` and `lambda`) are detected, and in such cases the macro skips adding the decorator. -### ``lazify``: call-by-need for Python +### `lazify`: call-by-need for Python -**Changed in 0.15.** *Up to 0.14.x, the `lazy[]` macro, that is used together with `with lazify`, used to be provided by `macropy`, but now that we use `mcpyrate`, we provide it ourselves. If you use `lazy[]`, change your import of that macro to `from unpythonic.syntax import macros, lazy`*. +**Changed in v0.15.0.** *The `lazy[]` macro, that is used together with `with lazify`, used to be provided by `macropy` up to `unpythonic` v0.14.3. But now that we use `mcpyrate`, we provide a `lazy[]` macro and an underlying `Lazy` class ourselves. For details, see the separate section about `lazy[]` and `lazyrec[]` below.* -Also known as *lazy functions*. Like [lazy/racket](https://docs.racket-lang.org/lazy/index.html), but for Python. Note if you want *lazy sequences* instead, Python already provides those; just use the generator facility (and decorate your gfunc with ``unpythonic.gmemoize`` if needed). +Also known as *lazy functions*. Like [lazy/racket](https://docs.racket-lang.org/lazy/index.html), but for Python. Note if you want *lazy sequences* instead, Python already provides those; just use the generator facility (and decorate your gfunc with `unpythonic.gmemoize` if needed). Lazy function example: ```python +from unpythonic.syntax import macros, lazify + with lazify: def my_if(p, a, b): if p: @@ -779,15 +999,15 @@ with lazify: assert f(21, 1/0) == 42 ``` -In a ``with lazify`` block, function arguments are evaluated only when actually used, at most once each, and in the order in which they are actually used. Promises are automatically forced on access. Automatic lazification applies to arguments in function calls and to let-bindings, since they play a similar role. 
**No other binding forms are auto-lazified.** +In a `with lazify` block, function arguments are evaluated only when actually used, at most once each, and in the order in which they are actually used (regardless of the ordering of the formal parameters that receive them). Delayed values (*promises*) are automatically evaluated (*forced*) on access. Automatic lazification applies to arguments in function calls and to let-bindings, since they play a similar role. **No other binding forms are auto-lazified.** -Automatic lazification uses the ``lazyrec[]`` macro (see below), which recurses into certain types of container literals, so that the lazification will not interfere with unpacking. +Automatic lazification uses the `lazyrec[]` macro (see below), which recurses into certain types of container literals, so that the lazification will not interfere with unpacking. -Note ``my_if`` in the example is a run-of-the-mill runtime function, not a macro. Only the ``with lazify`` is imbued with any magic. Essentially, the above code expands into: +Note `my_if` in the example is a regular function, not a macro. Only the `with lazify` is imbued with any magic. Essentially, the above code expands into: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force def my_if(p, a, b): if force(p): @@ -804,83 +1024,109 @@ def f(a, b): assert f(lazy[21], lazy[1/0]) == 42 ``` -plus some clerical details to allow mixing lazy and strict code. This second example relies on the magic of closures to capture f's ``a`` and ``b`` into the promises. +plus some clerical details to allow mixing lazy and strict code. This second example relies on the magic of closures to capture f's `a` and `b` into the `lazy[]` promises. -Like ``with continuations``, no state or context is associated with a ``with lazify`` block, so lazy functions defined in one block may call those defined in another. 
+Like `with continuations`, no state or context is associated with a `with lazify` block, so lazy functions defined in one block may call those defined in another. Lazy code is allowed to call strict functions and vice versa, without requiring any additional effort. -Comboing with other block macros in ``unpythonic.syntax`` is supported, including ``autocurry`` and ``continuations``. See the [meta](#meta) section of this README for the correct ordering. +Comboing `lazify` with other block macros in `unpythonic.syntax` is supported, including `autocurry` and `continuations`. See the [meta](#meta) section of this README for the correct ordering. + +For more details, see the docstring of `unpythonic.syntax.lazify`. + +Inspired by Haskell, Racket's `(delay)` and `(force)`, and [lazy/racket](https://docs.racket-lang.org/lazy/index.html). + +**CAUTION**: The functions in the module `unpythonic.fun` are lazify-aware (so that e.g. `curry` and `compose` work with lazy functions), as are `call` and `callwith` in the module `unpythonic.funutil`, but a large part of `unpythonic` is not. Keep in mind that any call to a strict (regular Python) function will evaluate all of its arguments. + +#### `lazy[]` and `lazyrec[]` macros + +**Changed in v0.15.0.** *Previously, the `lazy[]` macro was provided by MacroPy. Now that we use `mcpyrate`, which doesn't provide it, we provide it ourselves, in `unpythonic.syntax`. We now provide also the underlying `Lazy` class ourselves.* + +*Note that a lazy value (an instance of `Lazy`) now no longer has a `__call__` operator; instead, it has a `force()` method. However, the preferred way is to use the top-level function `force`, which abstracts away this detail.* -For more details, see the docstring of ``unpythonic.syntax.lazify``. +*The `force` function was previously exported in `unpythonic.syntax`; now it is available in the top-level namespace of `unpythonic`. 
This follows the general convention that regular functions live in the top-level `unpythonic` package, while macros (and in general, syntactic constructs) live in `unpythonic.syntax`.* -See also ``unpythonic.syntax.lazy``, which explicitly lazifies a single expression, and ``unpythonic.syntax.lazyrec``, which can be used to lazify expressions inside container literals, recursively. This allows code like ``tpl = lazyrec[(1*2*3, 4*5*6)]``. Each item becomes wrapped with ``lazy[]``, but the container itself is left alone, to avoid interfering with unpacking. Because ``lazyrec[]`` is a macro and must work by names only, it supports a fixed set of container types: ``list``, ``tuple``, ``set``, ``dict``, ``frozenset``, ``unpythonic.collections.frozendict``, ``unpythonic.collections.box``, and ``unpythonic.llist.cons`` (specifically, the constructors ``cons``, ``ll`` and ``llist``). +We provide the macros `unpythonic.syntax.lazy`, which explicitly lazifies a single expression, and `unpythonic.syntax.lazyrec`, which can be used to lazify expressions inside container literals, recursively. -(It must work by names only, because in an eager language any lazification must be performed as a syntax transformation before the code actually runs. Lazification in an eager language is a hack, by necessity. [Fexprs](https://fexpr.blogspot.com/2011/04/fexpr.html) (along with [a new calculus to go with them](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html)) are the clean, elegant solution, but this requires redesigning the whole language from ground up. Of course, if you're fine with a language not particularly designed for extensibility, and lazy evaluation is your top requirement, just use Haskell.) +Essentially, `lazy[...]` achieves the same result as `memoize(lambda: ...)`, with the practical difference that the `lazify` subsystem expects the `lazy[...]` notation in its analyzer, and will not recognize `memoize(lambda: ...)` as a delayed value. 
-Inspired by Haskell, Racket's ``(delay)`` and ``(force)``, and [lazy/racket](https://docs.racket-lang.org/lazy/index.html). +A `lazy[]` promise `p` is evaluated by calling `force(p)` or `p.force()`. In `unpythonic`, the promise datatype (`Lazy`) does not have a `__call__` method, because the word `force` better conveys the intent. -**CAUTION**: The functions in ``unpythonic.fun`` are lazify-aware (so that e.g. ``curry`` and ``compose`` work with lazy functions), as are ``call`` and ``callwith`` in ``unpythonic.misc``, but a large part of ``unpythonic`` is not. Keep in mind that any call to a strict (regular Python) function will evaluate all of its arguments. +It is preferable to use the `force` top-level function instead of the `.force` method, because the function will also pass through any non-promise value, whereas (obviously) a non-promise value will not have a `.force` method. Using the function, you can `force` a value just to be sure, without caring whether that value was a promise. The `force` function is available in the top-level namespace of `unpythonic`. + +The `lazyrec[]` macro allows code like `tpl = lazyrec[(1*2*3, 4*5*6)]`. Each item becomes wrapped with `lazy[]`, but the container itself is left alone, to avoid interfering with its unpacking. Because `lazyrec[]` is a macro and must work by names only, it supports a fixed set of container types: `list`, `tuple`, `set`, `dict`, `frozenset`, `unpythonic.frozendict`, `unpythonic.box`, and `unpythonic.cons` (specifically, the constructors `cons`, `ll` and `llist`). + +The `unpythonic` containers **must be from-imported** for `lazyrec[]` to recognize them. Either use `from unpythonic import xxx` (**recommended**), where `xxx` is a container type, or import the `containers` subpackage by `from unpythonic import containers`, and then use `containers.xxx`. (The analyzer only looks inside at most one level of attributes. This may change in the future.) 
+ +Observe that the analysis in `lazyrec[]` must work by names only, because in an eager language any lazification must be performed as a syntax transformation before the code actually runs. Hence, the analysis must be performed statically - and locally, because `lazyrec[]` is an expr macro. [Fexprs](https://fexpr.blogspot.com/2011/04/fexpr.html) (along with [a new calculus to go with them](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html)) are the clean, elegant solution, but this requires redesigning the whole language from the ground up. Of course, if you are fine with a language not particularly designed for extensibility, and lazy evaluation is your top requirement, you could just use Haskell. #### Forcing promises manually -This is mainly useful if you ``lazy[]`` or ``lazyrec[]`` something explicitly, and want to compute its value outside a ``with lazify`` block. +**Changed in v0.15.0.** *The functions `force1` and `force` now live in the top-level namespace of `unpythonic`, no longer in `unpythonic.syntax`.* + +This is mainly useful if you `lazy[]` or `lazyrec[]` something explicitly, and want to compute its value outside a `with lazify` block. -We provide the functions ``force1`` and ``force``. Using ``force1``, if ``x`` is a ``lazy[]`` promise, it will be forced, and the resulting value is returned. If ``x`` is not a promise, ``x`` itself is returned, à la Racket. The function ``force``, in addition, descends into containers (recursively). When an atom ``x`` (i.e. anything that is not a container) is encountered, it is processed using ``force1``. +We provide the functions `force1` and `force`. Using `force1`, if `x` is a `lazy[]` promise, it will be forced, and the resulting value is returned. If `x` is not a promise, `x` itself is returned, à la Racket. The function `force`, in addition, descends into containers (recursively). When an atom `x` (i.e.
anything that is not a container) is encountered, it is processed using `force1`. -Mutable containers are updated in-place; for immutables, a new instance is created, but as a side effect the promise objects **in the input container** will be forced. Any container with a compatible ``collections.abc`` is supported. (See ``unpythonic.collections.mogrify`` for details.) In addition, as special cases ``unpythonic.collections.box`` and ``unpythonic.llist.cons`` are supported. +Mutable containers are updated in-place; for immutables, a new instance is created, but as a side effect the promise objects **in the input container** will be forced. Any container with a compatible `collections.abc` is supported. (See `unpythonic.mogrify` for details.) In addition, as special cases `unpythonic.box` and `unpythonic.cons` are supported. #### Binding constructs and auto-lazification -Why do we auto-lazify in certain kinds of binding constructs, but not in others? Function calls and let-bindings have one feature in common: both are guaranteed to bind only new names. Auto-lazification of all assignments, on the other hand, in a language that allows mutation is dangerous, because then this superficially innocuous code will fail: +Why do we auto-lazify in certain kinds of binding constructs, but not in others? Function calls and let-bindings have one feature in common: both are guaranteed to bind only new names. Even if a name that uses the same identifier is already in scope, they are distinct; the new binding will shadow the old one. Auto-lazification of all assignments, on the other hand, in a language that allows mutation is dangerous, because then this superficially innocuous code will fail: ```python -a = 10 -a = 2*a -print(a) # 20, right? +from unpythonic.syntax import macros, lazify + +with lazify: + a = 10 + a = 2 * a + print(a) # 20, right? 
``` -If we chose to auto-lazify assignments, then assuming a ``with lazify`` around the example, it would expand to: +If we chose to auto-lazify assignments, then the example would expand to: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force a = lazy[10] -a = lazy[2*force(a)] +a = lazy[2 * force(a)] print(force(a)) ``` -In the second assignment, the ``lazy[]`` sets up a promise, which will force ``a`` *at the time when the containing promise is forced*, but at that time the name ``a`` points to a promise, which will force... +Scan that again: in the second assignment, the `lazy[]` sets up a promise, which will force `a` *at the time when the containing promise is forced*, but at that time the name `a` points to a promise, which will force... -The fundamental issue is that ``a = 2*a`` is an imperative update. Therefore, to avoid this infinite loop trap for the unwary, assignments are not auto-lazified. Note that if we use two different names, this works just fine: +The fundamental issue is that `a = 2 * a` is an imperative update. Therefore, to avoid this infinite loop trap for the unwary, assignments are not auto-lazified. Note that if we use two *different* names, this works just fine: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force a = lazy[10] -b = lazy[2*force(a)] +b = lazy[2 * force(a)] print(force(b)) ``` -because now at the time when ``b`` is forced, the name ``a`` still points to the value we intended it to. +because now at the time when `b` is forced, the name `a` still points to the value we intended it to. That is, code that is normalized to [static single assignment (SSA) form](https://en.wikipedia.org/wiki/Static_single_assignment_form) could be auto-lazified. -If you're sure you have *new definitions* and not *imperative updates*, just manually use ``lazy[]`` (or ``lazyrec[]``, as appropriate) on the RHS. 
Or if it's fine to use eager evaluation, just omit the ``lazy[]``, thus allowing Python to evaluate the RHS immediately. +If you are sure you have *new definitions* and not *imperative updates*, you can just manually use `lazy[]` (or `lazyrec[]`, as appropriate) on the RHS. Or if it is fine to use eager evaluation, just omit the `lazy[]`, thus allowing Python to evaluate the RHS immediately. -Beside function calls (which bind the parameters of the callee to the argument values of the call) and assignments, there are many other binding constructs in Python. For a full list, see [here](http://excess.org/article/2014/04/bar-foo/), or locally [here](../unpythonic/syntax/scopeanalyzer.py), in function ``get_names_in_store_context``. Particularly noteworthy in the context of lazification are the ``for`` loop and the ``with`` context manager. +Beside function calls (which bind the parameters of the callee to the argument values of the call) and assignments, there are many other binding constructs in Python. For a full list, see [here](http://excess.org/article/2014/04/bar-foo/), or locally [here](../unpythonic/syntax/scopeanalyzer.py), in function `get_names_in_store_context`. Particularly noteworthy in the context of lazification are the `for` loop and the `with` context manager. -In Python's ``for``, the loop counter is an imperatively updated single name. In many use cases a rapid update is desirable for performance reasons, and in any case, the whole point of the loop is (almost always) to read the counter (and do something with the value) at least once per iteration. So it is much simpler, faster, and equally correct not to lazify there. +In Python's `for`, the loop counter is an imperatively updated single name. In many use cases a rapid update is desirable for performance reasons, and in any case, the whole point of the loop is (almost always) to read the counter (and do something with the value) at least once per iteration. 
So it is much simpler, faster, and equally correct not to lazify there. -In ``with``, the whole point of a context manager is that it is eagerly initialized when the ``with`` block is entered (and finalized when the block exits). Since our lazy code can transparently use both bare values and promises (due to the semantics of our ``force1``), and the context manager would have to be eagerly initialized anyway, we can choose not to lazify there. +In `with`, the whole point of a context manager is that it is eagerly initialized when the `with` block is entered, and finalized when the block exits. Since our lazy code can transparently use both bare values and promises (due to the semantics of our `force1`), and the context manager would have to be eagerly initialized anyway, we have chosen not to lazify there. #### Note about TCO -To borrow a term from PG's On Lisp, to make ``lazify`` *pay-as-you-go*, a special mode in ``unpythonic.tco.trampolined`` is automatically enabled by ``with lazify`` to build lazify-aware trampolines in order to avoid a drastic performance hit (~10x) in trampolines built for regular strict code. +To borrow a term from PG's On Lisp, to make `lazify` *pay-as-you-go*, a special mode in `unpythonic.trampolined` is automatically enabled by `with lazify` to build lazify-aware trampolines in order to avoid a drastic performance hit (~10x) in trampolines built for regular strict code. -The idea is that the mode is enabled while any function definitions in the ``with lazify`` block run, so they get a lazify-aware trampoline when the ``trampolined`` decorator is applied. This should be determined lexically, but that's complicated to do API-wise, so we currently enable the mode for the dynamic extent of the ``with lazify``. 
Usually this is close enough; the main case where this can behave unexpectedly is: +The idea is that the mode is enabled while any function definitions in the `with lazify` block run, so they get a lazify-aware trampoline when the `trampolined` decorator is applied. This should be determined lexically, but that is complicated to do, because the decorator is applied at run time; so we currently enable the mode for the dynamic extent of the `with lazify`. Usually this is close enough. The main case where this can behave unexpectedly is: ```python +from unpythonic.syntax import macros, lazify +from unpythonic import trampolined + @trampolined # strict trampoline def g(): ... @@ -901,20 +1147,32 @@ with lazify: f2 = make_f() # f2 gets the lazify-aware trampoline ``` -TCO chains with an arbitrary mix of lazy and strict functions should work as long as the first function in the chain has a lazify-aware trampoline, because the chain runs under the trampoline of the first function (the trampolines of any tail-called functions are stripped away by the TCO machinery). +TCO chains with an arbitrary mix of lazy and strict functions should work as long as the first function in the chain has a lazify-aware trampoline, because the chain runs under the trampoline of the first function. The trampolines of any tail-called functions are skipped by the TCO machinery. Tail-calling from a strict function into a lazy function should work, because all arguments are evaluated at the strict side before the call is made. -But tail-calling ``strict -> lazy -> strict`` will fail in some cases. The second strict callee may get promises instead of values, because the strict trampoline does not have the ``maybe_force_args`` (the mechanism ``with lazify`` uses to force the args when lazy code calls into strict code). +But tail-calling `strict -> lazy -> strict` will fail in some cases. 
The second strict callee may get promises instead of values, because the strict trampoline does not have the `maybe_force_args` (the mechanism `with lazify` uses to force the args when lazy code calls into strict code). -The reason we have this hack is that it allows the performance of strict code using unpythonic's TCO machinery, not even caring that a ``lazify`` exists, to be unaffected by the additional machinery used to support automatic lazy-strict interaction. +The reason we have this hack is that it allows the performance of strict code using `unpythonic`'s TCO machinery, not even caring that a `lazify` exists, to be unaffected by the additional machinery used to support automatic lazy-strict interaction. -### ``tco``: automatic tail call optimization for Python +### `tco`: automatic tail call optimization for Python + +*This is the macro that applies tail call optimization (TCO) automatically. See the manual section on [`trampolined` and `jump`](features.md#trampolined-jump-tail-call-optimization-tco--explicit-continuations) on what TCO is and where it is useful.* + +Using `with tco`, there is no need to manually use `trampolined` or `jump`: ```python from unpythonic.syntax import macros, tco +with tco: + def fact(n, acc=1): + if n == 0: + return acc + return fact(n - 1, n * acc) + print(fact(4)) # 24 + fact(5000) # no crash + with tco: evenp = lambda x: (x == 0) or oddp(x - 1) oddp = lambda x: (x != 0) and evenp(x - 1) @@ -932,77 +1190,109 @@ with tco: assert evenp(10000) is True ``` -All function definitions (``def`` and ``lambda``) lexically inside the block undergo TCO transformation. The functions are automatically ``@trampolined``, and any tail calls in their return values are converted to ``jump(...)`` for the TCO machinery. Here *return value* is defined as: +All function definitions (`def` and `lambda`) lexically inside the `with tco` block undergo TCO transformation. 
The functions are automatically `@trampolined`, and any tail calls in their return values are converted to `jump(...)` for the TCO machinery. Here *return value* is defined as: - - In a ``def``, the argument expression of ``return``, or of a call to a known escape continuation. + - In a `def`, the argument expression of `return`, or of a call to a known escape continuation. - - In a ``lambda``, the whole body, as well as the argument expression of a call to a known escape continuation. + - In a `lambda`, the whole body, as well as the argument expression of a call to a known escape continuation. -What is a *known escape continuation* is explained below, in the section [TCO and ``call_ec``](#tco-and-call_ec). +What is considered a *known escape continuation* is explained below, in the section [TCO and `call_ec`](#tco-and-call_ec). -To find the tail position inside a compound return value, this recursively handles any combination of ``a if p else b``, ``and``, ``or``; and from ``unpythonic.syntax``, ``do[]``, ``let[]``, ``letseq[]``, ``letrec[]``. Support for ``do[]`` includes also any ``multilambda`` blocks that have already expanded when ``tco`` is processed. The macros ``aif[]`` and ``cond[]`` are also supported, because they expand into a combination of ``let[]``, ``do[]``, and ``a if p else b``. +To find the tail position inside a compound return value, we recursively handle any combination of `a if p else b`, `and`, `or`; and from `unpythonic.syntax`, `do[]`, `let[]`, `letseq[]`, `letrec[]`. Support for `do[]` includes also any `multilambda` blocks that have already expanded when `tco` is processed. The macros `aif[]` and `cond[]` are also supported, because they expand into a combination of `let[]`, `do[]`, and `a if p else b`. -**CAUTION**: In an ``and``/``or`` expression, only the last item of the whole expression is in tail position. This is because in general, it is impossible to know beforehand how many of the items will be evaluated. 
+**CAUTION**: In an `and`/`or` expression, only the last item of the whole expression is in tail position. This is because in general, it is impossible to know beforehand how many of the items will be evaluated. -**CAUTION**: In a ``def`` you still need the ``return``; it marks a return value. If you want the tail position to imply a ``return``, use the combo ``with autoreturn, tco`` (on ``autoreturn``, see below). +**CAUTION**: In a `def` you still need the `return`; it marks a return value. If you want tail position to imply a `return`, use the combo `with autoreturn, tco` (on `autoreturn`, see below). -TCO is based on a strategy similar to MacroPy's ``tco`` macro, but using unpythonic's TCO machinery, and working together with the macros introduced by ``unpythonic.syntax``. The semantics are slightly different; by design, ``unpythonic`` requires an explicit ``return`` to mark tail calls in a ``def``. A call that is strictly speaking in tail position, but lacks the ``return``, is not TCO'd, and Python's implicit ``return None`` then shuts down the trampoline, returning ``None`` as the result of the TCO chain. +TCO is based on a strategy similar to MacroPy's `tco` macro, but using unpythonic's TCO machinery, and working together with the macros introduced by `unpythonic.syntax`. The semantics are slightly different; by design, `unpythonic` requires an explicit `return` to mark tail calls in a `def`. A call that is strictly speaking in tail position, but lacks the `return`, is not TCO'd, and Python's implicit `return None` then shuts down the trampoline, returning `None` as the result of the TCO chain. #### TCO and continuations -The ``tco`` macro detects and skips any ``with continuations`` blocks inside the ``with tco`` block, because ``continuations`` already implies TCO. 
This is done **for the specific reason** of allowing the [Lispython dialect](https://github.com/Technologicat/pydialect) to use ``with continuations``, because the dialect itself implies a ``with tco`` for the whole module (so the user code has no way to exit the TCO context). +The `tco` macro detects and skips any `with continuations` blocks inside the `with tco` block, because `continuations` already implies TCO. This is done **for the specific reason** of allowing the [Lispython dialect](https://github.com/Technologicat/pydialect) to use `with continuations`, because the dialect itself implies a `with tco` for the whole module. Hence, in that dialect, the user code has no way to exit the TCO context. -The ``tco`` and ``continuations`` macros actually share a lot of the code that implements TCO; ``continuations`` just hooks into some callbacks to perform additional processing. +The `tco` and `continuations` macros actually share a lot of the code that implements TCO; `continuations`, for its TCO processing, just hooks into some callbacks to make additional AST edits. -#### TCO and ``call_ec`` +#### TCO and `call_ec` -(Mainly of interest for lambdas, which have no ``return``, and for "multi-return" from a nested function.) +This is mainly of interest for lambdas, which have no `return`, and for "multi-return" from a nested function. It is important to recognize a call to an escape continuation as such, because the argument given to an escape continuation is essentially a return value. If this argument is itself a call, it needs the TCO transformation to be applied to it. -For escape continuations in ``tco`` and ``continuations`` blocks, only basic uses of ``call_ec`` are supported, for automatically harvesting names referring to an escape continuation. In addition, the literal function names ``ec``, ``brk`` and ``throw`` are always *understood as referring to* an escape continuation. 
+For escape continuations in `tco` and `continuations` blocks, only basic uses of `call_ec` are supported, for automatically extracting names referring to an escape continuation. *Basic use* is defined as either of these two cases: -The name ``ec``, ``brk`` or ``throw`` alone is not sufficient to make a function into an escape continuation, even though ``tco`` (and ``continuations``) will think of it as such. The function also needs to actually implement some kind of an escape mechanism. An easy way to get an escape continuation, where this has already been done for you, is to use ``call_ec``. Another such mechanism is the ``catch``/``throw`` pair. +```python +from unpythonic import call_ec -See the docstring of ``unpythonic.syntax.tco`` for details. +# use as decorator +@call_ec +def result(ec): + ... + +# use directly on a literal lambda (effectively, as a decorator) +result = call_ec(lambda ec: ...) +``` + +When macro expansion of the ``with tco`` block starts, names of escape continuations created **anywhere lexically within** the ``with tco`` block are captured, provided that the creation takes place using one of the above *basic use* patterns. +In addition, the literal function names `ec`, `brk` and `throw` are always *understood as referring to* an escape continuation. The name `ec` is the customary name for the parameter of a function passed to `call_ec`. The name `brk` is the customary name for the break continuation created by `@breakably_looped` and `@breakably_looped_over`. The name `throw` is understood as referring to the function `unpythonic.throw`. -### ``continuations``: call/cc for Python +Obviously, having a name of `ec`, `brk` or `throw` is not by itself sufficient to make a function into an escape continuation, even though `tco` (and `continuations`) will think of it as such. The function also needs to actually implement some kind of an escape mechanism. 
An easy way to get an escape continuation, where this has already been done for you, is to use `call_ec`. Another such mechanism is the `catch`/`throw` pair. + +See the docstring of `unpythonic.syntax.tco` for details. + + +### `continuations`: call/cc for Python *Where control flow is your playground.* -We provide **genuine multi-shot continuations for Python**. Compare generators and coroutines, which are resumable functions, or in other words, single-shot continuations. In single-shot continuations, once execution passes a certain point, it cannot be rewound. Multi-shot continuations [can be emulated](https://gist.github.com/yelouafi/858095244b62c36ec7ebb84d5f3e5b02), but this makes the execution time `O(n**2)`, because when we want to restart again at an already passed point, the execution must start from the beginning, replaying the history. In contrast, **we implement continuations that can natively resume execution multiple times from the same point.** +We provide **genuine multi-shot continuations for Python**. Compare generators and coroutines, which are resumable functions, or in other words, single-shot continuations. In single-shot continuations, once execution passes a certain point, it cannot be rewound. Multi-shot continuations [can be emulated](https://gist.github.com/yelouafi/858095244b62c36ec7ebb84d5f3e5b02) using single-shot continuations, but this makes the execution time `O(n**2)`, because when we want to restart again at an already passed point, the execution must start from the beginning, replaying the whole history. In contrast, **we implement continuations that can natively resume execution multiple times from the same point.** + +**CAUTION**: This feature has some limitations, and is mainly intended for experimenting with, and teaching, multi-shot continuations in a Python setting. Particularly: -This feature has some limitations and is mainly intended for teaching continuations in a Python setting. 
+ - There are seams between continuation-enabled code and regular Python code. (This happens with any feature that changes the semantics of only a part of a program.) -- Especially, there are seams between continuation-enabled code and regular Python code. (This happens with any feature that changes the semantics of only a part of a program.) + - There is no [`dynamic-wind`](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29): Scheme's generalization of `try/finally`, which beside the `finally` exit hook, has an *entry hook* for when control jumps back into the block from outside it. -- There's no [`dynamic-wind`](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29) (the generalization of `try/finally`, when control can jump back in to the block from outside it). + - Interaction of continuations with exceptions is not fully thought out. -- Interaction of continuations with exceptions isn't fully thought out. Interaction with async functions **is currently not even implemented**. This is quite simply because this feature is primarily for teaching, and the implementation is already quite complex. + - Interaction with async functions **is not even implemented**. For this reason, an `async def` or `await` appearing inside a `with continuations` block is considered a syntax error. -- The implicit `cc` parameter might not be a good idea in the long run, and it might or might not change in a future release. It suffers from the same lack of transparency as the implicit `this` in many languages (e.g. C++ and JavaScript). - - Because it's implicit, it's easy to forget that each function definition implicitly introduces its own `cc`. - - This introduces a bug when one introduces an inner function, and attempts to use the outer `cc` inside the inner function body, forgetting that inside the inner function the name `cc` points to **the inner function's** own `cc`. 
- - Not introducing its own `this` [was precisely why](http://tc39wiki.calculist.org/es6/arrow-functions/) the arrow function syntax was introduced to JavaScript in ES6. - - Python gets `self` right in that while it's conveniently *passed* implicitly, it must be *declared* explicitly, eliminating the transparency issue. - - On the other hand, a semi-explicit `cc`, like Python's `self`, was tried in a previous release, and it led to a lot of boilerplate. It's especially bad that it effectively needs to be a keyword parameter, necessitating the user to write `def f(x, *, cc)`. + - The implicit `cc` parameter might not be a good idea in the long run. + - This design suffers from the same lack of transparency, whence the same potential for bugs, as the implicit `this` in many languages (e.g. C++ and JavaScript). + - Because `cc` is *declared* implicitly, it is easy to forget that *every* function definition *anywhere* inside the `with continuations` block introduces its own `cc` parameter. + - Particularly, also a `lambda` is a function definition. + - This introduces a bug when one introduces an inner function, and attempts to use the outer `cc` inside the inner function body, forgetting that inside the inner function, the name `cc` points to **the inner function's** own `cc`. + - The correct pattern is to `outercc = cc` in the outer function, and then use `outercc` inside the inner function body. + - Not introducing its own `this` [was precisely why](http://tc39wiki.calculist.org/es6/arrow-functions/) the arrow function syntax was introduced to JavaScript in ES6. + - Python gets `self` right in that while it is conveniently *passed* implicitly, it must be *declared* explicitly, eliminating the transparency issue. + - On the other hand, a semi-explicit `cc`, like Python's `self`, was tried in an early version of this continuations subsystem, and it led to a lot of boilerplate. 
+ - It is especially bad that to avoid easily avoidable bugs regarding passing in the wrong arguments, `cc` effectively must be a keyword parameter, necessitating the user to write `def f(x, *, cc)`. Not having to type out the `, *, cc` is much nicer, albeit not as pythonic. #### General remarks on continuations -If you're new to continuations, see the [short and easy Python-based explanation](https://www.ps.uni-saarland.de/~duchier/python/continuations.html) of the basic idea. +If you are new to continuations, see the [short and easy Python-based explanation](https://www.ps.uni-saarland.de/~duchier/python/continuations.html) of the basic idea. -We provide a very loose pythonification of Paul Graham's continuation-passing macros, chapter 20 in [On Lisp](http://paulgraham.com/onlisp.html). +This continuations system in `unpythonic` began as a very loose pythonification of Paul Graham's continuation-passing macros, chapter 20 in [On Lisp](http://paulgraham.com/onlisp.html). -The approach differs from native continuation support (such as in Scheme or Racket) in that the continuation is captured only where explicitly requested with ``call_cc[]``. This lets most of the code work as usual, while performing the continuation magic where explicitly desired. +The approach differs from native continuation support (such as in Scheme or Racket) in that the continuation is captured only where explicitly requested with `call_cc[]`. This lets most of the code work as usual, while performing the continuation magic where explicitly desired. -As a consequence of the approach, our continuations are [*delimited*](https://en.wikipedia.org/wiki/Delimited_continuation) in the very crude sense that the captured continuation ends at the end of the body where the *currently dynamically outermost* ``call_cc[]`` was used (and it returns a value). 
Hence, if porting some code that uses ``call/cc`` from Racket to Python, in the Python version the ``call_cc[]`` may be need to be placed further out to capture the relevant part of the computation. For example, see ``amb`` in the demonstration below; a Scheme or Racket equivalent usually has the ``call/cc`` placed inside the ``amb`` operator itself, whereas in Python we must place the ``call_cc[]`` at the call site of ``amb``. +As a consequence of the approach, our continuations are [*delimited*](https://en.wikipedia.org/wiki/Delimited_continuation) in the very crude sense that the captured continuation ends at the end of the body where the *currently dynamically outermost* `call_cc[]` was invoked. Notably, in `unpythonic`, a continuation eventually terminates and returns a value (provided that the code contained in the continuation itself terminates), without hijacking the rest of the whole-program execution. + +Hence, if porting some code that uses `call/cc` from Racket to Python, in the Python version the `call_cc[]` may need to be placed further out to capture the relevant part of the computation. For example, see `amb` in the demonstration below; a Scheme or Racket equivalent usually has the `call/cc` placed inside the `amb` operator itself, whereas in Python we must place the `call_cc[]` at the call site of `amb`, so that the continuation captures the remainder of the call site. + +Observe that while our outermost `call_cc` already somewhat acts like a prompt (in the sense of delimited continuations), we are currently missing the ability to set a prompt wherever (inside code that already uses `call_cc` somewhere) and make the continuation terminate there. So what we have right now is something between proper delimited continuations and classic whole-computation continuations - not really [co-values](http://okmij.org/ftp/continuations/undelimited.html), but not really delimited continuations, either.
+ +(TODO: If I interpret the wiki page right, our `call_cc` performs the job of `reset`; the called function forms the body of the `reset`. The `cc` argument passed into the called function performs the job of `shift`.) For various possible program topologies that continuations may introduce, see [these clarifying pictures](callcc_topology.pdf). -For full documentation, see the docstring of ``unpythonic.syntax.continuations``. The unit tests [[1]](../unpythonic/syntax/test/test_conts.py) [[2]](../unpythonic/syntax/test/test_conts_escape.py) [[3]](../unpythonic/syntax/test/test_conts_gen.py) [[4]](../unpythonic/syntax/test/test_conts_topo.py) may also be useful as usage examples. +For full documentation, see the docstring of `unpythonic.syntax.continuations`. The unit tests [[1]](../unpythonic/syntax/tests/test_conts.py) [[2]](../unpythonic/syntax/tests/test_conts_escape.py) [[3]](../unpythonic/syntax/tests/test_conts_gen.py) [[4]](../unpythonic/syntax/tests/test_conts_topo.py) may also be useful as usage examples. + +**Note on debugging**: If a function containing a `call_cc[]` crashes below a line that has a `call_cc[]` invocation, the stack trace will usually have the continuation function somewhere in it, containing the line number information, so as usual, you can pinpoint the source code line where the error occurred. For a function `f`, continuation definitions created by `call_cc[]` invocations within its body are named `f_cont_`. -**Note on debugging**: If a function containing a ``call_cc[]`` crashes below the ``call_cc[]``, the stack trace will usually have the continuation function somewhere in it, containing the line number information, so you can pinpoint the source code line where the error occurred. (For a function ``f``, it is named ``f_cont``, ``f_cont1``, ...) But be aware that especially in complex macro combos (e.g. 
``continuations, curry, lazify``), the other block macros may spit out many internal function calls *after* the relevant stack frame that points to the actual user program. So check the stack trace as usual, but check further up than usual. +Be aware that especially in complex block macro combos (e.g. `with lazify, autocurry, continuations`), the other block macros may have spit out many internal function calls that, at run time, get called *after* the relevant stack frame that points to the actual user program. So check the stack trace as usual, but check further up than usual. + +Using the `with step_expansion` macro from `mcpyrate.debug` may help in understanding what the macro-expanded code actually looks like. **Note on exceptions**: Raising an exception, or [signaling and restarting](features.md#handlers-restarts-conditions-and-restarts), will partly unwind the call stack, so the continuation *from the level that raised the exception* will be cancelled. This is arguably exactly the expected behavior. @@ -1013,7 +1303,7 @@ from unpythonic.syntax import macros, continuations, call_cc with continuations: # basic example - how to call a continuation manually: - k = None # kontinuation + k = None # a kontinuation is konventionally kalled k def setk(*args, cc): global k k = cc @@ -1044,7 +1334,7 @@ with continuations: # Pythagorean triples def pt(): z = call_cc[amb(range(1, 21))] - y = call_cc[amb(range(1, z+1)))] + y = call_cc[amb(range(1, z+1))] x = call_cc[amb(range(1, y+1))] if x*x + y*y != z*z: return fail() @@ -1057,92 +1347,98 @@ with continuations: print(fail()) print(fail()) ``` -Code within a ``with continuations`` block is treated specially. + +Code within a `with continuations` block is treated specially.

Roughly: -> - Each function definition (``def`` or ``lambda``) in a ``with continuations`` block has an implicit formal parameter ``cc``, **even if not explicitly declared** in the formal parameter list. -> - The continuation machinery will set the default value of ``cc`` to the default continuation (``identity``), which just returns its arguments. -> - The default value allows these functions to be called also normally without passing a ``cc``. In effect, the function will then return normally. -> - If ``cc`` is not declared explicitly, it is implicitly declared as a by-name-only parameter named ``cc``, and the default value is set automatically. -> - If ``cc`` is declared explicitly, the default value is set automatically if ``cc`` is in a position that can accept a default value, and no default has been set by the user. +> - Each function definition (`def` or `lambda`) in a `with continuations` block has an implicit formal parameter `cc`, **even if not explicitly declared** in the formal parameter list. +> - The continuation machinery will set the default value of `cc` to the default continuation (`identity`), which just returns its argument(s). +> - The default value allows these functions to be called also normally without passing a `cc`. In effect, the function will then return normally. +> - If `cc` is not declared explicitly, it is implicitly declared as a by-name-only parameter named `cc`, and the default value is set automatically. +> - If `cc` is declared explicitly, the default value is set automatically if `cc` is in a position that can accept a default value, and no default has been set by the user. > - Positions that can accept a default value are the last positional parameter that has no default, and a by-name-only parameter in any syntactically allowed position. -> - Having a hidden parameter is somewhat magic, but overall improves readability, as this allows declaring ``cc`` only where actually explicitly needed. 
-> - **CAUTION**: Usability trap: in nested function definitions, each ``def`` and ``lambda`` comes with **its own** implicit ``cc``. -> - In the above ``amb`` example, the local variable is named ``ourcc``, so that the continuation passed in from outside (into the ``lambda``, by closure) will have a name different from the ``cc`` implicitly introduced by the ``lambda`` itself. +> - Having a hidden parameter is somewhat magic, but overall improves readability, as this allows declaring `cc` only where actually explicitly needed. +> - **CAUTION**: Usability trap: in nested function definitions, each `def` and `lambda` comes with **its own** implicit `cc`. +> - In the above `amb` example, the local variable is named `ourcc`, so that the continuation passed in from outside (into the `lambda`, by closure) will have a name different from the `cc` implicitly introduced by the `lambda` itself. > - This is possibly subject to change in a future version (pending the invention of a better API), but for now just be aware of this gotcha. -> - Beside ``cc``, there's also a mechanism to keep track of the captured tail of a computation, which is important to have edge cases work correctly. See the note on **pcc** (*parent continuation*) in the docstring of ``unpythonic.syntax.continuations``, and [the pictures](callcc_topology.pdf). +> - Beside `cc`, there's also a mechanism to keep track of the captured tail of a computation, which is important to have edge cases work correctly. See the note on **pcc** (*parent continuation*) in the docstring of `unpythonic.syntax.continuations`, and [the pictures](callcc_topology.pdf). > -> - In a function definition inside the ``with continuations`` block: +> - In a function definition inside the `with continuations` block: > - Most of the language works as usual; especially, any non-tail function calls can be made as usual. 
-> - ``return value`` or ``return v0, ..., vn`` is actually a tail-call into ``cc``, passing the given value(s) as arguments. -> - As in other parts of ``unpythonic``, returning a tuple means returning multiple-values. -> - This is important if the return value is received by the assignment targets of a ``call_cc[]``. If you get a ``TypeError`` concerning the arguments of a function with a name ending in ``_cont``, check your ``call_cc[]`` invocations and the ``return`` in the call_cc'd function. -> - ``return func(...)`` is actually a tail-call into ``func``, passing along (by default) the current value of ``cc`` to become its ``cc``. -> - Hence, the tail call is inserted between the end of the current function body and the start of the continuation ``cc``. -> - To override which continuation to use, you can specify the ``cc=...`` kwarg, as in ``return func(..., cc=mycc)``. -> - The ``cc`` argument, if passed explicitly, **must be passed by name**. -> - **CAUTION**: This is **not** enforced, as the machinery does not analyze positional arguments in any great detail. The machinery will most likely break in unintuitive ways (or at best, raise a mysterious ``TypeError``) if this rule is violated. -> - The function ``func`` must be a defined in a ``with continuations`` block, so that it knows what to do with the named argument ``cc``. -> - Attempting to tail-call a regular function breaks the TCO chain and immediately returns to the original caller (provided the function even accepts a ``cc`` named argument). -> - Be careful: ``xs = list(args); return xs`` and ``return list(args)`` mean different things. -> - TCO is automatically applied to these tail calls. This uses the exact same machinery as the ``tco`` macro. +> - `return value` or `return Values(...)` is actually a tail-call into `cc`, passing the given value(s) as arguments. +> - As in other parts of `unpythonic`, returning a `Values` means returning multiple-return-values and/or named-return-values. 
+> - This is important if the return value is received by the assignment targets of a `call_cc[]`. If you get a `TypeError` concerning the arguments of a function with a name ending in `_cont_`, check your `call_cc[]` invocations and the `return` in the call_cc'd function. +> - **Changed in v0.15.0.** *Up to v0.14.3, multiple return values used to be represented as a `tuple`. Now returning a `tuple` means returning one value that is a tuple.* +> - `return func(...)` is actually a tail-call into `func`, passing along (by default) the current value of `cc` to become its `cc`. +> - Hence, the tail call is inserted *between* the end of the current function body and the start of the continuation `cc`. +> - To override which continuation to use, you can specify the `cc=...` kwarg, as in `return func(..., cc=mycc)`, as was done in the `amb` example above. +> - The `cc` argument, if passed explicitly, **must be passed by name**. +> - **CAUTION**: This is **not** enforced, as the machinery does not analyze positional arguments in any great detail. The machinery will most likely break in unintuitive ways (or at best, raise a mysterious `TypeError`) if this rule is violated. +> - The function `func` must be defined in a `with continuations` block, so that it knows what to do with the named argument `cc`. +> - Attempting to tail-call a regular function breaks the TCO chain and immediately returns to the original caller (provided the function even accepts a `cc` named argument; if not, you will get a `TypeError`). +> - Be careful: `xs = list(args); return xs` and `return list(args)` mean different things. +> - Because `list(args)` is a function call, `return list(args)` will attempt to tail-call `list` as a continuation-enabled function (which it is not, so you will get a `TypeError`), before passing its result into the current continuation. +> - Using `return xs` instead will pass an inert data value into the current continuation.
+> - TCO is automatically applied to these tail calls. The TCO processing of `continuations` uses the exact same machinery as the `tco` macro, performing some additional AST edits via hooks. > -> - The ``call_cc[]`` statement essentially splits its use site into *before* and *after* parts, where the *after* part (the continuation) can be run a second and further times, by later calling the callable that represents the continuation. This makes a computation resumable from a desired point. +> - The `call_cc[]` statement essentially splits its use site into *before* and *after* parts, where the *after* part (the continuation) can be run a second and further times, by later calling the callable that represents the continuation. This makes a computation resumable from a desired point. > - The continuation is essentially a closure. -> - Just like in Scheme/Racket, only the control state is checkpointed by ``call_cc[]``; any modifications to mutable data remain. -> - Assignment targets can be used to get the return value of the function called by ``call_cc[]``. -> - Just like in Scheme/Racket's ``call/cc``, the values that get bound to the ``call_cc[]`` assignment targets on second and further calls (when the continuation runs) are the arguments given to the continuation when it is called (whether implicitly or manually). -> - A first-class reference to the captured continuation is available in the function called by ``call_cc[]``, as its ``cc`` argument. -> - The continuation is a function that takes positional arguments, plus a named argument ``cc``. -> - The call signature for the positional arguments is determined by the assignment targets of the ``call_cc[]``. -> - The ``cc`` parameter is there only so that a continuation behaves just like any continuation-enabled function when tail-called, or when later used as the target of another ``call_cc[]``. 
-> - Basically everywhere else, ``cc`` points to the identity function - the default continuation just returns its arguments. +> - Just like in Scheme/Racket, only the control state is checkpointed by `call_cc[]`; any modifications to mutable data remain. +> - Assignment targets can be used to get the return value of the function called by `call_cc[]`. +> - Just like in Scheme/Racket's `call/cc`, the values that get bound to the `call_cc[]` assignment targets on second and further calls (when the continuation runs) are the arguments given to the continuation when it is called (whether implicitly or manually). +> - A first-class reference to the captured continuation is available in the function called by `call_cc[]`, as its `cc` argument. +> - The continuation itself is a function that takes positional arguments, plus a named argument `cc`. +> - The call signature for the positional arguments is determined by the assignment targets of the `call_cc[]`. +> - The `cc` parameter is there only so that a continuation behaves just like any continuation-enabled function when tail-called, or when later used as the target of another `call_cc[]`. +> - Basically everywhere else, `cc` points to the identity function - the default continuation just returns its argument(s). > - This is unlike in Scheme or Racket, which implicitly capture the continuation at every expression. -> - Inside a ``def``, ``call_cc[]`` generates a tail call, thus terminating the original (parent) function. (Hence ``call_ec`` does not combo well with this.) -> - At the top level of the ``with continuations`` block, ``call_cc[]`` generates a normal call. In this case there is no return value for the block (for the continuation, either), because the use site of the ``call_cc[]`` is not inside a function. +> - Inside a `def`, `call_cc[]` generates a tail call, thus terminating the original (parent) function. Hence `call_ec` does **not** combo with `with continuations`. 
+> - At the top level of the `with continuations` block, `call_cc[]` generates a normal call. In this case there is no return value for the block (for the continuation, either), because the use site of the `call_cc[]` is not inside a function.
-#### Differences between ``call/cc`` and certain other language features +#### Differences between `call/cc` and certain other language features - - Unlike **generators**, ``call_cc[]`` allows resuming also multiple times from an earlier checkpoint, even after execution has already proceeded further. Generators can be easily built on top of ``call/cc``. [Python version](../unpythonic/syntax/test/test_conts_gen.py), [Racket version](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt). + - Unlike **generators**, `call_cc[]` allows resuming also multiple times from an earlier checkpoint, even after execution has already proceeded further. Generators can be easily built on top of `call/cc`. [Python version](../unpythonic/syntax/tests/test_conts_gen.py), [Racket version](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt). - The Python version is a pattern that could be packaged into a macro with `mcpyrate`; the Racket version has been packaged as a macro. - Both versions are just demonstrations for teaching purposes. In production code, use the language's native functionality. - - Python's built-in generators have no restriction on where ``yield`` can be placed, and provide better performance. + - Python's built-in generators have no restriction on where `yield` can be placed, and provide better performance. - Racket's standard library provides [generators](https://docs.racket-lang.org/reference/Generators.html). - - Unlike **exceptions**, which only perform escapes, ``call_cc[]`` allows to jump back at an arbitrary time later, also after the dynamic extent of the original function where the ``call_cc[]`` appears. Escape continuations are a special case of continuations, so exceptions can be built on top of ``call/cc``. 
+ - Unlike **exceptions**, which only perform escapes, `call_cc[]` allows to jump back at an arbitrary time later, also *after* the dynamic extent of the original function where the `call_cc[]` appears. Escape continuations are a special case of continuations, so exceptions can be built on top of `call/cc`. - [As explained in detail by Matthew Might](http://matt.might.net/articles/implementing-exceptions/), exceptions are fundamentally based on (escape) continuations; the *"unwinding the call stack"* mental image is ["not even wrong"](https://en.wikiquote.org/wiki/Wolfgang_Pauli). -So if all you want is generators or exceptions (or even resumable exceptions a.k.a. [conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), then a general ``call/cc`` mechanism is not needed. The point of ``call/cc`` is to provide the ability to *resume more than once* from *the same*, already executed point in the program. In other words, ``call/cc`` is a general mechanism for bookmarking the control state. +So if all you want is generators or exceptions (or even resumable exceptions a.k.a. [conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), then a general `call/cc` mechanism is not needed. The point of `call/cc` is to provide the ability to *resume more than once* from *the same*, already executed point in the program. In other words, **`call/cc` is a general mechanism for bookmarking the control state**. However, its usability leaves much to be desired. This has been noted e.g. in [Oleg Kiselyov: An argument against call/cc](http://okmij.org/ftp/continuations/against-callcc.html) and [John Shutt: Guarded continuations](http://fexpr.blogspot.com/2012/01/guarded-continuations.html). 
For example, Shutt writes: *The traditional Scheme device for acquiring a first-class continuation object is **call/cc**, which calls a procedure and passes to that procedure the continuation to which that call would normally return. Frankly, this was always a very clumsy way to work with continuations; one might almost suspect it was devised as an "esoteric programming language" feature, akin to INTERCAL's COME FROM statement.* -#### ``call_cc`` API reference +#### `call_cc` API reference + +To keep things relatively straightforward, our `call_cc[]` is only allowed to appear **at the top level** of: -To keep things relatively straightforward, our ``call_cc[]`` is only allowed to appear **at the top level** of: + - the `with continuations` block itself + - a `def` inside that block - - the ``with continuations`` block itself - - a ``def`` or ``async def`` +Nested defs are ok; here *top level* only means the top level of the *currently innermost* `def`. -Nested defs are ok; here *top level* only means the top level of the *currently innermost* ``def``. +If you need to place `call_cc[]` inside a loop, use `@looped` et al. from the module `unpythonic.fploop`; this has the loop body represented as the top level of a `def`. Keep in mind that **only the control state is bookmarked**. -If you need to place ``call_cc[]`` inside a loop, use ``@looped`` et al. from ``unpythonic.fploop``; this has the loop body represented as the top level of a ``def``. +Multiple `call_cc[]` statements in the same function body are allowed. These essentially create nested closures. -Multiple ``call_cc[]`` statements in the same function body are allowed. These essentially create nested closures. +In any invalid position, `call_cc[]` is considered a syntax error at macro expansion time. 
**Syntax**: -In ``unpythonic``, ``call_cc`` is a **statement**, with the following syntaxes: +In `unpythonic`, `call_cc` is a **statement**, with the following syntaxes: ```python -x = call_cc[func(...)] -*xs = call_cc[func(...)] -x0, ... = call_cc[func(...)] -x0, ..., *xs = call_cc[func(...)] -call_cc[func(...)] +x = call_cc[f(...)] +*xs = call_cc[f(...)] +x0, ... = call_cc[f(...)] +x0, ..., *xs = call_cc[f(...)] +call_cc[f(...)] x = call_cc[f(...) if p else g(...)] *xs = call_cc[f(...) if p else g(...)] @@ -1151,23 +1447,25 @@ x0, ..., *xs = call_cc[f(...) if p else g(...)] call_cc[f(...) if p else g(...)] ``` -*NOTE*: ``*xs`` may need to be written as ``*xs,`` in order to explicitly make the LHS into a tuple. The variant without the comma seems to work when run from a ``.py`` file with the `macropython` bootstrapper from [`mcpyrate`](https://pypi.org/project/mcpyrate/), but fails in code run interactively in the `mcpyrate` REPL. +*NOTE*: `*xs` may need to be written as `*xs,` in order to explicitly make the LHS into a tuple. The variant without the comma seems to work when run from a `.py` file with the `macropython` bootstrapper from [`mcpyrate`](https://pypi.org/project/mcpyrate/), but fails in code run interactively in the `mcpyrate` REPL. -*NOTE*: ``f()`` and ``g()`` must be **literal function calls**. Sneaky trickery (such as calling indirectly via ``unpythonic.misc.call`` or ``unpythonic.fun.curry``) is not supported. (The ``prefix`` and ``curry`` macros, however, **are** supported; just order the block macros as shown in the final section of this README.) This limitation is for simplicity; the ``call_cc[]`` needs to patch the ``cc=...`` kwarg of the call being made. +*NOTE*: `f()` and `g()` must be **literal function calls**. Sneaky trickery (such as calling indirectly via `unpythonic.call` or `unpythonic.curry`) is not supported. This limitation is for simplicity; the `call_cc[]` invocation needs to patch the `cc=...` kwarg of the call being made. 
+ +The `prefix` and `curry` macros, however, **are** supported; just order the block macros as in [The xmas tree combo](#the-xmas-tree-combo). **Assignment targets**: - - To destructure a multiple-values (from a tuple return value), use a tuple assignment target (comma-separated names, as usual). + - To destructure positional multiple-values (from a `Values` return value of the function called by the `call_cc`), use a tuple assignment target (comma-separated names, as usual). Destructuring *named* return values from a `call_cc` is currently not supported due to syntactic limitations. - - The last assignment target may be starred. It is transformed into the vararg (a.k.a. ``*args``, star-args) of the continuation function. (It will capture a whole tuple, or any excess items, as usual.) + - The last assignment target may be starred. It is transformed into the vararg (a.k.a. `*args`, star-args) of the continuation function created by the `call_cc`. It will capture a whole tuple, or any excess items, as usual. - - To ignore the return value, just omit the assignment part. Useful if ``func`` was called only to perform its side-effects (the classic side effect is to stash ``cc`` somewhere for later use). + - To ignore the return value of the `call_cc`'d function, just omit the assignment part. This is useful if `f` was called only to perform its side-effects. The classic side effect is to stash `cc` somewhere for later use. **Conditional variant**: - - ``p`` is any expression. If truthy, ``f(...)`` is called, and if falsey, ``g(...)`` is called. + - `p` is any expression. It is evaluated at run time, as usual. When the result is truthy, `f(...)` is called, and when falsey, `g(...)` is called. - - Each of ``f(...)``, ``g(...)`` may be ``None``. A ``None`` skips the function call, proceeding directly to the continuation. Upon skipping, all assignment targets (if any are present) are set to ``None``. The starred assignment target (if present) gets the empty tuple. 
+ - Each of `f(...)`, `g(...)` may be `None`. A `None` skips the function call, proceeding directly to the continuation. Upon skipping, all assignment targets (if any are present) are set to `None`. The starred assignment target (if present) gets the empty tuple. The main use case of the conditional variant is for things like: @@ -1182,45 +1480,51 @@ with continuations: ... ``` -**Main differences to ``call/cc`` in Scheme and Racket**: +**Main differences to `call/cc` in Scheme and Racket**: -Compared to Scheme/Racket, where ``call/cc`` will capture also expressions occurring further up in the call stack, our ``call_cc`` may be need to be placed differently (further out, depending on what needs to be captured) due to the delimited nature of the continuations implemented here. +Compared to Scheme/Racket, where `call/cc` will capture also expressions occurring further up in the call stack, our `call_cc` may need to be placed differently (further out, depending on what needs to be captured) due to the delimited nature of the continuations implemented here. -Scheme and Racket implicitly capture the continuation at every position, whereas we do it explicitly, only at the use sites of the ``call_cc[]`` macro. +Scheme and Racket implicitly capture the continuation at every position, whereas we do it explicitly, only at the use sites of the `call_cc[]` macro. -Also, since there are limitations to where a ``call_cc[]`` may appear, some code may need to be structured differently to do some particular thing, if porting code examples originally written in Scheme or Racket. +Also, since there are limitations to where a `call_cc[]` may appear, some code may need to be structured differently to do some particular thing, if porting code examples originally written in Scheme or Racket. -Unlike ``call/cc`` in Scheme/Racket, our ``call_cc`` takes **a function call** as its argument, not just a function reference.
Also, there's no need for it to be a one-argument function; any other args can be passed in the call. The ``cc`` argument is filled implicitly and passed by name; any others are passed exactly as written in the client code. +Unlike `call/cc` in Scheme/Racket, our `call_cc` takes **a function call** as its argument, not just a function reference. Also, there is no need for it to be a one-argument function; any other args can be passed in the call. The `cc` argument is filled implicitly and passed by name; any others are passed exactly as you write in the invocation. #### Combo notes -**CAUTION**: Do not use ``with tco`` inside a ``with continuations`` block; ``continuations`` already implies TCO. The ``continuations`` macro **makes no attempt** to skip ``with tco`` blocks inside it. +**CAUTION**: Do not use `with tco` inside a `with continuations` block; `continuations` already implies TCO. The `continuations` macro **makes no attempt** to skip `with tco` blocks inside it. -If you need both ``continuations`` and ``multilambda`` simultaneously, the incantation is: +If you want to use `multilambda` inside a `with continuations` block, it needs to go on the outside: ```python +from unpythonic.syntax import macros, continuations, multilambda + with multilambda, continuations: f = lambda x: [print(x), x**2] assert f(42) == 1764 ``` -This works, because the ``continuations`` macro understands already expanded ``let[]`` and ``do[]``, and ``multilambda`` generates and expands a ``do[]``. (Any explicit use of ``do[]`` in a lambda body or in a ``return`` is also ok; recall that macros expand from inside out.) +This works, because the `continuations` macro understands already expanded `let[]` and `do[]`, and `multilambda` generates and expands a `do[]`. (Any explicit use of `do[]` in a lambda body or in a `return` is also ok; recall that macros expand from inside out.) 
-Similarly, if you need ``quicklambda``, apply it first: +Similarly, if you want to use `quicklambda` inside a `with continuations` block, place it on the outside: ```python +from unpythonic.syntax import macros, continuations, quicklambda, fn + with quicklambda, continuations: - g = f[_**2] + g = fn[_**2] assert g(42) == 1764 ``` -This ordering makes the ``f[...]`` notation expand into standard ``lambda`` notation before ``continuations`` is expanded. +This ordering makes the `fn[...]` notation expand into standard `lambda` notation before `continuations` is expanded. -To enable both of these, use ``with quicklambda, multilambda, continuations`` (although the usefulness of this combo may be questionable). +To enable both of these, use `with quicklambda, multilambda, continuations` (although the usefulness of this combo may be questionable). #### Continuations as an escape mechanism -Pretty much by the definition of a continuation, in a ``with continuations`` block, a trick that *should* at first glance produce an escape is to set ``cc`` to the ``cc`` of the caller, and then return the desired value. There is however a subtle catch, due to the way we implement continuations. +An escape continuation `ec` is a continuation, too. How can we use `cc` to escape? + +Pretty much by the definition of a continuation, in a `with continuations` block, a trick that *should* at first glance produce an escape is to set `cc` to the `cc` of the caller, and then return the desired value. There is however a subtle catch, due to the way we implement continuations. First, consider this basic strategy, without any macros: @@ -1230,7 +1534,7 @@ from unpythonic import call_ec def double_odd(x, ec): if x % 2 == 0: # reject even "x" ec("not odd") - return 2*x + return 2 * x @call_ec def result1(ec): y = double_odd(42, ec) @@ -1245,7 +1549,9 @@ assert result1 == "not odd" assert result2 == "not odd" ``` -Now, can we use the same strategy with the continuation machinery?
+Here `ec` is the escape continuation of the `result1`/`result2` block, due to the placement of the `call_ec`. + +Now, can we use the same strategy with the general continuation machinery? ```python from unpythonic.syntax import macros, continuations, call_cc @@ -1255,9 +1561,9 @@ with continuations: if x % 2 == 0: cc = ec return "not odd" - return 2*x + return 2 * x def main1(cc): - # cc actually has a default, so it's ok to not pass anything as cc here. + # cc actually has a default (`identity`), so it's ok to not pass anything as cc here. y = double_odd(42, ec=cc) # y = "not odd" z = double_odd(21, ec=cc) # we could tail-call, but let's keep this similar to the first example. return z @@ -1269,11 +1575,13 @@ with continuations: assert main2() == "not odd" ``` -In the first example, ``ec`` is the escape continuation of the ``result1``/``result2`` block, due to the placement of the ``call_ec``. In the second example, the ``cc`` inside ``double_odd`` is the implicitly passed ``cc``... which, naively, should represent the continuation of the current call into ``double_odd``. So far, so good. +The `cc` inside `double_odd` is the implicitly passed `cc`... which, naively, should represent the continuation of the current call into `double_odd`. So far, so good. -However, because the example code contains no ``call_cc[]`` statements, the actual value of ``cc``, anywhere in this example, is always just ``identity``. *It's not the actual continuation.* Even though we pass the ``cc`` of ``main1``/``main2`` as an explicit argument "``ec``" to use as an escape continuation (like the first example does with ``ec``), it is still ``identity`` - and hence cannot perform an escape. +However, because the example contains no `call_cc[]` statements, the actual value of `cc`, anywhere in this example, is always just `identity`. 
Scan that again: *in this example, `cc` is not the actual continuation, because no continuation captures were requested.* -We must ``call_cc[]`` to request a capture of the actual continuation: +Even though we pass the `cc` of `main1`/`main2` as an explicit argument "`ec`" to use as an escape continuation (like the first example does with `ec`), it is still `identity` - and hence cannot perform an escape. + +We must `call_cc[]` to request a capture of the continuation, hence populating `cc` with something useful: ```python from unpythonic.syntax import macros, continuations, call_cc @@ -1283,7 +1591,7 @@ with continuations: if x % 2 == 0: cc = ec return "not odd" - return 2*x + return 2 * x def main1(cc): y = call_cc[double_odd(42, ec=cc)] # <-- the only change is adding the call_cc[] z = call_cc[double_odd(21, ec=cc)] # <-- @@ -1298,49 +1606,52 @@ with continuations: This variant performs as expected. -There's also a second, even subtler catch; instead of setting ``cc = ec`` and returning a value, just tail-calling ``ec`` with that value doesn't do what we want. This is because - as explained in the rules of the ``continuations`` macro, above - a tail-call is *inserted* between the end of the function, and whatever ``cc`` currently points to. +There is also a second, even subtler catch; instead of setting `cc = ec` and returning a value, as we did, just tail-calling `ec` with that same value does **not** do what we want. Why? Because - as explained in the rules of the `continuations` macro, above - a tail-call is *inserted* between the end of the function, and whatever continuation `cc` currently points to. -Most often that's exactly what we want, but in this particular case, it causes *both* continuations to run, in sequence. But if we overwrite ``cc``, then the function's original ``cc`` argument (the one given by ``call_cc[]``) is discarded, so it never runs - and we get the effect we want, *replacing* the ``cc`` by the ``ec``. 
+Most often that is exactly what we want, but in this particular case, it causes *both* continuations to run, in sequence. But if, instead of performing a tail call to the `ec`, we set `cc = ec`, then the function's original `cc` argument (the one supplied by `call_cc[]`) is discarded, hence that continuation never runs - and we get the effect we want, *replacing* the `cc` by the `ec`. -Such subtleties arise essentially from the difference between a language that natively supports continuations (Scheme, Racket) and one that has continuations hacked on top of it as macros performing a CPS conversion only partially (like Python with ``unpythonic.syntax``, or Common Lisp with PG's continuation-passing macros). The macro approach works, but the programmer needs to be careful. +Such subtleties arise essentially from the difference between a language that natively supports continuations (Scheme, Racket) and one that has continuations hacked on top of it as macros performing a CPS conversion only partially (like Python with `unpythonic.syntax`, or Common Lisp with PG's continuation-passing macros). The macro approach works, but the programmer needs to be careful. #### What can be used as a continuation? -In ``unpythonic`` specifically, a continuation is just a function. ([As John Shutt has pointed out](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html), in general this is not true. The calculus underlying the language becomes much cleaner if continuations are defined as a separate control flow mechanism orthogonal to function application. Continuations are not intrinsically a whole-computation device, either.) +In `unpythonic` specifically, a continuation is just a function. ([As John Shutt has pointed out](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html), in general this is not true. 
The calculus underlying the language becomes much cleaner if continuations are defined as a separate control flow mechanism orthogonal to function application. Continuations are [not intrinsically a whole-computation device](https://en.wikipedia.org/wiki/Delimited_continuation), either.) The continuation function must be able to take as many positional arguments as the previous function in the TCO chain is trying to pass into it. Keep in mind that: - - In ``unpythonic``, a tuple represents multiple return values. So a ``return a, b``, which is being fed into the continuation, implies that the continuation must be able to take two positional arguments. + - In `unpythonic`, multiple return values (and named return values) are represented as a `Values` object. So if your function does `return Values(a, b)`, and that is being fed into the continuation, this implies that the continuation must be able to take two positional arguments. + + **Changed in v0.15.0.** *Up to v0.14.3, a `tuple` used to represent multiple-return-values; now it denotes a single return value that is a tuple. The `Values` type allows not only multiple return values, but also **named** return values. Named return values are fed as kwargs.* - - At the end of any function in Python, at least an implicit bare ``return`` always exists. It will try to pass in the value ``None`` to the continuation, so the continuation must be able to accept one positional argument. (This is handled automatically for continuations created by ``call_cc[]``. If no assignment targets are given, ``call_cc[]`` automatically creates one ignored positional argument that defaults to ``None``.) + - At the end of any function in Python, at least an implicit bare `return` always exists. It will try to pass in the value `None` to the continuation, so a continuation must be able to accept one positional argument. + - This is handled automatically for continuations created by `call_cc[]`. 
If no assignment targets are given, `call_cc[]` automatically creates one ignored positional argument that defaults to `None`. -If there is an arity mismatch, Python will raise ``TypeError`` as usual. (The actual error message may be unhelpful due to the macro transformations; look for a mismatch in the number of values between a ``return`` and the call signature of a function used as a continuation (most often, the ``f`` in a ``cc=f``).) +If there is an arity mismatch, Python will raise `TypeError` as usual. The actual error message may be unhelpful due to macro transformations. Look for a mismatch between a `return` and the call signature of a function used as a continuation. Most often, this is the `f` in a `cc=f`. -Usually, a function to be used as a continuation is defined inside the ``with continuations`` block. This automatically introduces the implicit ``cc`` parameter, and in general makes the source code undergo the transformations needed by the continuation machinery. +Usually, a function to be used as a continuation is defined inside the `with continuations` block. This automatically introduces the implicit `cc` parameter, and in general makes the source code undergo the transformations needed by the continuation machinery. -However, as the only exception to this rule, if the continuation is meant to act as the endpoint of the TCO chain - i.e. terminating the chain and returning to the original top-level caller - then it may be defined outside the ``with continuations`` block. Recall that in a ``with continuations`` block, returning an inert data value (i.e. not making a tail call) transforms into a tail-call into the ``cc`` (with the given data becoming its argument(s)); it does not set the ``cc`` argument of the continuation being called, or even require that it has a ``cc`` parameter that could accept one. +However, as the only exception to this rule, if the continuation is meant to act as the endpoint of the TCO chain - i.e. 
terminating the chain and returning to the original top-level caller - then it may be defined outside the `with continuations` block. Recall that in a `with continuations` block, returning an inert data value (i.e. not making a tail call) transforms into a tail-call into the `cc` (with the given data becoming its argument(s)); it does not set the `cc` argument of the continuation being called, or even require that it has a `cc` parameter that could accept one. -(Note also that a continuation that has no ``cc`` parameter cannot be used as the target of an explicit tail-call in the client code, since a tail-call in a ``with continuations`` block will attempt to supply a ``cc`` argument to the function being tail-called. Likewise, it cannot be used as the target of a ``call_cc[]``, since this will also attempt to supply a ``cc`` argument.) +These observations make `unpythonic.identity` eligible as a continuation, even though it is defined elsewhere in the library and it has no `cc` parameter. -These observations make ``unpythonic.fun.identity`` eligible as a continuation, even though it is defined elsewhere in the library and it has no ``cc`` parameter. +Finally, note that a function that has no `cc` parameter cannot be used as the target of an explicit tail-call inside a `with continuations` block, since a tail-call there will attempt to supply a `cc` argument to the function being tail-called. Likewise, it cannot be used as the function called by a `call_cc[]`, since this will also attempt to supply a `cc` argument. -#### This isn't ``call/cc``! +#### This isn't `call/cc`! -Strictly speaking, ``True``. 
The implementation is very different (much more than just [exposing a hidden parameter](https://www.ps.uni-saarland.de/~duchier/python/continuations.html)), not to mention it has to be a macro, because it triggers capture - something that would not need to be requested for separately, had we converted the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style). -The selective capture approach is however more efficient when we implement the continuation system in Python, indeed *on Python* (in the sense of [On Lisp](paulgraham.com/onlisp.html)), since we want to run most of the program the usual way with no magic attached. This way there is no need to sprinkle absolutely every statement and expression with a ``def`` or a ``lambda``. (Not to mention Python's ``lambda`` is underpowered due to the existence of some language features only as statements, so we would need to use a mixture of both, which is already unnecessarily complicated.) Function definitions are not intended as [the only control flow construct](https://dspace.mit.edu/handle/1721.1/5753) in Python, so the compiler likely wouldn't optimize heavily enough (i.e. eliminate **almost all** of the implicitly introduced function definitions), if we attempted to use them as such. +Strictly speaking, `True`. The implementation is very different (much more than just [exposing a hidden parameter](https://www.ps.uni-saarland.de/~duchier/python/continuations.html)), not to mention it has to be a macro, because it triggers capture - something that would not need to be requested separately, had we converted the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style). +The selective capture approach is however more efficient when we implement the continuation system in Python, indeed *on Python* (in the sense of [On Lisp](http://www.paulgraham.com/onlisp.html)), since we want to run most of the program the usual way with no magic attached.
This way there is no need to sprinkle absolutely every statement and expression with a `def` or a `lambda`. (Not to mention Python's `lambda` is underpowered due to the existence of some language features only as statements, so we would need to use a mixture of both, which is already unnecessarily complicated.) Function definitions are not intended as [the only control flow construct](https://dspace.mit.edu/handle/1721.1/5753) in Python, so the compiler likely would not optimize heavily enough (i.e. eliminate **almost all** of the implicitly introduced function definitions), if we attempted to use them as such. Continuations only need to come into play when we explicitly request for one ([ZoP §2](https://www.python.org/dev/peps/pep-0020/)); this avoids introducing any more extra function definitions than needed. -The name is nevertheless ``call_cc``, because the resulting behavior is close enough to ``call/cc``. +The name is nevertheless `call_cc`, because the resulting behavior is close enough to `call/cc`. Instead of *call with current continuation*, we could retcon the name to mean *call with **captured** continuation*. -Note our implementation provides a rudimentary form of *delimited* continuations. See [Oleg Kiselyov: Undelimited continuations are co-values rather than functions](http://okmij.org/ftp/continuations/undelimited.html). Delimited continuations return a value and can be composed, so they at least resemble functions (even though are not, strictly speaking, actually functions), whereas undelimited continuations do not even return. (For two different debunkings of the continuations-are-functions myth, approaching the problem from completely different angles, see the above post by Oleg Kiselyov, and [John Shutt: Continuations and term-rewriting calculi](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html).) +Note our implementation provides a rudimentary form of *delimited* continuations. 
See [Oleg Kiselyov: Undelimited continuations are co-values rather than functions](http://okmij.org/ftp/continuations/undelimited.html). Delimited continuations return a value and can be composed, so they at least resemble functions (even though they are not, strictly speaking, actually functions), whereas undelimited continuations do not even return. For two different debunkings of the continuations-are-functions myth, approaching the problem from completely different angles, see the above post by Oleg Kiselyov, and [John Shutt: Continuations and term-rewriting calculi](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html). Racket provides a thought-out implementation of delimited continuations and [prompts](https://docs.racket-lang.org/guide/prompt.html) to control them. #### Why this syntax? -As for a function call in ``call_cc[...]`` vs. just a function reference: Typical lispy usage of ``call/cc`` uses an inline lambda, with the closure property passing in everything except ``cc``, but in Python ``def`` is a statement. A technically possible alternative syntax would be: +As for a function call in `call_cc[...]` vs. just a function reference: Typical lispy usage of `call/cc` uses an inline lambda, with the closure property passing in everything except `cc`, but in Python `def` is a statement. A technically possible alternative syntax would be: ```python with call_cc(f): # this syntax not supported! @@ -1350,17 +1661,17 @@ with call_cc(f): # this syntax not supported! but the expr macro variant provides better options for receiving multiple return values, and perhaps remains closer to standard Python. -The ``call_cc[]`` explicitly suggests that these are (almost) the only places where the ``cc`` argument obtains a non-default value. It also visually indicates the exact position of the checkpoint, while keeping to standard Python syntax.
+The `call_cc[]` explicitly suggests that these are (almost) the only places where the `cc` argument obtains a non-default value. It also visually indicates the exact position of the checkpoint, while keeping to standard Python syntax. -(*Almost*: As explained above, a tail call passes along the current value of ``cc``, and ``cc`` can be set manually.) +(*Almost*: As explained above, a tail call passes along the current value of `cc`, and `cc` can be set manually.) -### ``prefix``: prefix function call syntax for Python +### `prefix`: prefix function call syntax for Python Write Python almost like Lisp! -Lexically inside a ``with prefix`` block, any literal tuple denotes a function call, unless quoted. The first element is the operator, the rest are arguments. Bindings of the ``let`` macros and the top-level tuple in a ``do[]`` are left alone, but ``prefix`` recurses inside them (in the case of bindings, on each RHS). +Lexically inside a `with prefix` block, any literal tuple denotes a function call, unless quoted. The first element is the operator, the rest are arguments. Bindings of the `let` macros and the top-level tuple in a `do[]` are left alone, but `prefix` recurses inside them (in the case of let-bindings, on each RHS). The rest is best explained by example: @@ -1402,7 +1713,7 @@ with prefix: # in case of duplicate name across kws, rightmost wins assert (f, kw(a="hi there"), kw(b="Tom"), kw(b="Jerry")) == (q, "hi there", "Jerry") - # give *args with unpythonic.fun.apply, like in Lisps: + # give *args with unpythonic.apply, like in Lisps: lst = [1, 2, 3] def g(*args): return args @@ -1411,7 +1722,9 @@ with prefix: assert (apply, g, "hi", "ho", lst) == (q, "hi" ,"ho", 1, 2, 3) ``` -This comboes with ``autocurry`` for an authentic *LisThEll* programming experience: +If you use the `q`, `u` and `kw()` operators, they must be macro-imported. The `q`, `u` and `kw()` operators may only appear in a tuple inside a prefix block. 
In any invalid position, any of them is considered a syntax error at macro expansion time. + +The `prefix` macro comboes with `autocurry` for an authentic *Listhell* programming experience: ```python from unpythonic.syntax import macros, autocurry, prefix, q, u, kw @@ -1424,14 +1737,20 @@ with prefix, autocurry: # important: apply prefix first, then autocurry assert (mymap, double, (q, 1, 2, 3)) == ll(2, 4, 6) ``` -**CAUTION**: The ``prefix`` macro is experimental and not intended for use in production code. +See also [the Listhell dialect](dialects/listhell.md), which pre-packages that combo. + +**CAUTION**: The `prefix` macro is experimental and not intended for use in production code. + + +### `autoreturn`: implicit `return` in tail position +**Changed in v0.15.0.** *If the item in tail position is a function definition or class definition, return the thing that was defined. This functionality being missing in earlier versions was an oversight.* -### ``autoreturn``: implicit ``return`` in tail position +In Lisps, a function implicitly returns the value of the expression in tail position along the code path being executed. That is, "the last value" is automatically returned when the function terminates normally. No `return` keyword is needed. -In Lisps, a function implicitly returns the value of the expression in tail position (along the code path being executed). Python's ``lambda`` also behaves like this (the whole body is just one return-value expression), but ``def`` doesn't. +Python's `lambda` also already behaves like this; the whole body is just one expression, whose value will be returned. -Now ``def`` can, too: +However, `def` requires a `return`, even in tail position. 
Enter the `autoreturn` macro: ```python from unpythonic.syntax import macros, autoreturn @@ -1455,67 +1774,85 @@ with autoreturn: assert g(42) == "something else" ``` -Each ``def`` function definition lexically within the ``with autoreturn`` block is examined, and if the last item within the body is an expression ``expr``, it is transformed into ``return expr``. Additionally: +Each `def` or `async def` function definition lexically within the `with autoreturn` block is examined. + +Any explicit `return` statements are left alone, so `return` can still be used as usual. This is especially useful if you want to return early (before execution reaches the tail position). + +To find and transform the statement(s) in tail position, we look at the last statement within the function definition. If it is: + + - An expression `expr`, it is transformed into `return expr`. - - If the last item is an ``if``/``elif``/``else`` block, the transformation is applied to the last item in each of its branches. + - A function or class definition, a return statement is appended to return that function/class. **Added in v0.15.0.** - - If the last item is a ``with`` or ``async with`` block, the transformation is applied to the last item in its body. + - An `if`/`elif`/`else` block, the transformation is applied recursively to the last item in each of its branches. + - **CAUTION**: If the final `else` of an `if`/`elif`/`else` is omitted, as often in Python, then only the `else` item is in tail position with respect to the function definition - likely not what you want. So with `autoreturn`, the final `else` should be written out explicitly, to include the `else` branch into the `if`/`elif`/`else` statement. - - If the last item is a ``try``/``except``/``else``/``finally`` block: - - **If** an ``else`` clause is present, the transformation is applied to the last item in it; **otherwise**, to the last item in the ``try`` clause. 
These are the positions that indicate a normal return (no exception was raised). - - In both cases, the transformation is applied to the last item in each of the ``except`` clauses. - - The ``finally`` clause is not transformed; the intention is it is usually a finalizer (e.g. to release resources) that runs after the interesting value is already being returned by ``try``, ``else`` or ``except``. + - A `with` or `async with` block, the transformation is applied recursively to the last item in its body. -If needed, the above rules are applied recursively to locate the tail position(s). + - A `try`/`except`/`else`/`finally` block: + - **If** an `else` clause is present, the transformation is applied recursively to the last item in it; **otherwise**, to the last item in the `try` clause. These are the positions that indicate a normal return (i.e. no exception was raised). + - In both cases, the transformation is applied recursively to the last item in each of the `except` clauses. + - The `finally` clause is not transformed; it is intended as a finalizer (e.g. to release resources) that runs after the interesting value is already being returned by `try`, `else` or `except`. -Any explicit ``return`` statements are left alone, so ``return`` can still be used as usual. +**CAUTION**: `for`, `async for`, `while` are currently not analyzed; effectively, these are defined as always returning `None`. If the last item in your function body is a loop, use an explicit return. -**CAUTION**: If the final ``else`` of an ``if``/``elif``/``else`` is omitted, as often in Python, then only the ``else`` item is in tail position with respect to the function definition - likely not what you want. So with ``autoreturn``, the final ``else`` should be written out explicitly, to make the ``else`` branch part of the same ``if``/``elif``/``else`` block. 
+**CAUTION**: With `autoreturn` enabled, functions no longer return `None` by default; the whole point of this macro is to change the default return value. The default return value becomes `None` only if the tail position contains a statement other than `if`, `with`, `async with` or `try`. -**CAUTION**: ``for``, ``async for``, ``while`` are currently not analyzed; effectively, these are defined as always returning ``None``. If the last item in your function body is a loop, use an explicit return. +If you wish to omit `return` in tail calls, `autoreturn` comboes with `tco`. For the correct invocation order, see [the xmas tree combo](#the-xmas-tree-combo). -**CAUTION**: With ``autoreturn`` enabled, functions no longer return ``None`` by default; the whole point of this macro is to change the default return value. The default return value is ``None`` only if the tail position contains a statement other than ``if``, ``with``, ``async with`` or ``try``. +For code using **conditions and restarts**: there is no special integration between `autoreturn` and the conditions-and-restarts subsystem of `unpythonic`. However, these should work together, because: -If you wish to omit ``return`` in tail calls, this comboes with ``tco``; just apply ``autoreturn`` first (either ``with autoreturn, tco:`` or in nested format, ``with tco:``, ``with autoreturn:``). + - The `with restarts` form is just a `with` block, so it gets the `autoreturn` treatment. + - The handlers in a `with handlers` form are either separately defined functions, or lambdas. + - Lambdas need no `autoreturn`. + - If you `def` the handler functions in a `with autoreturn` block (either the same one or a different one; this does not matter), they will get the `autoreturn` treatment. + - The `with handlers` form itself is just a `with` block, so it also gets the `autoreturn` treatment.
-### ``forall``: nondeterministic evaluation +### `forall`: nondeterministic evaluation -Behaves the same as the multiple-body-expression tuple comprehension ``unpythonic.amb.forall``, but implemented purely by AST transformation, with real lexical variables. This is essentially a macro implementation of Haskell's do-notation for Python, specialized to the List monad (but the code is generic and very short; see ``unpythonic.syntax.forall``). +**Changed in v0.15.3.** *Env-assignment now uses the assignment expression syntax `x := range(3)`. The old syntax `x << range(3)` is still supported for backward compatibility.* + +This is essentially a macro implementation of Haskell's do-notation for Python, specialized to the List monad. + +The `forall[]` expr macro behaves the same as the multiple-body-expression tuple comprehension `unpythonic.forall`, but the macro is implemented purely by AST transformation, using real lexical variables. + +The implementation is generic and very short; if interested, see the module [`unpythonic.syntax.forall`](../unpythonic/syntax/forall.py). Compare the module [`unpythonic.amb`](../unpythonic/amb.py), which implements the same functionality with a source code generator and `eval`, without macros. The macro implementation is both shorter and more readable; this is effectively a textbook example of a situation where macros are the clean solution. 
```python -from unpythonic.syntax import macros, forall, insist, deny +from unpythonic.syntax import macros, forall +from unpythonic.syntax import insist, deny # regular functions, not macros -out = forall[y << range(3), - x << range(3), +out = forall[y := range(3), + x := range(3), insist(x % 2 == 0), (x, y)] assert out == ((0, 0), (2, 0), (0, 1), (2, 1), (0, 2), (2, 2)) # pythagorean triples -pt = forall[z << range(1, 21), # hypotenuse - x << range(1, z+1), # shorter leg - y << range(x, z+1), # longer leg +pt = forall[z := range(1, 21), # hypotenuse + x := range(1, z+1), # shorter leg + y := range(x, z+1), # longer leg insist(x*x + y*y == z*z), (x, y, z)] assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), (8, 15, 17), (9, 12, 15), (12, 16, 20)) ``` -Assignment (with List-monadic magic) is ``var << iterable``. It is only valid at the top level of the ``forall`` (e.g. not inside any possibly nested ``let``). +Assignment, **with** List-monadic magic, is `var := iterable`. It is only valid at the top level of the `forall` (e.g. not inside any possibly nested `let`). -``insist`` and ``deny`` are not really macros; they are just the functions from ``unpythonic.amb``, re-exported for convenience. +`insist` and `deny` are not macros; they are just the functions from `unpythonic.amb`, re-exported for convenience. -The error raised by an undefined name in a ``forall`` section is ``NameError``. +The error raised by an undefined name in a `forall[]` section is `NameError`. ## Convenience features Small macros that are not essential but make some things easier or simpler. 
-### ``cond``: the missing ``elif`` for ``a if p else b`` +### `cond`: the missing `elif` for `a if p else b` -Now lambdas too can have multi-branch conditionals, yet remain human-readable: +With `cond`, lambdas too can have multi-branch conditionals, yet remain human-readable: ```python from unpythonic.syntax import macros, cond @@ -1526,9 +1863,9 @@ answer = lambda x: cond[x == 2, "two", print(answer(42)) ``` -Syntax is ``cond[test1, then1, test2, then2, ..., otherwise]``. Expansion raises an error if the ``otherwise`` branch is missing. +Syntax is `cond[test1, then1, test2, then2, ..., otherwise]`. A missing `otherwise` branch is considered a syntax error at macro expansion time. -Any part of ``cond`` may have multiple expressions by surrounding it with brackets: +Any part of `cond` may have multiple expressions by surrounding it with brackets: ```python cond[[pre1, ..., test1], [post1, ..., then1], @@ -1537,24 +1874,32 @@ cond[[pre1, ..., test1], [post1, ..., then1], [postn, ..., otherwise]] ``` -To denote a single expression that is a literal list, use an extra set of brackets: ``[[1, 2, 3]]``. +This is just the extra bracket syntax that denotes an implicit `do[]`. To denote a single expression that is a literal list, double the brackets: `[[1, 2, 3]]`. Just like in a `let[]` form, the outer brackets enable multiple-expression mode, and then the inner brackets denote a list. The multiple-expression mode is allowed also when there is just one expression. + +Inspired by the `cond` form of many Lisps. There is some variation between Lisp dialects on whether `cond` or `if` is preferable if the dialect provides both. For example, in [Racket](https://racket-lang.org/), `cond` is the [preferred](https://docs.racket-lang.org/style/Choosing_the_Right_Construct.html#%28part._.Conditionals%29) construct for writing conditionals. 
-### ``aif``: anaphoric if +### `aif`: anaphoric if -This is mainly of interest as a point of [comparison with Racket](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/aif.rkt); ``aif`` is about the simplest macro that relies on either the lack of hygiene or breaking thereof. +**Changed in v0.15.0.** *The `it` helper macro may only appear in the `then` and `otherwise` branches of an `aif[]`. Anywhere else, it is considered a syntax error at macro expansion time.* + +In linguistics, an [*anaphor*](https://en.wikipedia.org/wiki/Anaphora_(linguistics)) is an expression that refers to another, such as the English word *"it"*. [Anaphoric macros](https://en.wikipedia.org/wiki/Anaphoric_macro) are a lispy take on the concept. An anaphoric macro may, for example, implicitly define an `it` that the user code can then use, with the meaning defined by the macro. This is sometimes a useful technique to shorten code, but it can also make code unreadable by hiding definitions, so it should be used sparingly. + +Particularly, the *anaphoric if* is a classic macro, where `it` is automatically bound to the result of the test. We provide that macro as `aif[]`. + +Concerning readability, the anaphoric if is relatively harmless, because it is *almost* obvious from context that the only `it` that makes sense for a human to refer to is the test expression. ```python -from unpythonic.syntax import macros, aif +from unpythonic.syntax import macros, aif, it -aif[2*21, +aif[2 * 21, print(f"it is {it}"), print("it is falsey")] ``` -Syntax is ``aif[test, then, otherwise]``. The magic identifier ``it`` refers to the test result while (lexically) inside the ``aif``, and does not exist outside the ``aif``. +Syntax is `aif[test, then, otherwise]`. 
The magic identifier `it` (which **must** be imported as a macro) refers to the test result while (lexically) inside the `then` and `otherwise` branches of an `aif[]`, and anywhere else is considered a syntax error at macro expansion time. -Any part of ``aif`` may have multiple expressions by surrounding it with brackets (implicit ``do[]``): +Any part of `aif` may have multiple expressions by surrounding it with brackets: ```python aif[[pre, ..., test], @@ -1562,12 +1907,16 @@ aif[[pre, ..., test], [post_false, ..., otherwise]] # "otherwise" branch ``` -To denote a single expression that is a literal list, use an extra set of brackets: ``[[1, 2, 3]]``. +This is just the extra bracket syntax that denotes an implicit `do[]`. To denote a single expression that is a literal list, double the brackets: `[[1, 2, 3]]`. Just like in a `let[]` form, the outer brackets enable multiple-expression mode, and then the inner brackets denote a list. The multiple-expression mode is allowed also when there is just one expression. + +If interested, [compare with a Racket implementation](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/aif.rkt); `aif` is probably *the* simplest macro that relies on either the lack of [macro hygiene](https://en.wikipedia.org/wiki/Hygienic_macro) or intentional *breaking* thereof. -### ``autoref``: implicitly reference attributes of an object +### `autoref`: implicitly reference attributes of an object -Ever wish you could ``with(obj)`` to say ``x`` instead of ``obj.x`` to read attributes of an object? Enter the ``autoref`` block macro: +**CAUTION**: *This is a really, really bad idea that comes with serious readability and security implications. Python does not provide this construct itself, for good reason. Details below. Use with care, if at all.* + +Ever wish you could `with(obj)` to say `x` instead of `obj.x` to read attributes of an object? 
Enter the `autoref` block macro: ```python from unpythonic.syntax import macros, autoref @@ -1581,26 +1930,28 @@ with autoref(e): assert c == 3 # no c in e, so just c ``` -The transformation is applied for names in ``Load`` context only, including names found in ``Attribute`` or ``Subscript`` nodes. +The transformation is applied for names in `Load` context only, including names found inside `Attribute` or `Subscript` AST nodes, so things like `a[1]` and `a.x` are also valid (looking up `a` in `e`). -Names in ``Store`` or ``Del`` context are not redirected. To write to or delete attributes of ``o``, explicitly refer to ``o.x``, as usual. +Names in `Store` or `Del` context are not redirected. To write to or delete attributes of `o`, explicitly refer to `o.x`, as usual. Nested autoref blocks are allowed (lookups are lexically scoped). -Reading with ``autoref`` can be convenient e.g. for data returned by [SciPy's ``.mat`` file loader](https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.loadmat.html). +Reading with `autoref` can be convenient e.g. for data returned by [SciPy's `.mat` file loader](https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.loadmat.html). -See the [unit tests](../unpythonic/syntax/test/test_autoref.py) for more usage examples. +See the [unit tests](../unpythonic/syntax/tests/test_autoref.py) for more usage examples. This is similar to the JavaScript [`with` construct](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with), which is nowadays [deprecated](https://2ality.com/2011/06/with-statement.html). See also [the ES6 reference on `with`](https://www.ecma-international.org/ecma-262/6.0/#sec-with-statement). -**CAUTION**: This construct was deprecated in JavaScript **for security reasons**. Since the autoref'd object **will hijack all name lookups**, use `with autoref` only with an object you trust! +**NOTE**: The JavaScript `with` and the Python `with` have nothing in common except the name. 
+ +**CAUTION**: The `with` construct of JavaScript was deprecated **for security reasons**. Since the autoref'd object **will hijack all name lookups**, use `with autoref` only with an object you trust! In most Python code, this does not matter, as we are all adults here, but this *may* matter if a Python object arrives from an untrusted source in a networked app. -**CAUTION**: `with autoref` also complicates static code analysis or makes it outright infeasible, for the same reason. It is impossible to statically know whether something that looks like a bare name in the source code is actually a true bare name, or a reference to an attribute of the autoref'd object. That status can also change at any time, since the lookup is dynamic, and attributes can be added and removed dynamically. +**CAUTION**: `with autoref` complicates static code analysis or makes it outright infeasible. It is impossible to statically know whether something that looks like a bare name in the source code is actually a true bare name, or a reference to an attribute of the autoref'd object. That status can also change at any time, since the lookup is dynamic, and attributes can be added and removed dynamically. ## Testing and debugging -### ``unpythonic.test.fixtures``: a test framework for macro-enabled Python +### `unpythonic.test.fixtures`: a test framework for macro-enabled Python **Added in v0.14.3.** @@ -1646,51 +1997,124 @@ with session("simple framework demo"): try: import blargly except ImportError: - error["blargly not installed, cannot test integration with it."] + warn["blargly not installed, skipping integration tests."] else: ... # blargly integration tests go here + # Unconditional errors and failures can be emitted with `error[]` and `fail[]`. 
+ # with testset("not implemented"): + # fail["not implemented yet!"] + with testset(postproc=terminate): test[2 * 2 == 5] # fails, terminating the nearest dynamically enclosing `with session` test[2 * 2 == 4] # not reached ``` -By default, running this script through the `macropython` wrapper (from `mcpyrate`) will produce an ANSI-colored test report in the terminal. To actually see how the output looks like, for actual runnable examples, see `unpythonic`'s own automated tests. +By default, running this script through the `macropython` wrapper (from `mcpyrate`) will produce an ANSI-colored test report in the terminal. To see what the output actually looks like, and for runnable examples, see `unpythonic`'s own automated tests. + +If you want to turn coloring off (e.g. for the purposes of redirecting stderr to a file), see the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. -If you want to turn coloring off (e.g. for redirecting stderr to a file), see the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. +The following is an overview of the framework. For details, look at the docstrings of the various constructs in `unpythonic.test.fixtures` (which provides much of this), those of the testing macros, and finally, the automated tests of `unpythonic` itself. Tests can be found in subfolders named `tests`: [regular code](../unpythonic/tests/), [macros](../unpythonic/syntax/tests/), [dialects](../unpythonic/dialects/tests/). -The following is an overview of the framework. For details, look at the docstrings of the various constructs in `unpythonic.test.fixtures` (which provides much of this), those of the test macros, and finally, the automated tests of `unpythonic` itself. +Examples of how to test code using conditions and restarts can be found in [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py).
-How to test code using conditions and restarts can be found in [`unpythonic.test.test_conditions`](../unpythonic/test/test_conditions.py). +Examples of how to test macro utilities (e.g. syntax transformer functions that operate on ASTs) can be found in [`unpythonic.syntax.tests.test_letdoutil`](../unpythonic/syntax/tests/test_letdoutil.py). -How to test macro utilities (e.g. syntax transformer functions that operate on ASTs) can be found in [`unpythonic.syntax.test.test_letdoutil`](../unpythonic/syntax/test/test_letdoutil.py). +**NOTE**: If you want to compartmentalize macro expansion in your tests (so that an error during macro expansion will not crash your test unit), `mcpyrate` offers more than one way to invoke the macro expander at run time ([*of your test unit*](https://github.com/Technologicat/mcpyrate/blob/master/doc/troubleshooting.md#macro-expansion-time-where-exactly)), depending on what exactly you want to do. One is the `mcpyrate.metatools.expand` family of macros, and another are the functions in the module `mcpyrate.compiler`. See [the `mcpyrate` user manual](https://github.com/Technologicat/mcpyrate/blob/master/doc/main.md): specifically on [`metatools` (and quasiquoting)](https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md) and on [`compiler`](https://github.com/Technologicat/mcpyrate/blob/master/doc/compiler.md). The tests of `mcpyrate` itself provide some examples on how to use `compiler`. #### Overview -We provide the low-level syntactic constructs `test[]`, `test_raises[]` and `test_signals[]`, with the usual meanings. The last one is for testing code that uses the `signal` function and its sisters (related to conditions and restarts à la Common Lisp); see [`unpythonic.conditions`](features.md#handlers-restarts-conditions-and-restarts). +All testing *macros* are provided in the module `unpythonic.syntax`. All regular functions related to testing are provided in the module `unpythonic.test.fixtures`. 
+ +We provide the low-level syntactic constructs `test[]`, `test_raises[]` and `test_signals[]`, with the usual meanings. The last one is for testing code that uses `unpythonic.signal` and its sisters (related to conditions and restarts à la Common Lisp); see the module [`unpythonic.conditions`](../unpythonic/conditions.py), and the user manual section on [conditions and restarts](features.md#handlers-restarts-conditions-and-restarts). + +By default, the `test[expr]` macro asserts that the value of `expr` is truthy. If you want to assert only that `expr` runs to completion normally, use `test[returns_normally(expr)]`. Here `returns_normally` is a regular function, which is available in the module `unpythonic.test.fixtures`. + +All three testing constructs also come in block variants, `with test`, `with test_raises[exctype]`, `with test_signals[exctype]`. + +As usual in test frameworks, the testing constructs behave somewhat like `assert`, with the difference that a failure or error will not abort the whole unit, unless explicitly asked to do so. There is no return value; upon success, the testing constructs return `None`. Upon failure (test assertion not satisfied) or error (unexpected exception or signal), the failure or error is reported, and further tests continue running. + +All the variants of the testing constructs catch any uncaught exceptions and signals from inside the test expression or block. Any unexpected uncaught exception or signal is considered an error. + +Because `unpythonic.test.fixtures` is, by design, a minimalistic *no-framework* (cf. "NoSQL"), it is up to you to define - in your custom test runner - whether having any failures, errors or warnings should lead to the whole test suite failing. Whether the program's exit code is zero, is important e.g. for GitHub's CI workflows. + +For example, in `unpythonic`'s own tests, warnings do not cause the test suite to fail, but errors and failures do. 
The top-level [`runtests.py`](../runtests.py) is a complete test runner using the reusable `unpythonic.test.runner` module: + +```python +import os +from unpythonic.test.runner import discover_testmodules, run + +import mcpyrate.activate # noqa: F401 + +testsets = [("my tests", discover_testmodules(os.path.join("mypackage", "tests")))] +if not run(testsets): + raise SystemExit(1) +``` + +`discover_testmodules` finds `test_*.py` files in a directory and returns dotted module names. `run` wraps the session/testset/import pattern, with automatic version-suffix gating (e.g. `test_foo_3_11.py` is skipped with a warning on Python < 3.11). + +#### Important: bytecode cache pitfall + +**Never compile `.py` files in a macro-enabled project** using `py_compile`, `python -m compileall`, pip's `--compile` flag, or any other mechanism that bypasses the macro expander. These tools produce `.pyc` files that do not contain macro-expanded code, which will break macro imports at run time. + +The symptom is typically `ImportError: cannot import name 'macros' from 'mcpyrate.quotes'` (or similar). This happens because the stale `.pyc` is loaded instead of the `.py` source, so the macro expander never runs. + +To fix this, clean the bytecode caches: -By default, the `test[expr]` macro asserts that the value of `expr` is truthy. If you want to assert only that `expr` runs to completion normally, use `test[returns_normally(expr)]`. +```bash +macropython -c mypackage +``` + +This removes all `__pycache__` directories under the given path. After cleaning, re-run your tests normally — the macro expander will recompile the source files correctly. -The test macros also come in block variants, `with test`, `with test_raises(exctype)`, `with test_signals(exctype)`. +#### Reading test results -As usual in test frameworks, the test constructs behave somewhat like `assert`, with the difference that a failure or error will not abort the whole unit (unless explicitly asked to do so). 
There is no return value; upon success, the test constructs return `None`. Upon failure (test assertion not satisfied) or error (unexpected exception or signal), the failure or error is reported, and further tests continue running. +The framework reports **Pass**, **Fail**, **Error**, and **Total** per testset, with optional **Warn** counts. These categories mean: -All the test variants catch any uncaught exceptions and signals from inside the test expression or block. Any unexpected uncaught exception or signal is considered an error. +- **Pass**: test assertion succeeded. +- **Fail**: test ran to completion, but the assertion was not satisfied. +- **Error**: test did not run to completion (unexpected exception or signal inside a `test[]` expression). This also includes intentional `error[]` signals — so a few errors from skip patterns (e.g. optional dependency not installed) may be normal. Check the actual error messages, not just the count. (Since 2.0.0, optional dependency skips use `warn[]` instead.) +- **Warn**: a human-initiated warning (via `warn[]` or `emit_warning()`). Warnings are not counted in the total, and do not cause the test suite to fail. -Because `unpythonic.test.fixtures` is, by design, a minimalistic *no-framework* (cf. "NoSQL"), it is up to you to define - in your custom test runner - whether having any failures, errors or warnings should lead to the whole test suite failing (whether the program's exit code is zero is important e.g. for GitHub's CI workflows). For example, in `unpythonic`'s own tests (see the very short [`runtests.py`](../runtests.py)), warnings do not cause the test suite to fail, but errors and failures do. +Nested testsets show hierarchy with indentation and asterisk depth (`**`, `****`, `******`, etc.). Counts propagate upward — the top-level summary reflects all tests across all testsets. 
#### Testing syntax quick reference -**Imports**: +**Imports** - complete list: ```python from unpythonic.syntax import (macros, test, test_raises, test_signals, - fail, error, warn, the) + fail, error, warn, the, expand_testing_macros_first) from unpythonic.test.fixtures import (session, testset, returns_normally, catch_signals, terminate) ``` -**Overall structure** - session and testsets: +**Overall structure** of typical unit test module: + +```python +from unpythonic.syntax import macros, test, test_raises, the +from unpythonic.test.fixtures import session, testset + +def runtests(): + with testset("something 1"): + test[...] + test_raises[TypeError, ...] + test_raises[ValueError, ...] + ... + with testset("something 2"): + ... + ... + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() +``` + +The if-main idiom allows running this test module individually, but it is tagged with `# pragma: no cover`, so that the coverage reporter will not yell about it when the module is run by the test runner as part of the complete test suite (which, incidentally, is also a good opportunity to [measure coverage](../measure_coverage.sh)). + +If you want to ensure that testing macros expand before anything else - including your own code-walking block macros (when you have tests inside the body of a `with` block that invokes a code-walking block macro) - import the macro `expand_testing_macros_first`, and put a `with expand_testing_macros_first` around the affected code. (See [Expansion order](#expansion-order), below.) + +**Sessions and testsets**: ```python with session(name): @@ -1699,7 +2123,7 @@ with session(name): with testset(name): ... - with testset(name): + with testset(name): # nested testset ... with testset(name): @@ -1707,11 +2131,11 @@ with session(name): ... ``` -Each `name` above is human-readable and optional. 
The purpose of the naming feature is to improve [scannability](https://www.teachingenglish.org.uk/article/scanning) of the testing report for the human reader. +Each `name` above is human-readable and optional. The purpose of the naming feature is to improve [scannability](https://www.teachingenglish.org.uk/article/scanning) of the testing report, and of the unit test source code, for the human reader. Note that even if `name` is omitted, the parentheses are still mandatory, because `session` and `testset` are just garden variety context managers that must be instantiated in order for them to perform their jobs. -A session implicitly introduces a top-level testset, for convenience. +A session implicitly introduces a top-level testset, for convenience - so if you only a have a few tests and don't want to group them, you do not need to use `with testset` at all. Testsets can be nested arbitrarily deep. @@ -1721,13 +2145,13 @@ Additional tools for code using **conditions and restarts**: The `catch_signals` context manager controls the signal barrier of `with testset` and the `test` family of syntactic constructs. It is provided for writing tests for code that uses conditions and restarts. -Used as `with catch_signals(False)`, it disables the signal barrier. Within the dynamic extent of the block, an uncaught signal (in the sense of `unpythonic.conditions.signal` and its sisters) is not considered an error. This can be useful, because sometimes leaving a signal uncaught is the right thing to do. See [`unpythonic.test.test_conditions`](../unpythonic/test/test_conditions.py) for examples. +Used as `with catch_signals(False)`, it disables the signal barrier for the dynamic extent of the block. When the barrier is disabled, an uncaught signal (in the sense of `unpythonic.signal` and its sisters) is not considered as an error. This can be useful, because sometimes leaving a signal uncaught is the right thing to do. 
See [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py) for examples. -It can be nested. Used as `with catch_signals(True)`, it re-enables the barrier, if currently disabled. +The `with catch_signals` construct can be nested. Used as `with catch_signals(True)`, it re-enables the barrier, if currently disabled, for the dynamic extent of that inner `with catch_signals` block. When a `with catch_signals` block exits, the previous state of the signal barrier is automatically restored. -**Expression** forms: +**Expression** forms - complete list: ```python test[expr] @@ -1743,41 +2167,42 @@ error[message] warn[message] ``` -Inside a `test`, the helper macro `the[]` is available to mark interesting subexpressions inside `expr`, for failure and error reporting. An `expr` may contain an arbitrary number of `the[]`. By default, if `expr` is a comparison, the leftmost term is automatically marked (so that e.g. `test[x < 3]` will automatically report the value of `x` if the test fails); otherwise nothing. The default is only used if there is no explicit `the[]` inside `expr`. +Inside a `test[]`, the helper macro `the[]` is available to mark one or more interesting subexpressions inside `expr`, for failure and error reporting. An `expr` may contain an arbitrary number of `the[]`. By default, if `expr` is a comparison, the leftmost term is implicitly marked (so that e.g. `test[x < 3]` will automatically report the value of `x` if the test fails); otherwise nothing. The default is only used when there is **no** explicit `the[]` inside `expr`. The constructs `test_raises`, `test_signals`, `fail`, `error` and `warn` do **not** support `the[]`. Tests can be nested; this is sometimes useful as an explicit signal barrier. -Note the macros `error[]` and `warn[]` have nothing to do with the functions with the same name in `unpythonic.conditions`. 
The macros are part of the test framework; the functions with the same name are signaling protocols of the conditions and restarts system. Following the usual naming conventions in both systems, this naming conflict is unfortunately what we get. +Note that the testing constructs `error[]` and `warn[]`, which are macros, have nothing to do with the functions with the same name in the module `unpythonic.conditions`. The macros are part of the test framework; the functions with the same name are signaling protocols of the conditions and restarts system. Following the usual naming conventions separately in both systems, this naming conflict is unfortunately what we get. -**Block** forms: +**Block** forms - complete list: ```python with test: body ... + # no `return`; assert just that the block completes normally with test: body ... - return expr -with test(message): + return expr # assert that `expr` is truthy +with test[message]: body ... -with test(message): +with test[message]: body ... return expr -with test_raises(exctype): +with test_raises[exctype]: body ... -with test_raises(exctype, message): +with test_raises[exctype, message]: body ... -with test_signals(exctype): +with test_signals[exctype]: body ... -with test_signals(exctype, message): +with test_signals[exctype, message]: body ... ``` @@ -1788,27 +2213,56 @@ The constructs `with test_raises`, `with test_signals` do **not** support `the[] Tests can be nested; this is sometimes useful as an explicit signal barrier. +#### Expansion order + +**Changed in v0.15.0**. *The testing macros now expand outside-in; this allows `mcpyrate.debug.step_expansion` to treat them as a separate step. In v0.14.3, which introduced the test framework, they used to be two-pass macros.* + +Your test macro invocations may get partially expanded code, if those invocations reside in the body of an invocation of a block macro that also expands outside-in: + +```python +with yourblockmacro: # outside-in + test[...] 
+``` + +Here the `...` may be edited by `yourblockmacro` before `test[]` sees it. (It likely **will** be edited, since this pattern will commonly appear in the tests for `yourblockmacro`, where the whole point is to have the `...` depend on what `yourblockmacro` outputs.) + +If you need testing macros to expand before anything else even in this scenario (so you can more clearly see where in the unexpanded source code a particular expression in a failing/erroring test came from), you can do this: + +```python +from unpythonic.syntax import macros, expand_testing_macros_first + +with expand_testing_macros_first: + with yourblockmacro: + test[...] +``` + +The `expand_testing_macros_first` macro is itself a code-walking block macro that does as it says on the tin. The testing macros are identified by scanning the bindings of the current macro expander; names do not matter, so it respects as-imports. + +This does imply that `yourblockmacro` will then receive the expanded form of `test[...]` as input, but that's macros for you. You will have to choose which is more important: seeing the unexpanded code in error messages, or receiving unexpanded `test[]` expressions in `yourblockmacro`. + #### `with test`: test blocks Test blocks are meant for testing code that requires Python statements; i.e. does not fit into Python's expression sublanguage. -In `unpythonic.test.fixtures`, a test block is implicitly lifted into a function. Hence, any local variables assigned to inside the block remain local to the implicit function. Use Python's `nonlocal` and `global` keywords, if needed. +In `unpythonic.test.fixtures`, **a test block is implicitly lifted into a function**. Hence, any local variables assigned to inside the block remain local to the implicit function. Use Python's `nonlocal` and `global` keywords, if needed. By default, a `with test` block asserts just that it completes normally. 
If you instead want to assert that an expression is truthy, use `return expr` to terminate the implicit function and return the value of the desired `expr`. The return value is passed to the test asserter for checking that it is truthy. -(Another way to view the default behavior is that the `with test` macro injects a `return True` at the end of the block, if there is no `return`. This is actually how the default behavior is implemented.) +Another way to view the default behavior is that the `with test` macro injects a `return True` at the end of the block to terminate the implicit function, if there is no explicit `return`. This is actually how the default behavior is implemented. -The `with test_raises(exctype)` and `with test_signals(exctype)` blocks assert that the block raises (respectively, signals) the declared exception (condition) type. These blocks are implicitly lifted into functions, too, but they do not check the return value. For them, **not** raising/signaling the declared exception/condition type is considered a test failure. Raising/signaling some other (hence unexpected) exception/condition type is considered an error. +The `with test_raises[exctype]` and `with test_signals[exctype]` blocks assert that the block raises (respectively, signals) the declared exception type. These blocks are implicitly lifted into functions, too, but they do not check the return value. For them, **not** raising/signaling the declared exception type is considered a test failure. Raising/signaling some other (hence unexpected) exception type is considered an error. #### `the`: capture the value of interesting subexpressions -The point of `unpythonic.test.fixtures` is to make testing macro-enabled Python as frictionless as reasonably possible. +The point of `unpythonic.test.fixtures` is to make testing macro-enabled Python as frictionless as reasonably possible. Thus we provide this convenience feature. 
-Inside a `test[]` expression, or anywhere within the code in a `with test` block, the `the[]` macro can be used to declare any number of subexpressions as interesting, for capturing the source code and value into the test failure message, which is shown if the test fails. Source code is captured in the first pass (outside in), before any nested second-pass (inside out) macros expand. (Most of the macros defined by `unpythonic` expand in the second pass.) The value is captured at run time as a side effect just after the value has been evaluated. +Inside a `test[]` expression, or anywhere within the code in a `with test` block, the `the[]` macro can be used to declare any number of subexpressions as interesting, for capturing the source code and value into the test failure message, which is shown if the test fails. Each `the[]` captures one subexpression (as many times as it is evaluated, in the order evaluated). + +Because test macros expand outside-in, the source code is captured before any nested inside-out macros expand. (Many macros defined by `unpythonic` expand inside-out.) The value is captured at run time as a side effect just after the value has been evaluated. By default (if no explicit `the[]` is present), `test[]` implicitly inserts a `the[]` for the leftmost term if the top-level expression is a comparison (common use case), and otherwise does not capture anything. -When nothing is captured, if the test fails, the value of the whole expression is shown. Of course, you'll then already know the value is falsey, but there's still the possibly useful distinction of whether it's, say, `False`, `None`, `0` or `[]`. +When nothing is captured, if the test fails, the value of the whole expression is shown. Of course, you will then already know the value is falsey, but there is still the possibly useful distinction of whether it is, say, `False`, `None`, `0` or `[]`. A `test[]` or `with test` can have any number of subexpressions marked as `the[]`. 
It is possible to even nest a `the[]` inside another `the[]`, if you need the value of some subexpression as well as one of *its* subexpressions. The captured values are gathered, in the order they were evaluated (by Python's standard evaluation rules), into a list that is shown upon test failure. @@ -1818,25 +2272,25 @@ In case of nested `test[]` or nested `with test`, each `the[...]` is understood The `the[]` mechanism is smart enough to skip reporting trivialities for literals, such as `(1, 2, 3) = (1, 2, 3)` in `test[4 in the[(1, 2, 3)]]`, or `4 = 4` in `test[4 in (1, 2, 3)]`. In the second case, note the implicit `the[]` on the LHS, because `in` is a comparison operator. -If nothing but such trivialities were captured, the failure message will instead report the value of the whole expression. (The captures still remain inspectable in the exception instance.) +If nothing but such trivialities were captured, the failure message will instead report the value of the whole expression. The captures still remain inspectable in the exception instance. -To make testing/debugging macro code more convenient, the `the[]` mechanism automatically unparses an AST value into its source code representation for display in the test failure message. This is meant for debugging macro utilities, to which a test case hands some quoted code (i.e. code lifted into its AST representation using mcpyrate's `q[]` macro). See [`unpythonic.syntax.test.test_letdoutil`](unpythonic/syntax/test/test_letdoutil.py) for some examples. (Note the unparsing is done for display only; the raw value remains inspectable in the exception instance.) +To make testing/debugging macro code more convenient, the `the[]` mechanism automatically unparses an AST value into its source code representation for display in the test failure message. This is meant for debugging macro utilities, to which a test case hands some quoted code (i.e. code lifted into its AST representation using mcpyrate's `q[]` macro). 
See [`unpythonic.syntax.tests.test_letdoutil`](unpythonic/syntax/tests/test_letdoutil.py) for some examples. Note the unparsing is done for display only; the raw value remains inspectable in the exception instance. -**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See ``mcpyrate.unparse``. +**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See `mcpyrate.unparse`. -**CAUTION**: The name of the `the[]` construct was inspired by Common Lisp, but the semantics are completely different. Common Lisp's `THE` is a return-type declaration (pythonistas would say *return-type annotation*), meant as a hint for the compiler to produce performance-optimized compiled code (see [chapter 32 of Peter Seibel's Practical Common Lisp](http://www.gigamonkeys.com/book/conclusion-whats-next.html)), whereas our `the[]` captures a value for test reporting. The only common factors are the name, and that neither construct changes the semantics of the marked code, much. In `unpythonic.test.fixtures`, the reason behind picking this name was that it doesn't change the flow of the source code as English that much, specifically to suggest, between the lines, that it doesn't change the semantics much. The reasoning behind CL's `THE` may be similar. +**CAUTION**: The name of the `the[]` construct was inspired by Common Lisp, but that is where the similarities end. The `THE` construct of Common Lisp is a return-type declaration (pythonistas would say *return-type annotation*), meant as a hint for the compiler to produce performance-optimized compiled code. See [chapter 32 in Practical Common Lisp by Peter Seibel](http://www.gigamonkeys.com/book/conclusion-whats-next.html). In contrast, our `the[]` captures a value for test reporting. 
The only common factors are the name, and that neither construct changes the semantics of the marked code, much. In `unpythonic.test.fixtures`, the reason behind picking this name was that it does not change the flow of the source code as English that much, specifically to suggest, between the lines, that it does not change the semantics much. The reasoning behind CL's `THE` may be similar, but I have not researched its etymology. #### Test sessions and testsets The `with session()` in the example session above is optional. The human-readable session name is also optional, used for display purposes only. The session serves two roles: it provides an exit point for `terminate`, and defines an implicit top-level `testset`. -Tests can optionally be grouped into testsets. Each `testset` tallies passed, failed and errored tests within it, and displays the totals when it exits. Testsets can be named and nested. +Tests can optionally be grouped into testsets. Each `testset` tallies passed, failed and errored tests within it, and displays the totals when the context exits. Testsets can be named and nested. -It is useful to have at least one `testset` (e.g. the implicit top-level one established by `with session`), because the `testset` mechanism forms one half of the test framework. It is possible to use the test macros without a `testset`, but that is only intended for building alternative test frameworks. +It is useful to have at least one `testset` (the implicit top-level one established by `with session` is fine), because the `testset` mechanism forms fully one half of the test framework. It is technically possible to use the testing macros without a `testset`, but that is only intended for building alternative test frameworks. Testsets also provide an option to locally install a `postproc` handler that gets a copy of each failure or error in that testset (and by default, any of its inner testsets), after the failure or error has been printed. 
In nested testsets, the dynamically innermost `postproc` wins. A failure is an instance of `unpythonic.test.fixtures.TestFailure`, an error is an instance of `unpythonic.test.fixtures.TestError`, and a warning is an instance of `unpythonic.test.fixtures.TestWarning`. All three inherit from `unpythonic.test.fixtures.TestingException`. Beside the human-readable message, these exception types contain attributes with programmatically inspectable information about what happened. -If you want to set a default global `postproc`, which is used when no local `postproc` is in effect, this too is configured in the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. +If you want to set a default global `postproc`, which is used when no local `postproc` is in effect, this is configured in the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. The `with testset` construct comes with one other important feature. The nearest dynamically enclosing `with testset` **catches any stray exceptions or signals** that occur within its dynamic extent, but outside a test construct. @@ -1844,7 +2298,7 @@ In case of an uncaught signal, the error is reported, and the testset resumes. In case of an uncaught exception, the error is reported, and the testset terminates, because the exception model does not support resuming. -Catching of uncaught *signals*, in both the low-level `test` constructs and the high-level `testset`, can be disabled using `with catch_signals(False)`. This is useful in testing code that uses conditions and restarts; sometimes allowing a signal (e.g. from `unpythonic.conditions.warn`) to remain uncaught is the right thing to do. +Catching of uncaught *signals*, in both the low-level `test` constructs and the high-level `testset`, can be disabled using `with catch_signals(False)`. This is useful in testing code that uses conditions and restarts; sometimes allowing a signal (e.g. 
from `unpythonic.warn` in the conditions-and-restarts system) to remain uncaught is the right thing to do. #### Producing unconditional failures, errors, and warnings @@ -1854,15 +2308,15 @@ The helper macros `fail[message]`, `error[message]` and `warn[message]` uncondit - `error[...]` if some part of your tests is unable to run. - `warn[...]` if some tests are temporarily disabled and need future attention, e.g. for syntactic compatibility to make the code run for now on an old Python version. -Currently (v0.14.3), warnings produced by `warn[]` are not counted in the total number of tests run. But you can still get the warning count from the separate counter `unpythonic.test.fixtures.tests_warned` (see `unpythonic.collections.box`; basically you can `b.get()` or `unbox(b)` to read the value currently inside a box). +Currently (v0.14.3), warnings produced by `warn[]` are not counted in the total number of tests run. But you can still get the warning count from the separate counter `unpythonic.test.fixtures.tests_warned` (see `unpythonic.box`; basically you can `b.get()` or `unbox(b)` to read the value currently inside a box). #### Advanced: building a custom test framework -If `unpythonic.test.fixtures` does not fit your needs and you want to experiment with creating your own framework, the test asserter macros are reusable. For reference, their implementations can be found in `unpythonic.syntax.testingtools`. They refer to a few objects in `unpythonic.test.fixtures`; consider these a common ground that is not strictly part of the surrounding framework. +If `unpythonic.test.fixtures` does not fit your needs and you want to experiment with creating your own framework, the test asserter macros are reusable. Their implementations can be found in `unpythonic.syntax.testingtools`. They refer to a few objects in `unpythonic.test.fixtures`; consider these a common ground that is not strictly part of the surrounding framework. 
Start by reading the docstring of the `test` macro, which documents some low-level details. -Set up a condition handler to intercept test failures and errors. These will be signaled via `cerror`, using the conditions and restarts mechanism. See `unpythonic.conditions`. Report the failure/error in any way you desire, and then invoke the `proceed` restart (from your condition handler) to let testing continue. +Set up a condition handler to intercept test failures and errors. These will be signaled via `cerror`, using the conditions and restarts mechanism. See the module `unpythonic.conditions`. Report the failure/error in any way you desire, and then invoke the `proceed` restart (from your condition handler) to let testing continue. Look at the implementation of `testset` as an example. @@ -1870,34 +2324,40 @@ Look at the implementation of `testset` as an example. Because `unpythonic` is effectively a language extension, the standard options were not applicable. -The standard library's [`unittest`](https://docs.python.org/3/library/unittest.html) fails with `unpythonic` due to technical reasons related to `unpythonic`'s unfortunate choice of module names. The `unittest` framework chokes if a module in a library exports anything that has the same name as the module itself, and the library's top-level init then `from`-imports that construct into its namespace, causing the *module reference*, that was [implicitly brought in](http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html#the-submodules-are-added-to-the-package-namespace-trap) by the `from`-import itself, to be overwritten with what was explicitly imported: a reference to the construct that has the same name as the module. (Bad naming on my part, yes, but we're stuck with it at least until v0.15.0. As of v0.14.3, I see no reason to cross that particular bridge yet.) 
+The standard library's [`unittest`](https://docs.python.org/3/library/unittest.html) fails with `unpythonic` due to technical reasons related to `unpythonic`'s unfortunate choice of module names. The `unittest` framework crashes if a module in a library exports anything that has the same name as the module itself, and the library's top-level init then `from`-imports that construct into its namespace, causing the *module reference*, that was [implicitly brought in](http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html#the-submodules-are-added-to-the-package-namespace-trap) by the `from`-import itself, to be overwritten with what was explicitly imported: a reference to the construct that has the same name as the module. This is bad naming on my part, yes, but as of v0.15.0, I see no reason to cross that particular bridge yet. + +Also, in my opinion, `unittest` is overly verbose to use; automated tests are already a particularly verbose kind of program, even if the testing syntax is minimal. Eliminating extra verbosity encourages writing more tests. -Also, in my opinion, `unittest` is overly verbose to use; automated tests are already a particularly verbose kind of program, even if the testing syntax is minimal. +[Pytest](https://docs.pytest.org/en/latest/), on the other hand, provides compact syntax by hijacking the assert statement, but its import hook (to provide that syntax) cannot coexist with a macro expander, which also needs to install a (different) import hook. Pytest is also fairly complex. -[Pytest](https://docs.pytest.org/en/latest/), on the other hand, provides compact syntax by hijacking the assert statement, but its import hook (to provide that syntax) can't coexist with a macro expander, which also needs to install a different import hook. It's also fairly complex. +The central functional requirement for whatever would be used for testing `unpythonic` was to be able to *easily* deal with macro-enabled Python. 
No hoops to jump through, compared to testing regular Python, in order to be able to test all of `unpythonic` (including `unpythonic.syntax`) in a uniform way. -The central functional requirement for whatever would be used for testing `unpythonic` was to be able to easily deal with macro-enabled Python. No hoops to jump through, compared to testing regular Python, in order to be able to test all of `unpythonic` (including `unpythonic.syntax`) in a uniform way. +Also, if I were going to build my own framework, it would be nice for it to work seamlessly with code that uses conditions and restarts - since those are part of `unpythonic`, but not standard Python. -Simple and minimalistic would be a bonus. As of v0.14.3, the whole test framework is about 1.3k SLOC, counting docstrings, comments and blanks; under 600 SLOC if counting only active code lines. Add another 800 SLOC (all) / 200 SLOC (active code lines) for the machinery that implements conditions and restarts. +Simple and minimalistic would be a bonus. As of v0.15.0, the whole test framework is about 1.8k SLOC, counting docstrings, comments and blanks; under 700 SLOC if counting only active code lines. Add another 1k SLOC (all) / 200 SLOC (active code lines) for the machinery that implements conditions and restarts. -The framework will likely still evolve a bit as I find more holes in the [UX](https://en.wikipedia.org/wiki/User_experience) - which so far has led to features such as `the[]` and AST value auto-unparsing - but most of the desired functionality is already there. For example, I consider pytest-style implicit fixtures and a central test discovery system as outside the scope of this system. +The framework will likely still evolve a bit as I find more holes in the [UX](https://en.wikipedia.org/wiki/User_experience) - which so far has led to features such as `the[]` and AST value auto-unparsing - but most of the desired functionality is already present and working fine.
For example, I consider pytest-style implicit fixtures and a central test discovery system as outside the scope of this framework. Such machinery does make the code shorter, but is perhaps slightly too much magic. -It's clear that `unpythonic.test.fixtures` is not going to replace `pytest`, nor does it aim to do so - [any more than Chuck Moore's Forth-based VLSI tools](https://yosefk.com/blog/my-history-with-forth-stack-machines.html) were intended to replace the commercial [VLSI](https://en.wikipedia.org/wiki/Very_Large_Scale_Integration) offerings. +It is clear that `unpythonic.test.fixtures` is not going to replace `pytest`, nor does it aim to do so - [any more than Chuck Moore's Forth-based VLSI tools](https://yosefk.com/blog/my-history-with-forth-stack-machines.html) were intended to replace the commercial [VLSI](https://en.wikipedia.org/wiki/Very_Large_Scale_Integration) offerings. -What we have is small, simple, custom-built for its purpose (works well with macro-enabled Python; integrates with conditions and restarts), arguably somewhat pedagogic (demonstrates how to build a test framework in under 1k SLOC), and importantly, works just fine. +What we have is small, simple, custom-built for its purpose (works well with macro-enabled Python; integrates with conditions and restarts), arguably somewhat pedagogic (demonstrates how to build a test framework in under 700 active SLOC), and importantly, works just fine. #### Etymology and roots -[Test fixture](https://en.wikipedia.org/wiki/Test_fixture) *is an environment used to consistently test some item, device, or piece of software*. In automated tests, it is typically a piece of code that is reused within the test suite of a project, to perform initialization and/or teardown tasks common to several test cases. +A [test fixture](https://en.wikipedia.org/wiki/Test_fixture) is defined as *an environment used to consistently test some item, device, or piece of software*.
In automated tests, it is typically a piece of code that is reused within the test suite of a project, to perform initialization and/or teardown tasks common to several test cases. -A test framework can be reused across many different projects, and the error-catching and reporting code, if anything, is something that is shared across all test cases. Also, following our naming scheme, it had to be called `unpythonic.test.something`, and `fixtures` just happened to fit the theme. +A test framework can be reused across many different projects, and the error-catching and reporting code, if anything, is something that is shared across all test cases. Also, following our naming scheme, the framework had to be called `unpythonic.test.something`, and `fixtures` just happened to fit the theme. Inspired by [Julia](https://julialang.org/)'s standard-library [`Test` package](https://docs.julialang.org/en/v1/stdlib/Test/), and [chapter 9 of Peter Seibel's Practical Common Lisp](http://www.gigamonkeys.com/book/practical-building-a-unit-test-framework.html). -### ``dbg``: debug-print expressions with source code +### `dbg`: debug-print expressions with source code + +**Changed in v0.15.0.** *We now use the [`mcpyrate`](https://github.com/Technologicat/mcpyrate/) macro expander instead of `macropy`. Updated the REPL note below.* -**Changed in 0.14.2.** The `dbg[]` macro now works in the REPL, too. You can use `mcpyrate.repl.console` (a.k.a. `macropython -i` in the shell) or the IPython extension `mcpyrate.repl.iconsole`. +*Also, `dbgprint_expr` is now a dynvar.* + +**Changed in 0.14.2.** *The `dbg[]` macro now works in the REPL, too. You can use `mcpyrate.repl.console` (a.k.a. `macropython -i` in the shell) or the IPython extension `mcpyrate.repl.iconsole`.* [DRY](https://en.wikipedia.org/wiki/Don't_repeat_yourself) out your [qnd](https://en.wiktionary.org/wiki/quick-and-dirty) debug printing code. 
Both block and expression variants are provided: @@ -1920,7 +2380,7 @@ z = dbg[25 + 17] # --> [file.py:15] (25 + 17): 42 assert z == 42 # surrounding an expression with dbg[...] doesn't alter its value ``` -**In the block variant**, just like in ``nb``, a custom print function can be supplied as the first positional argument. This avoids transforming any uses of built-in ``print``: +**In the block variant**, just like in `nb`, a custom print function can be supplied as the first positional argument. This avoids transforming any uses of built-in `print`: ```python prt = lambda *args, **kwargs: print(*args) @@ -1937,13 +2397,13 @@ with dbg[prt]: ``` -The reference to the custom print function (i.e. the argument to the ``dbg`` block) **must be a bare name**. Support for methods may or may not be added in a future version. +The reference to the custom print function (i.e. the argument to the `dbg` block) **must be a bare name**. Support for methods may or may not be added in a future version. -**In the expr variant**, to customize printing, just assign a function to the dynvar ``dbgprint_expr`` via `with dyn.let(dbgprint_expr=...)`. If no custom printer is set, a default implementation is used. +**In the expr variant**, to customize printing, just assign a function to the dynvar `dbgprint_expr` via `with dyn.let(dbgprint_expr=...)`. If no custom printer is set, a default implementation is used. -For details on implementing custom debug print functions, see the docstrings of ``unpythonic.syntax.dbgprint_block`` and ``unpythonic.syntax.dbgprint_expr``, which provide the default implementations. +For details on implementing custom debug print functions, see the docstrings of `unpythonic.syntax.dbgprint_block` and `unpythonic.syntax.dbgprint_expr`, which provide the default implementations. -**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). 
See ``mcpyrate.unparse``. +**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See `mcpyrate.unparse`. Inspired by the [dbg macro in Rust](https://doc.rust-lang.org/std/macro.dbg.html). @@ -1951,9 +2411,9 @@ Inspired by the [dbg macro in Rust](https://doc.rust-lang.org/std/macro.dbg.html Stuff that didn't fit elsewhere. -### ``nb``: silly ultralight math notebook +### `nb`: silly ultralight math notebook -Mix regular code with math-notebook-like code in a ``.py`` file. To enable notebook mode, ``with nb``: +Mix regular code with math-notebook-like code in a `.py` file. To enable notebook mode, `with nb`: ```python from unpythonic.syntax import macros, nb @@ -1973,9 +2433,9 @@ with nb[pprint]: assert _ == 3 * x * y ``` -Expressions at the top level auto-assign the result to ``_``, and auto-print it if the value is not ``None``. Only expressions do that; for any statement that is not an expression, ``_`` retains its previous value. +Expressions at the top level auto-assign the result to `_`, and auto-print it if the value is not `None`. Only expressions do that; for any statement that is not an expression, `_` retains its previous value. -A custom print function can be supplied as the first positional argument to ``nb``. This is useful with SymPy (and [latex-input](https://github.com/clarkgrubb/latex-input) to use α, β, γ, ... as actual variable names). +A custom print function can be supplied as the first positional argument to `nb`. This is useful with SymPy (and [latex-input](https://github.com/clarkgrubb/latex-input) to use α, β, γ, ... as actual variable names). Obviously not intended for production use, although is very likely to work anywhere. 
@@ -1985,38 +2445,85 @@ Is this just a set of macros, a language extension, or a compiler for a new lang ### The xmas tree combo -The macros in ``unpythonic.syntax`` are designed to work together, but some care needs to be taken regarding the order in which they expand. +The macros in `unpythonic.syntax` are designed to work together, but some care needs to be taken regarding the order in which they expand. This complexity unfortunately comes with any pick-and-mix-your-own-language kit, because some features inevitably interact. For example, it is possible to lazify [continuation-enabled](https://en.wikipedia.org/wiki/Continuation-passing_style) code, but running the transformations the other way around produces nonsense. -The block macros are designed to run **in the following order (leftmost first)**: +The correct **xmas tree invocation** is: +```python +with prefix, autoreturn, quicklambda, multilambda, envify, lazify, namedlambda, autoref, autocurry, tco: + ... ``` -prefix > autoreturn, quicklambda > multilambda > continuations or tco > ... - ... > curry > namedlambda, autoref > lazify > envify + +Here `tco` can be replaced with `continuations`, if needed. + +We have taken into account that: + + - Outside-in: `prefix`, `autoreturn`, `quicklambda`, `multilambda` + - Two-pass: `envify`, `lazify`, `namedlambda`, `autoref`, `autocurry`, `tco`/`continuations` + +[The dialect examples](dialects.md) use this ordering. + +For simplicity, **the block macros make no attempt to prevent invalid combos**, unless there is a specific technical reason to do that for some particular combination. Be careful; e.g. do not nest several `with tco` blocks (lexically), that will not work. + +As an example of a specific technical reason, the `tco` macro skips already expanded `with continuations` blocks lexically contained within the `with tco`. This allows the [Lispython dialect](dialects/lispython.md) to support `continuations`. + + +#### AST edit order vs. 
macro invocation order + +The **AST edits** performed by the block macros are designed to run in the following order (leftmost first): + +``` +prefix > nb > autoreturn, quicklambda > multilambda > continuations or tco > ... + ... > autocurry > namedlambda, autoref > lazify > envify ``` -The ``let_syntax`` (and ``abbrev``) block may be placed anywhere in the chain; just keep in mind what it does. +The `let_syntax` (and `abbrev`) block may be placed anywhere in the chain; just keep in mind what it does. -The ``dbg`` block can be run at any position after ``prefix`` and before ``tco`` (or ``continuations``). (It must be able to see regular function calls.) +The `dbg` block can be run at any position after `prefix` and before `tco` (or `continuations`). It must be able to see function calls in Python's standard format, for detecting calls to the print function. -For simplicity, **the block macros make no attempt to prevent invalid combos** (unless there is a specific technical reason to do that for some particular combination). Be careful; e.g. don't nest several ``with tco`` blocks (lexically), that won't work. +The correct ordering for **block macro invocations** - which is the actual user-facing part - is somewhat complicated by the fact that some of the above are two-pass macros. Consider this artificial example, where `mac` is a two-pass macro: + +```python +with mac: + with cheese: + ... +``` + +The invocation `with mac` is *lexically on the outside*, thus the macro expander sees it first. The expansion order then becomes: + + 1. First pass (outside in) of `with mac`. + 2. Explicit recursion by `with mac`. This expands the `with cheese`. + 3. Second pass (inside out) of `with mac`. + +So, for example, even though `lazify` must *perform its AST edits* after `autocurry`, it happens to be a two-pass macro. The first pass (outside in) only performs some preliminary analysis; the actual lazification happens in the second pass (inside out). 
So the correct invocation combining these two is `with lazify, autocurry`. Similarly, `with lazify, continuations` is correct, even though the CPS transformation must occur first; these are both two-pass macros that perform their edits in the inside-out pass. + +Further details on individual block macros can be found in our [notes on macros](design-notes.md#detailed-notes-on-macros). + + +#### Single-line vs. multiline invocation format Example combo in the single-line format: ```python -with autoreturn, tco, lazify: +with autoreturn, lazify, tco: ... ``` -In the multiline format: +The same combo in the multiline format: ```python -with lazify: - with tco: - with autoreturn: +with autoreturn: + with lazify: + with tco: ... ``` -See our [notes on macros](../doc/design-notes.md#detailed-notes-on-macros) for more information. +In MacroPy (which was used up to v0.14.3), there sometimes were [differences](https://github.com/azazel75/macropy/issues/21) between the behavior of the single-line and multi-line invocation format, but in `mcpyrate` (which is used by v0.15.0 and later), they should behave the same. + +With `mcpyrate`, there is still [a minor difference](https://github.com/Technologicat/mcpyrate/issues/3) if there are at least three nested macro invocations, and a macro is scanning the tree for another macro invocation; then the tree looks different depending on whether the single-line or the multi-line format was used. The differences in that are as one would expect knowing [what `with` statements look like](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#With) in the Python AST. The reason the difference manifests only for three or more macro invocations is that `mcpyrate` pops the macro that is being expanded before it hands over the tree to the macro code; hence if there are only two, the inner tree will have only one "context manager" in its `with`.
+ +**NOTE** to the curious, and to future documentation maintainers: To see if something is a two-pass macro, grep the codebase for `expander.visit_recursively`; that is the *explicit recursion* mentioned above, and means that within that function, anything below that line will run in the inside-out pass. See [the `mcpyrate` manual](https://github.com/Technologicat/mcpyrate/blob/master/doc/main.md#expand-macros-inside-out). + ### Emacs syntax highlighting @@ -2024,7 +2531,7 @@ This Elisp snippet can be used to add syntax highlighting for keywords specific ```elisp (defun my/unpythonic-syntax-highlight-setup () - "Set up additional syntax highlighting for `unpythonic.syntax' and MacroPy in Python mode." + "Set up additional syntax highlighting for `unpythonic.syntax' and `mcpyrate` in Python mode." ;; adapted from code in dash.el (let ((new-keywords '("test" "test_raises" "test_signals" "fail" "the" "error" "warn" ; both testing macros and condition signaling protocols @@ -2036,7 +2543,7 @@ This Elisp snippet can be used to add syntax highlighting for keywords specific "where" "do" "local" "delete" "continuations" "call_cc" - "curry" "lazify" "envify" "tco" "prefix" "autoreturn" "forall" + "autocurry" "lazify" "envify" "tco" "prefix" "autoreturn" "forall" "multilambda" "namedlambda" "quicklambda" "cond" "aif" "autoref" "dbg" "nb" "macros" "dialects" "q" "u" "n" "a" "s" "t" "h")) ; mcpyrate @@ -2057,12 +2564,12 @@ Tested with `anaconda-mode`. #### How to use (for Emacs beginners) -If you use the [Spacemacs](http://spacemacs.org/) kit, the right place to insert the snippet is into the function `dotspacemacs/user-config`. Here's [my spacemacs.d](https://github.com/Technologicat/spacemacs.d/) for reference; the snippet is in `prettify-symbols-config.el`, and it's invoked from `dotspacemacs/user-config` in `init.el`. +If you use the [Spacemacs](http://spacemacs.org/) kit, the right place to insert the snippet is into the function `dotspacemacs/user-config`. 
Here's [my spacemacs.d](https://github.com/Technologicat/spacemacs.d/) for reference; the snippet is in `prettify-symbols-config.el`, and it is invoked from `dotspacemacs/user-config` in `init.el`. In a basic Emacs setup, the snippet goes into the `~/.emacs` startup file, or if you have an `.emacs.d/` directory, then into `~/.emacs.d/init.el`. ### This is semantics, not syntax! -[Strictly speaking](https://stackoverflow.com/questions/17930267/what-is-the-difference-between-syntax-and-semantics-of-programming-languages), ``True``. We just repurpose Python's existing syntax to give it new meanings. However, in [the Racket reference](https://docs.racket-lang.org/reference/), **a** *syntax* designates a macro, in contrast to a *procedure* (regular function). We provide syntaxes in this particular sense. The name ``unpythonic.syntax`` is also shorter to type than ``unpythonic.semantics``, less obscure, and close enough to convey the intended meaning. +[Strictly speaking](https://stackoverflow.com/questions/17930267/what-is-the-difference-between-syntax-and-semantics-of-programming-languages), `True`. We just repurpose Python's existing syntax to give it new meanings. However, in [the Racket reference](https://docs.racket-lang.org/reference/), **a** *syntax* designates a macro, in contrast to a *procedure* (regular function). We provide syntaxes in this particular sense. The name `unpythonic.syntax` is also shorter to type than `unpythonic.semantics`, less obscure, and close enough to convey the intended meaning. If you want custom *syntax* proper, or want to package a set of block macros as a custom language that extends Python, then you may be interested in our sister project [`mcpyrate`](https://github.com/Technologicat/mcpyrate). 
diff --git a/doc/readings.md b/doc/readings.md index 76a146ba..8ded43a7 100644 --- a/doc/readings.md +++ b/doc/readings.md @@ -1,3 +1,24 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- [Essays](essays.md) +- **Additional reading** +- [Contribution guidelines](../CONTRIBUTING.md) + + +**Table of Contents** + +- [Links to relevant reading](#links-to-relevant-reading) +- [Python-related FP resources](#python-related-fp-resources) + + + # Links to relevant reading This document collects links to blog posts, online articles and actual scientific papers on topics at least somewhat relevant in the context of `unpythonic`. @@ -16,7 +37,7 @@ The common denominator is programming. Some relate to language design, some to c - [William R. Cook, OOPSLA 2009: On Understanding Data Abstraction, Revisited](https://www.cs.utexas.edu/~wcook/Drafts/2009/essay.pdf). - This is a nice paper illustrating the difference between *abstract data types* and *objects*. - In section 4.3: *"In the 1970s [...] Reynolds noticed that abstract data types facilitate adding new operations, while 'procedural data values' (objects) facilitate adding new representations. Since then, this duality has been independently discovered at least three times [18, 14, 33]."* Then: *"The extensibility problem has been solved in numerous ways, and it still inspires new work on extensibility of data abstractions [48, 15]. Multi-methods are another approach to this problem [11]."* - - Multi-methods (as in multiple dispatch in CLOS or in Julia) seem nice, in that they don't enfore a particular way to slice the operation/representation matrix. Instead, one fills in individual cells as desired. 
+ - Multimethods (as in multiple dispatch in CLOS or in Julia) seem nice, in that they don't enforce a particular way to slice the operation/representation matrix. Instead, one fills in individual cells as desired. It solves [the expression problem](https://en.wikipedia.org/wiki/Expression_problem). - In section 5.4, on Smalltalk: *"One conclusion you could draw from this analysis is that the untyped λ-calculus was the first object-oriented language."* - In section 6: *"Academic computer science has generally not accepted the fact that there is another form of data abstraction besides abstract data types. Hence the textbooks give the classic stack ADT and then say 'objects are another way to implement abstract data types'. [...] Some textbooks do better than others. Louden [38] and Mitchell [43] have the only books I found that describe the difference between objects and ADTs, although Mitchell does not go so far as to say that objects are a distinct kind of data abstraction."* @@ -61,12 +82,21 @@ The common denominator is programming. Some relate to language design, some to c - [Clean Code for Python](https://github.com/zedr/clean-code-python) - *Software engineering principles, from Robert C. Martin's book [Clean Code](https://www.amazon.com/Clean-Code-Handbook-Software-Craftsmanship/dp/0132350882), adapted for Python.* -- [PyPy3](http://pypy.org/), fast, JIT-ing Python 3 that's mostly a drop-in replacement for CPython 3.6. Macro expanders (`macropy`, `mcpyrate`) work, too. +- [PyPy3](http://pypy.org/), fast, JIT-ing Python 3 that's mostly a drop-in replacement for CPythons 3.6 and 3.7. As of April 2021, support for 3.8 is in the works. Macro expanders (`macropy`, `mcpyrate`) work, too. -- [Brython](https://brython.info/): Python 3 in the browser, as a replacement for JavaScript. - - No separate compile step - the compiler is implemented in JS. Including a script tag of type text/python invokes it.
- - Doesn't have the `ast` module, so no way to run macro expanders. - - Also quite a few other parts are missing, understandably. Keep in mind the web client is rather different as an environment from the server side or the desktop. So for new apps, Brython is ok, but if you have some existing Python code you want to move into the browser, it might or might not work, depending on what your code needs. +- [Pyodide](https://github.com/pyodide/pyodide): Python with the scientific stack, compiled to WebAssembly. + - [Docs](https://pyodide.org/en/stable/). + - [Online REPL](https://pyodide.org/en/stable/console.html). + - Has **the scientific Python stack**, and also supports **any pure-Python PyPI wheel**. + - The `ast` module works. This should be able to run `mcpyrate` and `unpythonic` in the browser! + +- Historical Python-in-the-browser efforts: + - [Brython](https://brython.info/): Python 3 in the browser, as a replacement for JavaScript. + - No separate compile step - the compiler is implemented in JS. Including a script tag of type text/python invokes it. + - Doesn't have the `ast` module, so no way to run macro expanders. + - Also quite a few other parts are missing, understandably. Keep in mind the web client is rather different as an environment from the server side or the desktop. So for new apps, Brython is ok, but if you have some existing Python code you want to move into the browser, it might or might not work, depending on what your code needs. + - [PyPy.js](http://pypyjs.org/): PyPy python interpreter, compiled for the web via [emscripten](http://emscripten.org/), with a custom JIT backend that emits [asm.js](http://asmjs.org/) code at runtime. + - Last updated in 2015, no longer working. - Counterpoint: [Eric Torreborre (2019): When FP does not save us](https://medium.com/barely-functional/when-fp-does-not-save-us-92b26148071f) @@ -129,7 +159,7 @@ The common denominator is programming. 
Some relate to language design, some to c - A special `uninitialized` value (which the paper calls ☠) is needed, because Scope - in the sense of controlling lexical name resolution - is a static (purely lexical) concept, but whether a particular name (once lexically resolved) has been initialized (or, say, whether it has been deleted) is a dynamic (run-time) feature. (I would say "property", if that word didn't have an entirely different technical meaning in Python.) - Our `continuations` macro essentially does what the authors call *a standard [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) transformation*, plus some technical details due to various bits of impedance mismatch. -- [John Shutt's blog](https://fexpr.blogspot.com/) contains many interesting posts on programming language design. He's the author of the [Kernel](https://web.cs.wpi.edu/~jshutt/kernel.html) Lisp dialect. Some pickings from the blog: +- [John Shutt's blog](https://fexpr.blogspot.com/) contains many interesting posts on programming language design. He [was](http://lambda-the-ultimate.org/node/5623) the author of the [Kernel](https://web.cs.wpi.edu/~jshutt/kernel.html) Lisp dialect. Some pickings from his blog: - [Fexpr (2011)](https://fexpr.blogspot.com/2011/04/fexpr.html). - The common wisdom that macros were a better choice is misleading. - [Bypassing no-go theorems (2013)](https://fexpr.blogspot.com/2013/07/bypassing-no-go-theorems.html). @@ -137,12 +167,66 @@ The common denominator is programming. Some relate to language design, some to c - [Abstractive power (2013)](https://fexpr.blogspot.com/2013/12/abstractive-power.html). - [Where do types come from? (2011)](https://fexpr.blogspot.com/2011/11/where-do-types-come-from.html). - [Continuations and term-rewriting calculi (2014)](https://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html). 
+ - [Interpreted programming languages (2016)](https://fexpr.blogspot.com/2016/08/interpreted-programming-languages.html) - Discussion of Kernel on LtU: [Decomposing lambda - the Kernel language](http://lambda-the-ultimate.org/node/1680). - [Walid Taha 2003: A Gentle Introduction to Multi-stage Programming](https://www.researchgate.net/publication/221024597_A_Gentle_Introduction_to_Multi-stage_Programming) +- *Holy traits*: + - [Tom Kwong 2020: Holy Traits Pattern](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/) (book excerpt) + - [Lyndon White 2019: Emergent features of Julialang: Part II - Traits](https://www.juliabloggers.com/the-emergent-features-of-julialang-part-ii-traits/) + - [Harrison Grodin 2019: Multiple inheritance, sans inheritance](https://github.com/HarrisonGrodin/radical-julia/tree/master/traits) + - [Types vs. traits for dispatch](https://discourse.julialang.org/t/types-vs-traits-for-dispatch/46296) (discussion) + - We have a demonstration in [unpythonic.tests.test_dispatch](../unpythonic/tests/test_dispatch.py). + +- [Pascal Costanza's Highly Opinionated Guide to Lisp (2013)](http://www.p-cos.net/lisp/guide.html) + +- [Peter Seibel (2005): Practical Common Lisp](https://gigamonkeys.com/book/) + - This book is an excellent introduction that walks through Common Lisp, including some advanced features. It is also useful for non-lispers to take home interesting ideas from CL. + +- R. Kent Dybvig, Simon Peyton Jones, Amr Sabry (2007). A Monadic Framework for Delimited Continuations. Journal of functional programming, 17(6), 687-730. Preprint [here](https://legacy.cs.indiana.edu/~dyb/pubs/monadicDC.pdf). + - Particularly approachable explanation of delimited continuations. + - Could try building that for `unpythonic` in a future version. 
+ +- [Wat: Concurrency and Metaprogramming for JS](https://github.com/manuel/wat-js) + - [pywat: Interpreter of the Wat language written in Python](https://github.com/piokuc/pywat) + - [Example of Wat in Manuel Simoni's blog (2013)](http://axisofeval.blogspot.com/2013/05/green-threads-in-browser-in-20-lines-of.html) + +- [Richard P. Gabriel, Kent M. Pitman (2001): Technical Issues of Separation in Function Cells and Value Cells](https://dreamsongs.com/Separation.html) + - A discussion of [Lisp-1 vs. Lisp-2](https://en.wikipedia.org/wiki/Lisp-1_vs._Lisp-2), particularly of historical interest. + - Summary: Lisp-1 often leads to more readable code than Lisp-2, but by the time this became clear, for Common Lisp that ship had already sailed. The authors suggest that instead of fixing CL with a backward compatibility breaking change, future Lisps would do well to take lessons learned from both Scheme and Common Lisp. In my own opinion, [Racket](https://racket-lang.org/) indeed has. + - Interestingly, there are more namespaces in Lisps than just values and functions, so, as the authors note, the popular names "Lisp-1" and "Lisp-2" are actually misnomers. For example, the labels for the Common Lisp construct `TAGBODY`/`GO` live in their own namespace. + - If explained using Python terminology, a Common Lisp symbol instance essentially has one attribute for each namespace, that stores the value bound to that symbol in that namespace. + +- [`hoon`: The C of Functional Programming](https://urbit.org/docs/hoon/) + - Interesting take on an alternative computing universe where the functional camp won systems programming. These people have built [a whole operating system](https://github.com/urbit/urbit) on a Turing-complete non-lambda automaton, Nock. + - For my take, see [the opinion piece in Essays](essays.md#hoon-the-c-of-functional-programming). + - Judging by the docs, `hoon` is definitely ha-ha-only-serious, but I am not sure whether it is serious-serious.
See the comments to [the entry on Manuel Simoni's blog](http://axisofeval.blogspot.com/2015/07/what-i-learned-about-urbit-so-far.html) - some people do think `hoon` is actually useful. + - Technical points: + - `hoon` does not have syntactic macros. The reason given in the docs is the same as sometimes heard in the Python community - having a limited number of standard control structures, you always know what you are looking at. + - Interestingly, `hoon` has uniform support for *wide* and *tall* modes; it does not use parentheses, but uses a single space (in characteristic `hoon` fashion, termed an *ace*) versus multiple spaces (respectively, a *gap*). "Multiple spaces" also allows newlines, like in LaTeX. So [SRFI-110](https://srfi.schemers.org/srfi-110/srfi-110.html) is not the only attempt at a two-mode uniform grouping syntax. + +- *Ab initio* programming language efforts: + - `hoon`, see separate entry above. + - [Arc](http://www.paulgraham.com/arc.html) by Paul Graham and Robert Morris. + - [Discussion on](https://news.ycombinator.com/item?id=10535364) the Nile programming language developed by Ian Piumarta, Alan Kay, et al. + - Especially the low-level [Maru](https://www.piumarta.com/software/maru/) language by Ian Piumarta seems interesting. + - *Maru is a symbolic expression evaluator that can compile its own implementation language.* + - It compiles s-expressions to IA32 machine code, and has a metacircular evaluator implemented in less than 2k SLOC. It bootstraps from C. + +- [LtU: Why is there no widely accepted progress for 50 years?](http://lambda-the-ultimate.org/node/5590) + - Discussion on how programming languages *have* improved. + - Contains interesting viewpoints, such as dmbarbour's suggestion that much of modern hardware is essentially "compiled" from a hardware description language such as VHDL.
+ +- [Matthew Might: First-class (run-time) macros and meta-circular evaluation](https://matt.might.net/articles/metacircular-evaluation-and-first-class-run-time-macros/) + - *First-class macros are macros that can be bound to variables, passed as arguments and returned from functions. First-class macros expand and evaluate syntax at run-time.* + +- Useful concepts for programming language design: + - [Cognitive dimensions of notations](https://en.wikipedia.org/wiki/Cognitive_dimensions_of_notations) + - [System quality attributes](https://en.wikipedia.org/wiki/List_of_system_quality_attributes) + -## Python-related FP resources +# Python-related FP resources Python clearly wants to be an impure-FP language. A decorator with arguments *is a curried closure* - how much more FP can you get? @@ -161,6 +245,7 @@ Python clearly wants to be an impure-FP language. A decorator with arguments *is - [pyrsistent: Persistent/Immutable/Functional data structures for Python](https://github.com/tobgu/pyrsistent) - [pampy: Pattern matching for Python](https://github.com/santinic/pampy) (pure Python, no AST transforms!) + - Note that Python got [native support for pattern matching in 3.10](https://docs.python.org/3/whatsnew/3.10.html#pep-634-structural-pattern-matching) using the `match`/`case` construct. - [List of languages that compile to Python](https://github.com/vindarel/languages-that-compile-to-python) including Hy, a Lisp (in the [Lisp-2](https://en.wikipedia.org/wiki/Lisp-1_vs._Lisp-2) family) that can use Python libraries. 
diff --git a/doc/repl.md b/doc/repl.md index 93d2a7fd..d253e928 100644 --- a/doc/repl.md +++ b/doc/repl.md @@ -1,3 +1,34 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- **REPL server** +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + + +**Table of Contents** + +- [The Unpythonic REPL server](#the-unpythonic-repl-server) + - [Try the server](#try-the-server) + - [Connect with the client](#connect-with-the-client) + - [Netcat compatibility](#netcat-compatibility) + - [Embed the server in your Python app](#embed-the-server-in-your-python-app) + - [SECURITY WARNING!](#security-warning) + - [Design for hot-patching](#design-for-hot-patching) + - [ZODB in 5 minutes](#zodb-in-5-minutes) + - [Why a custom REPL server/client](#why-a-custom-repl-serverclient) + - [Future directions](#future-directions) + - [Authentication and encryption](#authentication-and-encryption) + - [Note on macro-enabled consoles](#note-on-macro-enabled-consoles) + + + # The Unpythonic REPL server Hot-patch a running Python process! With **syntactic macros** in the [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop)! 
diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md new file mode 100644 index 00000000..fb0d027e --- /dev/null +++ b/doc/troubleshooting.md @@ -0,0 +1,161 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- **Troubleshooting** +- [Design notes](design-notes.md) +- [Essays](essays.md) +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + + +**Table of Contents** + +- [Common issues and questions](#common-issues-and-questions) + - [Do I need a macro expander to use `unpythonic`?](#do-i-need-a-macro-expander-to-use-unpythonic) + - [Why `mcpyrate` and not MacroPy?](#why-mcpyrate-and-not-macropy) + - [Cannot import the name `macros`?](#cannot-import-the-name-macros) + - [But I did run my program with `macropython`?](#but-i-did-run-my-program-with-macropython) + - [I'm hacking a macro inside a module in `unpythonic.syntax`, and my changes don't take?](#im-hacking-a-macro-inside-a-module-in-unpythonicsyntax-and-my-changes-dont-take) + - [Both `unpythonic` and library `x` provide language-extension feature `y`. Which is better?](#both-unpythonic-and-library-x-provide-language-extension-feature-y-which-is-better) + - [How to list the whole public API, and only the public API?](#how-to-list-the-whole-public-api-and-only-the-public-api) + + + +## Common issues and questions + +### Do I need a macro expander to use `unpythonic`? + +If you intend to only use the [Pure-Python feature set](features.md), then no. This is why `unpythonic` does not automatically pull in a macro expander when you install it. + +On the other hand, `unpythonic` is a kitchen-sink language extension, and half of the functionality comes from macros. Even the test framework for `unpythonic`'s own automated tests uses macros! 
+ +If you intend to **use** `unpythonic.syntax` or `unpythonic.dialects`, or if you intend to **develop** `unpythonic` (specifically: to be able to run its test suite), then you will need a macro expander. + +As of v0.15.0, specifically you will need [`mcpyrate`](https://github.com/Technologicat/mcpyrate). + + +### Why `mcpyrate` and not MacroPy? + +[`mcpyrate`](https://github.com/Technologicat/mcpyrate) is an advanced, third-generation macro expander (and language lab) for Python, taking in the lessons learned from both [`macropy3`](https://github.com/azazel75/macropy) and [`mcpy`](https://github.com/delapuente/mcpy), and expanding (pun not intended) on that. + +Besides the advanced features, the reason we use `mcpyrate` is that the `unpythonic.syntax` rabbit hole has become deep enough to benefit from agile experimentation at the meta-metaprogramming level. Allowing the macro expander and the syntax layer of `unpythonic` to co-evolve results in better software. + + +### Cannot import the name `macros`? + +In `mcpyrate`-based programs, there is no run-time object named `macros`, so failing to import that usually means that, for some reason, the macro expander is not enabled. + +Macro-enabled, `mcpyrate`-based programs expect to be run with `macropython` (included in the [`mcpyrate` PyPI package](https://pypi.org/project/mcpyrate/)) instead of bare `python3`. + +Basically, you can `macropython script.py` or `macropython -m some.module`, like you would with `python3`. The advantage is you can run macro-enabled code without a per-project bootstrapper, since `macropython` handles bootstrapping the macro expander for you. + +See the [`macropython` documentation](https://github.com/Technologicat/mcpyrate/blob/master/doc/repl.md#macropython-the-universal-bootstrapper) for details. + + +### But I did run my program with `macropython`? + +The problem could be a stale bytecode cache that `mcpyrate` thinks is still valid.
This can happen especially if you first accidentally run `python3 some_macro_program.py`, and only then realize the invocation should have been `macropython some_macro_program.py`. + +The invocation with bare Python may compile to bytecode successfully and write the bytecode cache, but there is indeed no run-time object named `macros`, so the program will crash at that point. When the program is run again via `macropython`, `mcpyrate`'s loader sees the existing bytecode cache, and because its `mtime` (as compared to the `.py` file) suggests it's up to date, the `.py` file is not automatically recompiled. + +Try clearing the bytecode caches in the affected directory with: +```bash +macropython -c . +``` +This will force a recompile of the `.py` files the next time they are loaded. Then run normally, with `macropython some_macro_program.py`. + + +### I'm hacking a macro inside a module in `unpythonic.syntax`, and my changes don't take? + +This is also likely due to a stale bytecode cache. As of `mcpyrate` 3.4.0, macro re-exports, used by `unpythonic.syntax.__init__`, are not seen by the macro-dependency analyzer that determines bytecode cache validity. + +The important point to realize here is that as per macropythonic tradition, in `mcpyrate`, a function being a macro is a property of its **use site**, not of its definition site. So how do we re-export a macro? We simply re-export the macro function, like we would do for any other function. + +The import to make that re-export happen does not look like a macro-import. This is the right way to do it, since we want to make the object (macro function) available for clients to import, **not** establish bindings in the macro expander *for compiling the module `unpythonic.syntax.__init__` itself*. (The latter is what a macro-import does - it establishes macro bindings *for the module it lexically appears in*.) 
+ +The problem is, the macro-dependency analyzer only looks at the macro-import dependency graph, not the full dependency graph, so when analyzing the user program (e.g. a unit test module in `unpythonic.syntax.tests`), it does not scan the re-export that points to the changed macro definition. + +I might modify the `mcpyrate` analyzer in the future, but doing so will make the dependency scan a lot slower than it needs to be in most circumstances, because a large majority of imports in Python have nothing to do with macros. + +For now, we just note that this issue mainly concerns developers of large macro packages (such as `unpythonic.syntax`) that need to split - for factoring reasons - their macro definitions into separate modules, while presenting all macros to the user in one interface module. This issue does not affect the development of macro-using programs, or any programs where macros are imported from their original definition site (like they always were with MacroPy). + +Try clearing the bytecode cache in `unpythonic/`; this will force a recompile. + + +### Both `unpythonic` and library `x` provide language-extension feature `y`. Which is better? + +The point of having these features in `unpythonic` is integration, and a consistent API. So if you need only one specific language-extension feature, then a library that concentrates on that particular feature is likely a good choice. If you need the kitchen sink, too, then it's better to use our implementation, since our implementations of the various features are designed to work together. + +In some cases (e.g. the condition system), our implementation may offer extra features not present in the original library that inspired it. + +In other cases (e.g. multiple dispatch), the *other* implementation may be better (e.g. runs much faster). + + +### How to list the whole public API, and only the public API? + +In short, use Python's introspection capabilities. 
There are some subtleties here; below are some ready-made recipes. + +To view **the public API of a given submodule**: + +```python +import sys +print(sys.modules["unpythonic.collections"].__all__) # for example +``` + +If the `__all__` attribute for some submodule is missing, that submodule has no public API. + +For most submodules, you could just + +```python +print(unpythonic.collections.__all__) # for example +``` + +but there are some public API symbols in `unpythonic` that have the same name as a submodule. In these cases, the object overrides the submodule in the top-level namespace of `unpythonic`. So, for example, for `unpythonic.llist`, the second approach fails because `unpythonic.llist` points to a function, not to a module. Therefore, the first approach is preferable, as it always works. + +To view **the whole public API**, grouped by submodule: + +```python +import sys + +import unpythonic + +submodules = [name for name in dir(unpythonic) + if f"unpythonic.{name}" in sys.modules] + +for name in submodules: + module = sys.modules[f"unpythonic.{name}"] + if hasattr(module, "__all__"): # has a public API? + print("=" * 79) + print(f"Public API of 'unpythonic.{name}':") + print(module.__all__) +``` + +Note that even if you examine the API grouped by submodule, `unpythonic` guarantees all of its public API symbols to be present in the top-level namespace, too, so when you actually import the symbols, you can import them from the top-level namespace. (Actually, the macros expect you to do so, to recognize uses of various `unpythonic` constructs when analyzing code.) + +**Do not** do this to retrieve the submodules: + +```python +import types +submodules_wrong = [name for name in dir(unpythonic) + if issubclass(type(getattr(unpythonic, name)), types.ModuleType)] +``` + +for the same reason as above; in this variant, any submodules that have the same name as an object will be missing from the list. 
+ +To view **the whole public API** available in the top-level namespace: + +```python +import types + +import unpythonic + +non_module_names = [name for name in dir(unpythonic) + if not issubclass(type(getattr(unpythonic, name)), types.ModuleType)] +print(non_module_names) +``` + +Now be very very careful: for the same reason as above, for the correct semantics we must use `issubclass(..., types.ModuleType)`, not `... in sys.modules`. Here we want to list each symbol in the top-level namespace of `unpythonic` that does not point to a module; **including** any objects that override a module in the top-level namespace. diff --git a/flake8rc b/flake8rc index 23a1ff37..3e3fe033 100644 --- a/flake8rc +++ b/flake8rc @@ -25,6 +25,10 @@ ignore = E704, # do not assign a lambda expression, use a def (because autopep8 applies it blindly) E731, + # whitespace before ':' (false positive on alignment and slices; Black/Ruff agree) + E203, + # line break before binary operator (PEP 8 recommends Knuth's style, i.e. break before) + W503, # line break after binary operator W504 exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,node_modules,instance,00_stuff,00_old diff --git a/makedist.sh b/makedist.sh index 338298d3..b6c03991 100755 --- a/makedist.sh +++ b/makedist.sh @@ -1,2 +1,2 @@ #!/bin/bash -python3 setup.py sdist bdist_wheel +pdm build diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e6808283 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,76 @@ +[project] +name = "unpythonic" +description = "Supercharge your Python with parts of Lisp and Haskell." 
+authors = [ + { name = "Juha Jeronen", email = "juha.m.jeronen@gmail.com" }, +] +requires-python = ">=3.10,<3.15" + +# the `read` function and long_description_content_type from setup.py are no longer needed, +# modern build tools like pdm/hatch already know how to handle markdown if you point them at a .md file +# they will set the long_description and long_description_content_type for you +readme = "README.md" + +license = { text = "BSD" } + +# This tells whichever build backend you use (pdm in our case) to run its own mechanism to find the version +# of the project and plug it into the metadata +# details for how we instruct pdm to find the version are in the `[tool.pdm.version]` section below +dynamic = ["version"] + +dependencies = [ + "mcpyrate @ file:///home/jje/Documents/koodit/mcpyrate", + "sympy>=1.13" +] +keywords=["functional-programming", "language-extension", "syntactic-macros", + "tail-call-optimization", "tco", "continuations", "currying", "lazy-evaluation", + "dynamic-variable", "macros", "lisp", "scheme", "racket", "haskell"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules" +] + +[project.urls] +Repository = "https://github.com/Technologicat/unpythonic" + +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + +[tool.pdm.version] +# the 
`file` source tells pdm to look for a line in a file that matches the regex `__version__ = ".*"` +# The regex parse is fairly robust, it can handle arbitrary whitespace and comments +source = "file" +path = "unpythonic/__init__.py" + +[tool.pdm.build] +# we don't need to explicitly include `mcpyrate.repl`. Unlike with setuptools, pdm automatically includes +# all packages and modules in the source tree pointed to by `includes`, minus any paths matching `excludes` +includes = ["unpythonic"] +excludes = ["**/tests", "**/__pycache__"] + +# note the exclusion of an equivalent to zip_safe. I used to think that zip_safe was a core python metadata flag +# telling pip and other python tools not to include the package in any kind of zip-import or zipapp file. +# I was wrong. zip_safe is a setuptools-specific flag that tells setuptools to not include the package in a bdist_egg +# Since bdist_eggs are no longer really used by anything and have been completely supplanted by wheels, zip_safe has no meaningful effect. +# The effect I think you hoped to achieve with zip_safe is achieved by excluding `__pycache__` folders from +# the built wheels, using the `excludes` field in the `[tool.pdm.build]` section above. + +# most python tools at this point, including mypy, have support for sourcing configuration from pyproject.toml +# making the setup.cfg file unnecessary +[tool.mypy] +show_error_codes = true diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8f75191e..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -mcpyrate>=3.3.0 -sympy>=1.4 diff --git a/runtests.py b/runtests.py index f1276cd4..759424d6 100644 --- a/runtests.py +++ b/runtests.py @@ -1,57 +1,29 @@ # -*- coding: utf-8 -*- -"""Run all tests for `unpythonic`. +"""Run all tests for ``unpythonic``. The test framework uses macros, but this top-level script does not. This can be -run under regular `python3` (i.e. does not need the `macropython` wrapper from -`mcpyrate`). 
+run under regular ``python3`` (i.e. does not need the ``macropython`` wrapper +from ``mcpyrate``). """ import os -import re import sys -from importlib import import_module -from unpythonic.test.fixtures import session, testset, tests_errored, tests_failed -from unpythonic.collections import unbox +from unpythonic.test.runner import discover_testmodules, run import mcpyrate.activate # noqa: F401 -def listtestmodules(path): - testfiles = listtestfiles(path) - testmodules = [modname(path, fn) for fn in testfiles] - return list(sorted(testmodules)) - -def listtestfiles(path, prefix="test_", suffix=".py"): - return [fn for fn in os.listdir(path) if fn.startswith(prefix) and fn.endswith(suffix)] - -def modname(path, filename): # some/dir/mod.py --> some.dir.mod - modpath = re.sub(os.path.sep, r".", path) - themod = re.sub(r"\.py$", r"", filename) - return ".".join([modpath, themod]) - def main(): - with session(): - # All folders containing unit tests are named `tests` (plural). - # - # The testing framework is called `unpythonic.test.fixtures`, - # so it lives in the only subfolder in the project that is named - # `test` (singular). - testsets = (("regular code", (listtestmodules(os.path.join("unpythonic", "tests")) + - listtestmodules(os.path.join("unpythonic", "net", "tests")))), - ("macros", listtestmodules(os.path.join("unpythonic", "syntax", "tests")))) - for tsname, modnames in testsets: - with testset(tsname): - for m in modnames: - # Wrap each module in its own testset to protect the umbrella testset - # against ImportError as well as any failures at macro expansion time. - with testset(m): - # TODO: We're not inside a package, so we currently can't use a relative import. - # TODO: So we just hope this resolves to the local `unpythonic` source code, - # TODO: not to an installed copy of the library. 
- mod = import_module(m) - mod.runtests() - all_passed = (unbox(tests_failed) + unbox(tests_errored)) == 0 - return all_passed + # All folders containing unit tests are named `tests` (plural). + # + # The testing framework is called `unpythonic.test.fixtures`, + # so it lives in the only subfolder in the project that is named + # `test` (singular). + testsets = [("regular code", (discover_testmodules(os.path.join("unpythonic", "tests")) + + discover_testmodules(os.path.join("unpythonic", "net", "tests")))), + ("macros", discover_testmodules(os.path.join("unpythonic", "syntax", "tests"))), + ("dialects", discover_testmodules(os.path.join("unpythonic", "dialects", "tests")))] + return run(testsets) if __name__ == '__main__': if not main(): diff --git a/setup.py b/setup.py deleted file mode 100644 index 478cbf8c..00000000 --- a/setup.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -# -"""setuptools-based setup.py for unpythonic. - -Tested on Python 3.8. - -Usage as usual with setuptools: - python3 setup.py build - python3 setup.py sdist - python3 setup.py bdist_wheel --universal - python3 setup.py install - -For details, see - http://setuptools.readthedocs.io/en/latest/setuptools.html#command-reference -or - python3 setup.py --help - python3 setup.py --help-commands - python3 setup.py --help bdist_wheel # or any command -""" - -import ast -import os - -from setuptools import setup # type: ignore[import] - - -def read(*relpath, **kwargs): # https://blog.ionelmc.ro/2014/05/25/python-packaging/#the-setup-script - with open(os.path.join(os.path.dirname(__file__), *relpath), - encoding=kwargs.get('encoding', 'utf8')) as fh: - return fh.read() - -# Extract __version__ from the package __init__.py -# (since it's not a good idea to actually run __init__.py during the build process). 
-# -# http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package -# -init_py_path = os.path.join("unpythonic", "__init__.py") -version = None -try: - with open(init_py_path) as f: - for line in f: - if line.startswith("__version__"): - module = ast.parse(line) - expr = module.body[0] - assert isinstance(expr, ast.Assign) - v = expr.value - if type(v) is ast.Constant: # Python 3.8+ - # mypy understands `isinstance(..., ...)` but not `type(...) is ...`, - # and we want to match on the exact type, not any subclass that might be - # added in some future Python version. - assert isinstance(v, ast.Constant) - version = v.value - elif type(v) is ast.Str: - assert isinstance(v, ast.Str) # mypy - version = v.s - break -except FileNotFoundError: - pass -if not version: - raise RuntimeError(f"Version information not found in {init_py_path}") - -######################################################### -# Call setup() -######################################################### - -setup( - name="unpythonic", - version=version, - packages=["unpythonic", "unpythonic.syntax"], - provides=["unpythonic"], - keywords=["functional-programming", "language-extension", "syntactic-macros", - "tail-call-optimization", "tco", "continuations", "currying", "lazy-evaluation", - "dynamic-variable", "macros", "lisp", "scheme", "racket", "haskell"], - install_requires=[], # mcpyrate is optional for us, so we can't really put it here even though we recommend it. 
- python_requires=">=3.6,<3.10", - author="Juha Jeronen", - author_email="juha.m.jeronen@gmail.com", - url="https://github.com/Technologicat/unpythonic", - description="Supercharge your Python with parts of Lisp and Haskell.", - long_description=read("README.md"), - long_description_content_type="text/markdown", - license="BSD", - platforms=["Linux"], - classifiers=["Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules" - ], - zip_safe=False # macros are not zip safe, because the zip importer fails to find sources. -) diff --git a/unpythonic/__init__.py b/unpythonic/__init__.py index 037d1e53..48e202e4 100644 --- a/unpythonic/__init__.py +++ b/unpythonic/__init__.py @@ -7,7 +7,7 @@ for a trip down the rabbit hole. 
""" -__version__ = '0.14.3' +__version__ = '2.0.0' from .amb import * # noqa: F401, F403 from .arity import * # noqa: F401, F403 @@ -17,6 +17,7 @@ from .dispatch import * # noqa: F401, F403 from .dynassign import * # noqa: F401, F403 from .ec import * # noqa: F401, F403 +from .excutil import * # noqa: F401, F403 from .fix import * # noqa: F401, F403 from .fold import * # noqa: F401, F403 from .fploop import * # noqa: F401, F403 @@ -25,9 +26,9 @@ from .gmemo import * # noqa: F401, F403 from .gtco import * # noqa: F401, F403 from .it import * # noqa: F401, F403 -from .let import * # no guarantees on evaluation order (before Python 3.6), nice syntax # noqa: F401, F403 +from .let import * # # noqa: F401, F403 -# guaranteed evaluation order, clunky syntax +# As of 0.15.0, lispylet is nowadays primarily a code generation target API for macros. from .lispylet import (let as ordered_let, letrec as ordered_letrec, # noqa: F401 dlet as ordered_dlet, dletrec as ordered_dletrec, blet as ordered_blet, bletrec as ordered_bletrec) @@ -40,9 +41,25 @@ from .slicing import * # noqa: F401, F403 from .symbol import * # noqa: F401, F403 from .tco import * # noqa: F401, F403 +from .timeutil import * # noqa: F401, F403 from .typecheck import * # noqa: F401, F403 -# HACK: break dependency loop +# -------------------------------------------------------------------------------- +# HACK: break dependency loops for circular imports + from .lazyutil import _init_module _init_module() del _init_module +# We're slightly selective here, because user code likely doesn't need `islazy`, `passthrough_lazy_args`, +# or `maybe_force_args`, although strictly speaking those functions are part of the public API. 
+from .lazyutil import Lazy, force1, force # noqa: F401 + +from .funutil import _init_module +_init_module() +del _init_module +from .funutil import * # noqa: F401, F403 + +from .numutil import _init_module +_init_module() +del _init_module +from .numutil import * # noqa: F401, F403 diff --git a/unpythonic/amb.py b/unpythonic/amb.py index 45e8cd3b..cf8d3f8e 100644 --- a/unpythonic/amb.py +++ b/unpythonic/amb.py @@ -14,7 +14,7 @@ - Presents the source code in the same order as it actually runs. -The implementation is based on the List monad. This is a hack with the bare +The implementation is based on the list monad. This is a hack with the bare minimum of components to make it work, complete with a semi-usable syntax. If you use `mcpyrate`: @@ -59,7 +59,7 @@ def forall(*lines): """Nondeterministically evaluate lines. This is essentially a bastardized variant of Haskell's do-notation, - specialized for the List monad. + specialized for the list monad. Examples:: @@ -83,8 +83,8 @@ def forall(*lines): - All choices are evaluated, depth first, and set of results is returned as a tuple. - - If a line returns an iterable, it is implicitly converted into a List - monad containing the same items. + - If a line returns an iterable, it is implicitly converted into a + list monad containing the same items. - This applies also to the RHS of a ``choice``. @@ -94,11 +94,11 @@ def forall(*lines): This allows easily returning a tuple (as one result item) from the computation, as in the above pythagorean triples example. - - If a line returns a single item, it is wrapped into a singleton List - (a List containing that one item). + - If a line returns a single item, it is wrapped into a singleton + list monad (a MonadicList containing that one item). - The final result (containing all the results) is converted from - List monad to tuple for output. + the list monad to tuple for output. - The values currently picked by the choices are bound to names in the environment. 
To access it, use a ``lambda e: ...`` like in @@ -199,6 +199,9 @@ def begin(*exprs): # args eagerly evaluated by Python mlst = eval(allcode, {"e": e, "bodys": bodys, "begin": begin, "monadify": monadify}) return tuple(mlst) +# -------------------------------------------------------------------------------- +# This low-level machinery is shared with the macro version, `unpythonic.syntax.forall`. + def monadify(value, unpack=True): """Pack value into a monadic list if it is not already. @@ -212,7 +215,7 @@ def monadify(value, unpack=True): return MonadicList.from_iterable(value) except TypeError: pass # fall through - return MonadicList(value) # unit(List, value) + return MonadicList(value) # unit(MonadicList, value) class MonadicList: # TODO: This if anything is **the** place to use @typed. """A monadic list.""" @@ -223,7 +226,7 @@ def __init__(self, *elts): returns: M a """ # Accept the sentinel nil as a special **item** that, when passed to - # the List constructor, produces an empty list. + # the MonadicList constructor, produces an empty list. if len(elts) == 1 and elts[0] is nil: self.x = () else: @@ -243,8 +246,8 @@ def __rshift__(self, f): """ # bind ma f = join (fmap f ma) return self.fmap(f).join() - # done manually, essentially List.from_iterable(flatmap(lambda elt: f(elt), self.x)) - #return List.from_iterable(result for elt in self.x for result in f(elt)) + # done manually, essentially MonadicList.from_iterable(flatmap(lambda elt: f(elt), self.x)) + # return MonadicList.from_iterable(result for elt in self.x for result in f(elt)) def then(self, f): """Sequence, a.k.a. "then"; standard notation ">>" in Haskell. 
@@ -257,7 +260,7 @@ def then(self, f): """ cls = self.__class__ if not isinstance(f, cls): - raise TypeError(f"Expected a List monad, got {type(f)} with value {repr(f)}") + raise TypeError(f"Expected a MonadicList, got {type(f)} with value {repr(f)}") return self >> (lambda _: f) @classmethod @@ -282,10 +285,10 @@ def guard(cls, b): cancels the rest of that branch of the computation. """ if b: - return cls(True) # List with one element; value not intended to be actually used. - return cls() # 0-element List; short-circuit this branch of the computation. + return cls(True) # MonadicList with one element; value not intended to be actually used. + return cls() # 0-element MonadicList; short-circuit this branch of the computation. - # make List iterable so that "for result in f(elt)" works (when f outputs a List monad) + # make MonadicList iterable so that "for result in f(elt)" works (when f outputs a list monad) def __iter__(self): return iter(self.x) def __len__(self): @@ -330,7 +333,7 @@ def copy(self): @classmethod def lift(cls, f): - """Lift a regular function into a List-producing one. + """Lift a regular function into a MonadicList-producing one. 
f: a -> b returns: a -> M b @@ -355,7 +358,7 @@ def join(self): """ cls = self.__class__ if not all(isinstance(elt, cls) for elt in self.x): - raise TypeError(f"Expected a nested List monad, got {type(self.x)} with value {self.x}") + raise TypeError(f"Expected a nested MonadicList, got {type(self.x)} with value {self.x}") # list of lists - concat them return cls.from_iterable(elt for sublist in self.x for elt in sublist) diff --git a/unpythonic/arity.py b/unpythonic/arity.py index f99adcba..1ffc460f 100644 --- a/unpythonic/arity.py +++ b/unpythonic/arity.py @@ -8,18 +8,20 @@ __all__ = ["getfunc", "arities", "arity_includes", "required_kwargs", "optional_kwargs", "kwargs", - "resolve_bindings", "tuplify_bindings", + "resolve_bindings", "resolve_bindings_partial", "tuplify_bindings", "UnknownArity"] -from inspect import signature, Parameter, ismethod from collections import OrderedDict +import copy +from inspect import signature, Parameter, ismethod, BoundArguments, _empty +import itertools import operator class UnknownArity(ValueError): """Raised when the arity of a function cannot be inspected.""" # HACK: some built-ins report incorrect arities (0, 0) at least in Python 3.4 -# TODO: re-test on 3.8 and on PyPy3 (3.7), just to be sure. +# TODO: re-test on 3.8, 3.9, 3.10, 3.11, 3.12 and on PyPy3 (3.8 and later), just to be sure. # # Full list of built-ins: # https://docs.python.org/3/library/functions.html @@ -206,7 +208,7 @@ def arities(f): This uses inspect.signature; note that the signature of builtin functions cannot be inspected. This is worked around to some extent, but e.g. methods of built-in classes (such as ``list``) might not be inspectable - (at least on CPython < 3.7). + (at least on old CPython < 3.7). For bound methods, ``self`` or ``cls`` does not count toward the arity, because these are passed implicitly by Python. Note a `@classmethod` becomes @@ -217,6 +219,9 @@ def arities(f): does not implicitly provide a `self`, because there is none to be had. 
This behavior is reflected in the return value of `arities`.) + If `f` is `@generic` (see `unpythonic.dispatch`), we scan its multimethods, + and return the smallest `min_arity` and the largest `max_arity`. + Parameters: `f`: function The function to inspect. @@ -238,6 +243,20 @@ def arities(f): return _builtin_arities[f] except TypeError: # f is of an unhashable type pass + + # Integration with the multiple-dispatch system (multimethods). + from .dispatch import isgeneric, list_methods # circular import + if isgeneric(f): + min_lower = _infty + max_upper = 0 + for (thecallable, type_signature) in list_methods(f): + lower, upper = arities(thecallable) # let UnknownArity propagate + if lower < min_lower: + min_lower = lower + if upper > max_upper: + max_upper = upper + return min_lower, max_upper + try: lower = 0 upper = 0 @@ -258,25 +277,40 @@ def arities(f): raise UnknownArity(*e.args) def required_kwargs(f): - """Return a set containing the names of required name-only arguments of f. + """Return a set containing the names of required name-only arguments of `f`. - "Required": has no default. + *Required* means the parameter has no default. - Raises UnknownArity if inspection failed. + If `f` is `@generic` (see `unpythonic.dispatch`), we scan its multimethods, + and return the names of required kwargs accepted by *any* of its multimethods. + + Raises `UnknownArity` if inspection failed. """ return _kwargs(f, optionals=False) def optional_kwargs(f): - """Return a set containing the names of optional name-only arguments of f. + """Return a set containing the names of optional name-only arguments of `f`. - "Optional": has a default. + *Optional* means the parameter has a default. - Raises UnknownArity if inspection failed. + If `f` is `@generic` (see `unpythonic.dispatch`), we scan its multimethods, + and return the names of optional kwargs accepted by *any* of its multimethods. + + Raises `UnknownArity` if inspection failed. 
""" return _kwargs(f, optionals=True) def _kwargs(f, optionals=True): f, _ = getfunc(f) + + # Integration with the multiple-dispatch system (multimethods). + from .dispatch import isgeneric, list_methods # circular import + if isgeneric(f): + thekwargs = {} + for (thecallable, type_signature) in list_methods(f): + thekwargs.update(_kwargs(thecallable, optionals=optionals)) + return thekwargs + try: if optionals: pred = lambda v: v.default is not Parameter.empty # optionals @@ -305,13 +339,29 @@ def arity_includes(f, n): lower, upper = arities(f) return lower <= n <= upper -# TODO: Can we replace this by `inspect.Signature.bind`, provided by Python 3.5+? +def resolve_bindings_partial(f, *args, **kwargs): + """Like `resolve_bindings`, but use `inspect.Signature.bind_partial`. + + That is, it is acceptable for some parameters of `f` not to have a binding. + """ + return _resolve_bindings(f, args, kwargs, _partial=True) + def resolve_bindings(f, *args, **kwargs): """Resolve parameter bindings established by `f` when called with the given args and kwargs. This is an inspection tool, which does not actually call `f`. This is useful for memoizers and other similar decorators that need a canonical representation of `f`'s parameter bindings. - If you want a hashable result, postprocess the return value with `tuplify_bindings(result)`. + + **NOTE**: This is a thin wrapper on top of `inspect.Signature.bind`, which was added in Python 3.5. + In `unpythonic` 0.14.2 and 0.14.3, we used to have our own implementation of the parameter binding + algorithm (that ran also on Python 3.4), but it is no longer needed, since as of v0.15.3, + we support only Python 3.8 and later. + + The only thing we do beside call `inspect.Signature.bind` is that we apply default values + (from the definition of `f`) automatically. + + The return value is an `inspect.BoundArguments`. If you want a hashable result, + postprocess the return value with `tuplify_bindings(result)`. 
For illustration, consider a simplistic memoizer:: @@ -358,152 +408,207 @@ def f(a): f(42) f(a=42) # now the cache hits + """ + return _resolve_bindings(f, args, kwargs, _partial=False) - The return value of `resolve_bindings` is an `OrderedDict` with five keys: - args: `OrderedDict` of bindings made for regular parameters - (positional only, positional or keyword, keyword only). - vararg: `tuple` of arguments gathered by the vararg (`*args`) parameter - if the function definition has one; otherwise `None`. - vararg_name: `str`, the name of the vararg parameter; or `None`. - kwarg: `OrderedDict` of bindings gathered by `**kwargs` if the - function definition has one; otherwise `None`. - kwarg_name: `str`, the name of the kwarg parameter; or `None`. +def _resolve_bindings(f, args, kwargs, *, _partial): + thesignature = signature(f) + if _partial: + bound_arguments = thesignature.bind_partial(*args, **kwargs) + else: + bound_arguments = thesignature.bind(*args, **kwargs) + bound_arguments.apply_defaults() + return bound_arguments + +def tuplify_bindings(bound_arguments): + """Convert the return value of `resolve_bindings` into a hashable form. - **NOTE**: + This is useful for memoizers and similar use cases, which need to use a + representation of the bindings as a dictionary key. - We attempt to implement the exact same algorithm Python itself uses for - resolving argument bindings. The process is explained in the language - reference, although not in a step-by-step algorithmic form. + `bound_arguments` is an `inspect.BoundArguments` object. - https://docs.python.org/3/reference/compound_stmts.html#function-definitions - https://docs.python.org/3/reference/expressions.html#calls + In our return value, `bound_arguments.arguments` itself, as well as the value of + the `**kwargs` parameter contained in it, if any, are converted from `OrderedDict` + to `tuple` using `tuple(od.items())`. 
- This function should report exactly those bindings that would actually be - established if `f` was actually called with the given `args` and `kwargs`. + The result is hashable, if all the passed arguments are. - If you encounter a case with any difference between what the result claims and - how Python itself assigns the bindings, that is a bug in our code. In such a - case, please report the issue, so it can be fixed, and then added to the unit - tests to ensure it won't come back. + See `resolve_bindings` for an example. """ - f, _ = getfunc(f) - params = signature(f).parameters - - # https://docs.python.org/3/library/inspect.html#inspect.Signature - # https://docs.python.org/3/library/inspect.html#inspect.Parameter - poskinds = set((Parameter.POSITIONAL_ONLY, - Parameter.POSITIONAL_OR_KEYWORD)) - kwkinds = set((Parameter.POSITIONAL_OR_KEYWORD, - Parameter.KEYWORD_ONLY)) - varkinds = set((Parameter.VAR_POSITIONAL, - Parameter.VAR_KEYWORD)) - - index = {} - nposparams = 0 - varpos = varkw = None - for slot, param in enumerate(params.values()): - if param.kind in poskinds: - nposparams += 1 - if param.kind in kwkinds: - index[param.name] = slot - if param.kind == Parameter.VAR_POSITIONAL: - varpos = slot - varpos_name = param.name - elif param.kind == Parameter.VAR_KEYWORD: - varkw = slot - varkw_name = param.name - - # https://docs.python.org/3/reference/compound_stmts.html#function-definitions - # https://docs.python.org/3/reference/expressions.html#calls - unassigned = object() # gensym("unassigned"), but object() is much faster, and we don't need a label, or pickle support. - slots = [unassigned for _ in range(len(params))] # yes, varparams too - - # fill from positional arguments - for slot, (param, value) in enumerate(zip(params.values(), args)): - if param.kind in varkinds: # these are always last in the function def + def tuplify(ordereddict): + return tuple(ordereddict.items()) + + # Tuplify the **kwargs dict. 
+ # + # The information of which parameter it is, if any, is not contained in the + # `arguments` attribute of the `BoundArguments` instance; we need to scan + # the signature (stored in the `signature` attribute) against which the + # bindings were made. + for parameter in bound_arguments.signature.parameters.values(): + if parameter.kind == Parameter.VAR_KEYWORD: + kwargs_param = parameter.name break - slots[slot] = value - - if varpos is not None: - slots[varpos] = [] - if varkw is not None: - slots[varkw] = OrderedDict() - vkdict = slots[varkw] - - # gather excess positional arguments - if len(args) > nposparams: - if varpos is None: - raise TypeError(f"{f.__name__}() takes {nposparams} positional arguments but {len(args)} were given") - slots[varpos] = args[nposparams:] - - # fill from keyword arguments - for identifier, value in kwargs.items(): - if identifier in index: - slot = index[identifier] - if slots[slot] is unassigned: - slots[slot] = value + else: + kwargs_param = None + + if kwargs_param: + thearguments = copy.copy(bound_arguments.arguments) # avoid mutating our input + thearguments[kwargs_param] = tuplify(thearguments[kwargs_param]) + else: + thearguments = bound_arguments.arguments + + return tuplify(thearguments) + +# This is `inspect.Signature.bind` from Python 3.8.5, modified for our purposes so we can determine +# unbound *and extra* arguments (both positional and by-name) without raising a `TypeError`. +# We need this for kwargs support in `curry`, because we want to pass through unmatched args and kwargs +# (which otherwise trigger a `TypeError`). +# +# This is only for `curry`; all other code uses the standard implementation. +# +# Used under the PSF license. Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation; All Rights Reserved +def _bind(thesignature, args, kwargs, *, partial): + """Private method. 
Don't use directly.""" + + arguments = OrderedDict() + + parameters = iter(thesignature.parameters.values()) + parameters_ex = () + arg_vals = iter(args) + + # These are added for `unpythonic`. + unbound_parameters = [] + extra_args = [] + extra_kwargs = OrderedDict() + kwargs = copy.copy(kwargs) # the caller might need the original later + + while True: + # Let's iterate through the positional arguments and corresponding + # parameters + try: + arg_val = next(arg_vals) + except StopIteration: + # No more positional arguments + try: + param = next(parameters) + except StopIteration: + # No more parameters. That's it. Just need to check that + # we have no `kwargs` after this while loop + break else: - raise TypeError(f"{f.__name__}() got multiple values for argument '{identifier}'") - elif varkw is not None: # gather excess keyword arguments - vkdict[identifier] = value + if param.kind == Parameter.VAR_POSITIONAL: + # That's OK, just empty *args. Let's start parsing + # kwargs + break + elif param.name in kwargs: + if param.kind == Parameter.POSITIONAL_ONLY: + msg = '{arg!r} parameter is positional only, ' \ + 'but was passed as a keyword' + msg = msg.format(arg=param.name) + raise TypeError(msg) from None + parameters_ex = (param,) + break + elif (param.kind == Parameter.VAR_KEYWORD or + param.default is not _empty): + # That's fine too - we have a default value for this + # parameter. 
So, lets start parsing `kwargs`, starting + # with the current parameter + parameters_ex = (param,) + break + else: + # No default, not VAR_KEYWORD, not VAR_POSITIONAL, + # not in `kwargs` + if partial: + parameters_ex = (param,) + break + else: + # msg = 'missing a required argument: {arg!r}' + # msg = msg.format(arg=param.name) + # raise TypeError(msg) from None + unbound_parameters.append(param) else: - raise TypeError(f"{f.__name__}() got an unexpected keyword argument '{identifier}'") - - # fill missing with defaults from function definition - failures = [] - for slot, param in enumerate(params.values()): - if slots[slot] is unassigned: - if param.default is Parameter.empty: - failures.append(param.name) - slots[slot] = param.default - # Python 3.6 goes so far to make this particular error message into proper - # English, that aping the standard error message takes the most effort here... - if failures: - if len(failures) == 1: - n1 = failures[0] - raise TypeError(f"{f.__name__}() missing required positional argument: '{n1}'") - if len(failures) == 2: - n1, n2 = failures - raise TypeError(f"{f.__name__}() missing 2 required positional arguments: '{n1}' and '{n2}'") - wrapped = [f"'{x}'" for x in failures] - others = ", ".join(wrapped[:-1]) - msg = f"{f.__name__}() missing {len(failures)} required positional arguments: {others}, and '{failures[-1]}'" - raise TypeError(msg) - - # build the result - regularargs = OrderedDict() - for param, value in zip(params.values(), slots): - if param.kind in varkinds: # skip varpos, varkw + # We have a positional argument to process + try: + param = next(parameters) + except StopIteration: + # raise TypeError('too many positional arguments') from None + extra_args.append(arg_val) + else: + if param.kind in (Parameter.VAR_KEYWORD, Parameter.KEYWORD_ONLY): + # Looks like we have no parameter for this positional + # argument + # raise TypeError( + # 'too many positional arguments') from None + extra_args.append(arg_val) + + if 
param.kind == Parameter.VAR_POSITIONAL: + # We have an '*args'-like argument, let's fill it with + # all positional arguments we have left and move on to + # the next phase + values = [arg_val] + values.extend(arg_vals) + arguments[param.name] = tuple(values) + break + + if param.name in kwargs and param.kind != Parameter.POSITIONAL_ONLY: + raise TypeError( + 'multiple values for argument {arg!r}'.format( + arg=param.name)) from None + + arguments[param.name] = arg_val + + # Now, we iterate through the remaining parameters to process + # keyword arguments + kwargs_param = None + for param in itertools.chain(parameters_ex, parameters): + if param.kind == Parameter.VAR_KEYWORD: + # Memorize that we have a '**kwargs'-like parameter + kwargs_param = param continue - regularargs[param.name] = value - # Naming of the fields matches `ast.arguments` - # https://greentreesnakes.readthedocs.io/en/latest/nodes.html#arguments - bindings = OrderedDict() - bindings["args"] = regularargs - bindings["vararg"] = slots[varpos] if varpos is not None else None - bindings["vararg_name"] = varpos_name if varpos is not None else None # for introspection - bindings["kwarg"] = slots[varkw] if varkw is not None else None - bindings["kwarg_name"] = varkw_name if varkw is not None else None # for introspection - - return bindings - -def tuplify_bindings(bindings): - """Convert the return value of `resolve_bindings` into a hashable form. + if param.kind == Parameter.VAR_POSITIONAL: + # Named arguments don't refer to '*args'-like parameters. + # We only arrive here if the positional arguments ended + # before reaching the last parameter before *args. + continue - This is useful for memoizers and similar use cases, which need to use a - representation of the bindings as a dictionary key. + param_name = param.name + try: + arg_val = kwargs.pop(param_name) + except KeyError: + # We have no value for this parameter. 
It's fine though, + # if it has a default value, or it is an '*args'-like + # parameter, left alone by the processing of positional + # arguments. + if (not partial and param.kind != Parameter.VAR_POSITIONAL and + param.default is _empty): + # raise TypeError('missing a required argument: {arg!r}'. + # format(arg=param_name)) from None + unbound_parameters.append(param) - The values stored in the `"args"` and `"kwarg"` keys, as well as `bindings` - itself, are converted from `OrderedDict` to `tuple` using `tuple(od.items())`. - The result is hashable, if all the arguments passed in the bindings are. - """ - def tuplify(od): - return tuple(od.items()) - result = OrderedDict() - result["args"] = tuplify(bindings["args"]) - result["vararg"] = bindings["vararg"] - result["vararg_name"] = bindings["vararg_name"] - result["kwarg"] = tuplify(bindings["kwarg"]) if bindings["kwarg"] is not None else None - result["kwarg_name"] = bindings["kwarg_name"] - return tuplify(result) + else: + if param.kind == Parameter.POSITIONAL_ONLY: + # This should never happen in case of a properly built + # Signature object (but let's have this check here + # to ensure correct behaviour just in case) + raise TypeError('{arg!r} parameter is positional only, ' + 'but was passed as a keyword'. 
+ format(arg=param.name)) + + arguments[param_name] = arg_val + + if kwargs: + if kwargs_param is not None: + # Process our '**kwargs'-like parameter + arguments[kwargs_param.name] = kwargs + else: + # raise TypeError( + # 'got an unexpected keyword argument {arg!r}'.format( + # arg=next(iter(kwargs)))) + extra_kwargs.update(kwargs) + + return (BoundArguments(thesignature, arguments), + tuple(unbound_parameters), + (tuple(extra_args), extra_kwargs)) diff --git a/unpythonic/collections.py b/unpythonic/collections.py index b71b91ff..280f8704 100644 --- a/unpythonic/collections.py +++ b/unpythonic/collections.py @@ -19,10 +19,17 @@ from operator import lt, le, ge, gt import threading -from .llist import cons, Nil -from .misc import getattrrec +# Some of these are used only to detect (and perhaps mogrify) our own cat food in `mogrify`. +# +# Still, importing these has the rather unpleasant consequence of creating dependency loops +# (circular imports), because many other modules in `unpythonic` use definitions from the +# `unpythonic.collections` module. from .env import env from .dynassign import _Dyn +from .funutil import Values +from .it import drop +from .llist import cons, Nil +from .misc import getattrrec, CountingIterator def get_abcs(cls): """Return a set of the collections.abc superclasses of cls (virtuals too).""" @@ -75,8 +82,12 @@ def mogrify(func, container): just like in ``map``. """ def doit(x): + if isinstance(x, Values): + new_rets = doit(x.rets) + new_kwrets = doit(x.kwrets) + return Values(*new_rets, **new_kwrets) # mutable containers - if isinstance(x, MutableSequence): + elif isinstance(x, MutableSequence): y = [doit(elt) for elt in x] if hasattr(x, "clear"): x.clear() # list has this, but not guaranteed by MutableSequence @@ -125,7 +136,7 @@ def doit(x): elif isinstance(x, MappingView): return {doit(elt) for elt in x} # env and dyn provide the Mapping API, but shouldn't get the general Mapping treatment here. 
- # (This is important for the curry and lazify macros.) + # (This is important for the autocurry and lazify macros.) elif isinstance(x, Mapping) and not isinstance(x, (env, _Dyn)): ctor = type(x) return ctor({k: doit(v) for k, v in x.items()}) @@ -142,6 +153,9 @@ def doit(x): # ----------------------------------------------------------------------------- +# TODO: Make `box` support pickle with per-use-site uuids, to keep a shared box shared across a pickle roundtrip? +# TODO: See the `gsym` implementation in `unpythonic.symbol` for how to do this in a thread-safe manner. +# TODO: Think how those semantics should work with `ThreadLocalBox`. class box: """Minimalistic, mutable single-item container à la Racket. @@ -276,6 +290,9 @@ class Some: In a way, `Some` is a relative of `box`: it's an **immutable** single-item container. It supports `.get` and `unbox`, but no `<<` or `.set`. + + It is also the logical opposite of a bare `None`, also syntactically: + `Some(...) is not None`. """ def __init__(self, x=None): self.x = x @@ -730,16 +747,34 @@ class ShadowedSequence(Sequence, _StrReprEqMixin): Essentially, ``out[k] = v[index_in_slice(k, ix)] if in_slice(k, ix) else seq[k]``, but doesn't actually allocate ``out``. - ``ix`` may be integer (if ``v`` represents one item only) or slice (if ``v`` - is intended as a sequence). The default ``None`` means ``out[k] = seq[k]`` + ``ix`` may be integer (if ``v`` represents one item only) or ``slice`` (if ``v`` + is intended as a sequence). The default ``ix=None`` means ``out[k] = seq[k]`` with no shadower. + + If ``ix`` is a ``slice``, then: + + - If the replacement specification requires reading ``v`` backwards, + and/or if you plan to iterate over the ``ShadowedSequence`` more + than once, then ``v`` must implement ``collections.abc.Sequence``, + i.e. it must have ``__len__`` and ``__getitem__`` methods. 
+ + - If the replacement specification only needs reading ``v`` forwards, + **AND** if you plan to read the ``ShadowedSequence`` only once (e.g. + as part of a `fupdate` or `fup` operation), then it is sufficient + for ``v`` to implement only ``collections.abc.Iterator``, i.e. the + ``__iter__`` and ``__next__`` methods only. """ def __init__(self, seq, ix=None, v=None): if ix is not None and not isinstance(ix, (slice, int)): raise TypeError(f"ix: expected slice or int, got {type(ix)} with value {ix}") + if not isinstance(seq, Sequence): + raise TypeError(f"seq: expected a sequence, got {type(seq)} with value {seq}") + if isinstance(ix, slice) and not isinstance(v, (Sequence, Iterable)): + raise TypeError(f"v: when ix is a slice, v must be a sequence or an iterable; got {type(v)} with value {v}") self.seq = seq self.ix = ix self.v = v + self._v_it = None # Provide __iter__ (even though implemented using len() and __getitem__()) # so that our __getitem__ can raise IndexError when needed, without it @@ -781,70 +816,92 @@ def _getone(self, k): return self.v # just one item # we already know k is in ix, so skip validation for speed. i = _index_in_slice(k, ix, n, _validate=False) - if i >= len(self.v): - raise IndexError(f"Replacement sequence too short; attempted to access index {i} with len {len(self.v)} (items: {self.v})") - return self.v[i] + if isinstance(self.v, Sequence): + if i >= len(self.v): + raise IndexError(f"Replacement sequence too short; attempted to access index {i} with len {len(self.v)} (items: {self.v})") + return self.v[i] + elif isinstance(self.v, Iterable): + if not self._v_it: + self._v_it = CountingIterator(self.v) + if i < self._v_it.count: + # Special case for `unpythonic.gmemo._MemoizedGenerator`, + # to support reverse-walking the start of a memoized infinite replacement + # that was created using `imemoize`/`fimemoize`/`gmemoize`. 
+ # It has the `__len__` and `__getitem__` methods, but does + # **not** support the full `collections.abc.Sequence` API. + # At this point, the memo contains all the items accessed or dropped so far. + bare_it = self._v_it._it + if all(hasattr(bare_it, name) for name in ("__len__", "__getitem__")): + assert i < len(bare_it) # because we counted them! + return bare_it[i] + raise IndexError(f"Trying to read an already consumed item of a non-sequence iterable; attempted to access index {i} with {self._v_it.count} items already consumed.") + n_skip = i - self._v_it.count + assert n_skip >= 0 + if n_skip: + # NOTE: If the iterable is memoized, the items we drop here will enter the memo. + self._v_it = drop(n_skip, self._v_it) + return next(self._v_it) + else: + assert False return self.seq[k] # not in slice -# TODO: fix flake8 E741 ambiguous variable name "l". Here it's part of the public API, so we'll have to wait until 15.0 to change the name. -def in_slice(i, s, l=None): +def in_slice(i, s, length=None): """Return whether the int i is in the slice s. For convenience, ``s`` may be int instead of slice; then return whether ``i == s``. - The optional ``l`` is the length of the sequence being indexed, used for + The optional ``length`` is the length of the sequence being indexed, used for interpreting any negative indices, and default start and stop values (if ``s.start`` or ``s.stop`` is ``None``). - If ``l is None``, negative or missing ``s.start`` or ``s.stop`` may raise - ValueError. (A negative ``s.step`` by itself does not need ``l``.) + If ``length is None``, negative or missing ``s.start`` or ``s.stop`` may raise + ValueError. (A negative ``s.step`` by itself does not need ``length``.) 
""" if not isinstance(s, (slice, int)): raise TypeError(f"s must be slice or int, got {type(s)} with value {s}") if not isinstance(i, int): raise TypeError(f"i must be int, got {type(i)} with value {i}") - wrap = _make_negidx_converter(l) + wrap = _make_negidx_converter(length) i = wrap(i) if isinstance(s, int): s = wrap(s) return i == s - start, stop, step = _canonize_slice(s, l, wrap) + start, stop, step = _canonize_slice(s, length, wrap) cmp_start, cmp_end = (ge, lt) if step > 0 else (le, gt) at_or_after_start = cmp_start(i, start) before_stop = cmp_end(i, stop) on_grid = (i - start) % step == 0 return at_or_after_start and on_grid and before_stop -# TODO: fix flake8 E741 ambiguous variable name "l". Here it's part of the public API, so we'll have to wait until 15.0 to change the name. -def index_in_slice(i, s, l=None): +def index_in_slice(i, s, length=None): """Return the index of the int i in the slice s, or None if i is not in s. (I.e. how-manyth item of the slice the index i is.) - The optional sequence length ``l`` works the same as in ``in_slice``. + The optional sequence length ``length`` works the same as in ``in_slice``. """ - return _index_in_slice(i, s, l) + return _index_in_slice(i, s, length) # efficiency: allow skipping the validation check for call sites # that have already checked with in_slice(). 
-def _index_in_slice(i, s, n=None, _validate=True): # n: length of sequence being indexed - if (not _validate) or in_slice(i, s, n): - wrap = _make_negidx_converter(n) - start, _, step = _canonize_slice(s, n, wrap) +def _index_in_slice(i, s, length=None, _validate=True): + if (not _validate) or in_slice(i, s, length): + wrap = _make_negidx_converter(length) + start, _, step = _canonize_slice(s, length, wrap) return (wrap(i) - start) // step -def _make_negidx_converter(n): # n: length of sequence being indexed - if n is not None: - if not isinstance(n, int): - raise TypeError(f"n must be int, got {type(n)} with value {n}") - if n <= 0: - raise ValueError(f"n must be an int >= 1, got {n}") +def _make_negidx_converter(length): + if length is not None: + if not isinstance(length, int): + raise TypeError(f"length must be int, got {type(length)} with value {length}") + if length <= 0: + raise ValueError(f"length must be an int >= 1, got {length}") def apply_conversion(k): - return k % n + return k % length else: def apply_conversion(k): - raise ValueError("Need n to interpret negative indices") + raise ValueError("Need length to interpret negative indices") def convert(k): if k is not None: if not isinstance(k, int): @@ -856,12 +913,12 @@ def convert(k): # Almost standard semantics for negative indices. Usually -n < k < n, # but here we must allow for conversion of the end position, for # which the last valid value is one past the end. - if n is not None and not -n <= k <= n: - raise IndexError(f"Should have -n <= k <= n, but n = {n}, and k = {k}") + if length is not None and not -length <= k <= length: + raise IndexError(f"Should have -length <= k <= length, but length = {length}, and k = {k}") return apply_conversion(k) if k < 0 else k return convert -def _canonize_slice(s, n=None, wrap=None): # convert negatives, inject defaults. +def _canonize_slice(s, length=None, wrap=None): # convert negatives, inject defaults. 
if not isinstance(s, slice): # Not triggered in the current code, because this is an internal function # and `in_slice` already checks; but let's be careful in case this is later @@ -874,23 +931,23 @@ def _canonize_slice(s, n=None, wrap=None): # convert negatives, inject defaults if step == 0: raise ValueError("slice step cannot be zero") # message copied from range(5)[0:4:0] - wrap = wrap or _make_negidx_converter(n) + wrap = wrap or _make_negidx_converter(length) start = wrap(s.start) if start is None: if step > 0: start = 0 else: - if n is None: - raise ValueError("Need n to determine default start for step < 0") + if length is None: + raise ValueError("Need length to determine default start for step < 0") start = wrap(-1) stop = wrap(s.stop) if stop is None: if step > 0: - if n is None: - raise ValueError("Need n to determine default stop for step > 0") - stop = n + if length is None: + raise ValueError("Need length to determine default stop for step > 0") + stop = length else: stop = -1 # yes, really -1 to have index 0 inside the slice diff --git a/unpythonic/conditions.py b/unpythonic/conditions.py index 1605add7..a02e853c 100644 --- a/unpythonic/conditions.py +++ b/unpythonic/conditions.py @@ -57,7 +57,8 @@ "available_restarts", "available_handlers", "restarts", "with_restarts", "handlers", - "ControlError"] + "ControlError", + "resignal_in", "resignal"] import threading from collections import deque, namedtuple @@ -68,7 +69,8 @@ from .collections import box, unbox from .arity import arity_includes, UnknownArity -from .misc import namelambda, equip_with_traceback, safeissubclass +from .excutil import equip_with_traceback +from .misc import namelambda, safeissubclass _stacks = threading.local() def _ensure_stacks(): # per-thread init @@ -86,7 +88,7 @@ class ControlError(Exception): when no handler handles the signal. """ -def signal(condition, *, cause=None): +def signal(condition, *, cause=None, protocol=None): """Signal a condition. 
Signaling a condition works similarly to raising an exception (pass an @@ -113,7 +115,18 @@ def signal(condition, *, cause=None): perform the restart and continue. If none of the matching handlers invokes a restart, `signal` returns - normally. There is no meaningful return value, it is always `None`. + normally. + + For most use cases, the return value is not needed. But for defining + custom error-handling protocols on top of `signal`, it can be very useful. + + The return value is the input `condition`, canonized to an instance + (even if originally, an exception *type* was passed to `signal`), + with its `__cause__` and `__protocol__` attributes filled in, + and with a traceback attached. For example, the `error` protocol + uses the return value to chain the unhandled signal properly into + a `ControlError` exception; as a result, the error report looks + like a standard exception chain, with nice-looking tracebacks. If you want to error out on unhandled conditions, see `error`, which is otherwise the same as `signal`, except it raises if `signal` would have @@ -124,6 +137,16 @@ def signal(condition, *, cause=None): keyword, it essentially performs a `signal ... from ...`. The default `cause=None` performs a plain `signal ...`. + The optional `protocol` argument is a low-level detail, meant for use by + error-handling protocols (including custom ones). + + The `protocol` is stored into the `__protocol__` attribute of the condition + instance. It is the callable that was used to perform the signaling. If not + given, it defaults to the `signal` function itself. The main use case is for + `resignal` (for signal type conversion); using this information, it can + automatically emit the new signal using the same protocol that was used + for the original signal (so that e.g. an `error` remains an `error`). + **Notes** This condition system is implemented on top of exceptions. 
The magic trick @@ -139,9 +162,8 @@ def signal(condition, *, cause=None): You can signal any exception or warning object, both builtins and any custom ones. - On Python 3.7 and later, the exception object representing the signaled - condition is equipped with a traceback, just like a raised exception. - On Python 3.6 this is not possible, so the traceback is `None`. + The exception object representing the signaled condition is equipped + with a traceback, just like a raised exception. """ # Since the handler is called normally, we don't unwind the call stack, # remaining inside the `signal()` call in the low-level code. @@ -149,6 +171,16 @@ def signal(condition, *, cause=None): # The unwinding, when it occurs, is performed when `invoke` is # called from inside the condition handler in the user code. + # stacklevel: in the traceback, omit equip_with_traceback(), _prepare_signal_instance(), and signal(). + # + # We can't have signal() there, because it would look like the call to `_prepare_signal_instance` + # was the cause of the signal, which is nonsense. + # + # Nicely, the resulting stack trace happens to be similar to how Python handles `raise` - the use site + # of `raise` (of an uncaught exception) is shown, but the internals of `raise` are not. + protocol = protocol or signal + condition = _prepare_signal_instance(condition, cause=cause, protocol=protocol, stacklevel=3) + def accepts_arg(f): try: if arity_includes(f, 1): @@ -157,6 +189,18 @@ def accepts_arg(f): return True # just assume it return False + for handler in _find_handlers(type(condition)): + if accepts_arg(handler): + handler(condition) + else: + handler() + + # When unhandled, return the condition instance. + # `error()` uses this return value; this allows us to provide a unified format for tracebacks. 
+ return condition + +def _prepare_signal_instance(condition, *, cause, protocol, stacklevel): + """Canonize a condition, and populate its technical data.""" # Consistency with behavior of exceptions in Python: # Even if a class is raised, as in `raise StopIteration`, the `raise` statement # converts it into an instance by instantiating with no args. So we need no @@ -173,25 +217,17 @@ def canonize(exc, err_reason): return exc() # instantiate with no args, like `raise` does except TypeError: # "issubclass() arg 1 must be a class" pass - error(ControlError(f"Only exceptions and subclasses of Exception can {err_reason}; got {type(condition)} with value {repr(condition)}.")) + error(ControlError(f"Only instances (derived too) and subclasses of BaseException can {err_reason}; got {type(condition)} with value {repr(condition)}.")) condition = canonize(condition, "be signaled") cause = canonize(cause, "act as the cause of another signal") condition.__cause__ = cause + condition.__protocol__ = protocol - # Embed a stack trace in the signal, like Python does for raised exceptions. - # This only works on Python 3.7 and later, because we need to create a traceback object in pure Python code. - try: - # In the result, omit equip_with_traceback() and signal(). - condition = equip_with_traceback(condition, stacklevel=2) - except NotImplementedError: # pragma: no cover - pass # well, we tried! + # Embed a stack trace in the signal, like Python does for raised exceptions. This API was added in Python 3.7. + condition = equip_with_traceback(condition, stacklevel=stacklevel) - for handler in _find_handlers(type(condition)): - if accepts_arg(handler): - handler(condition) - else: - handler() + return condition def invoke(name_or_restart, *args, **kwargs): """Invoke a restart currently in scope. Known as `INVOKE-RESTART` in Common Lisp. 
@@ -241,23 +277,23 @@ def invoke(name_or_restart, *args, **kwargs): A handler that just invokes the `use_value` restart is such a common use case that it is useful to have an abbreviation for it. This:: - with handlers((OhNoes, lambda c: invoke("use_value", 42))): + with handlers((OhNoes, lambda: invoke("use_value", 42))): ... can be abbreviated to:: - with handlers((OhNoes, lambda c: use_value(42))): + with handlers((OhNoes, lambda: use_value(42))): ... -The `lambda c:` is still required, for consistency with Common Lisp, as well as -to allow the user code to access the condition instance if needed. - -(A common use case is to embed, in the condition instance, the data needed +A common use case is to embed, in the condition instance, the data needed for constructing the actual value to be sent to the `use_value` restart. In Seibel's log file parser example, when the parser sees a corrupt log entry, it embeds that data into the condition instance, and sends it to the handler, which then can in principle repair the log entry, and then invoke `use_value` -with the repaired log entry.) +with the repaired log entry. Then you can do something like:: + + with handlers((OhNoes, lambda c: use_value(produce_fixed_entry_from(c)))): + ... **Notes**: @@ -270,26 +306,27 @@ def invoke(name_or_restart, *args, **kwargs): This pattern can be useful for defining similar shorthands for your own restarts. -(Note that restarts are looked up by name, so a single module-level definition +Note that restarts are looked up by name, so a single module-level definition of a shorthand for each uniquely named restart is enough. You can re-use the same shorthand for any restart that has the same name - just like there is just one `use_value` function, even though the `use_value` restart itself is defined separately at each `with restarts` site that provides it (since only -each site itself knows how to "use a value").) +each site itself knows how to "use a value"). 
If you want a version for use cases where the condition instance argument is not needed, so you could in those cases omit the `lambda c:`, you can write that as:: - use_constant = partial(invoker, "use_value") - with handlers((OhNoes, use_constant(42))): + make_use_constant = partial(invoker, "use_value") + use_42 = make_use_constant(42) + with handlers((OhNoes, use_42)): ... Note `invoker`, not `invoke`, and we are still left with a factory (since `invoker` itself is a factory and `partial` defers the call until it gets more arguments). You then call the factory function with your desired constant args/kwargs, to instantiate a handler that sends that specific -set of args/kwargs. +set of constant args/kwargs. """ def invoker(restart_name, *args, **kwargs): @@ -349,8 +386,21 @@ def invoker(restart_name, *args, **kwargs): with handlers((OhNoes, lambda c: use_value(42))): ... # calling some code that may cerror(OhNoes("ouch")) - (The `use_value` function is convenient especially when the value being sent - is not a constant, but depends on data in the condition instance `c`.) + The `use_value` function is convenient especially when the value being sent + is not a constant, but depends on data in the condition instance `c`. + To do the same for your own restart, use this pattern (see `invoke`):: + + frobnicate = partial(invoke, "frobnicate") + with handlers((OhNoes, frobnicate)): + ... + + In this case, the `frobnicate` restart - if it accepts one positional + argument - will receive the condition instance. To send something else, + you can also do something like this:: + + frobnicate = partial(invoke, "frobnicate") + with handlers((OhNoes, lambda c: frobnicate(c.args[0] * 42))): + ... 
**Notes** @@ -445,7 +495,7 @@ def __init__(self, *bindings): super().__init__(bindings) self.dq = _stacks.handlers -class InvokeRestart(Exception): +class InvokeRestart(BaseException): def __init__(self, restart, *args, **kwargs): # e is the context self.restart, self.a, self.kw = restart, args, kwargs # message when uncaught @@ -693,15 +743,7 @@ def error(condition, *, cause=None): keyword, it essentially performs a `error ... from ...`. The default `cause=None` performs a plain `error ...`. """ - signal(condition, cause=cause) - # TODO: If we want to support the debugger at some point in the future, - # TODO: this is the appropriate point to ask the user what to do, - # TODO: before the call stack unwinds. - # - # TODO: Do we want to give one last chance to handle the ControlError? - # TODO: And do we want to raise ControlError, or the original condition? - condition.__cause__ = cause # chain the causes, since we'll add a new one next. - raise ControlError("Unhandled error condition") from condition + _error(condition, cause=cause, protocol=error) def cerror(condition, *, cause=None): """Like `error`, but allow a handler to instruct the caller to ignore the error. @@ -727,17 +769,31 @@ class OddNumberError(Exception): def __init__(self, value): self.value = value - with handlers=((OddNumberError, proceed)): + with handlers((OddNumberError, proceed)): out = [] for x in range(10): if x % 2 == 1: cerror(OddNumberError(x)) # if unhandled, raises ControlError out.append(x) - assert out == [0, 2, 4, 6, 8] + assert out == list(range(10)) """ with restarts(proceed=(lambda: None)): # just for control, no return value - error(condition, cause=cause) + _error(condition, cause=cause, protocol=cerror) + +def _error(condition, *, cause, protocol): + # The return value is canonized to an instance (even if `condition` was an exception *type*), + # and importantly, it has a nice-looking traceback that points to this line here. 
+ # If the signal goes unhandled, Python's exception system will want to show that traceback + # when our `ControlError` *exception* goes uncaught. + condition = signal(condition, cause=cause, protocol=protocol) + # TODO: If we want to support the debugger at some point in the future, + # TODO: this is the appropriate point to ask the user what to do, + # TODO: before the call stack unwinds. + # + # TODO: Do we want to give one last chance to handle the ControlError? + # TODO: And do we want to raise ControlError, or the original condition? + raise ControlError("Unhandled error condition") from condition def warn(condition, *, cause=None): """Like `signal`, but emit a warning if the condition is not handled. @@ -787,7 +843,7 @@ def __init__(self, value): """ with restarts(muffle=(lambda: None)): # just for control, no return value with restarts(_proceed=(lambda: None)): # for internal use by unpythonic.test.fixtures - signal(condition, cause=cause) + signal(condition, cause=cause, protocol=warn) if isinstance(condition, Warning): warnings.warn(condition, stacklevel=2) # 2 to ignore our lispy `warn` wrapper. else: @@ -800,3 +856,107 @@ def __init__(self, value): muffle = invoker("muffle") muffle.__doc__ = "Invoke the 'muffle' restart. Restart function for use with `warn`." + +# Library to application signal type auto-conversion + +def _resignal_handler(mapping, condition): + """Remap a condition instance to another condition type. + + `mapping`: dict-like, `{LibraryExc0: ApplicationExc0, ...}` + + Each `LibraryExc` must be an exception type or a tuple of + exception types. It will be matched using `isinstance`. + + Each `ApplicationExc` can be an exception type or an exception + instance. If an instance, then that exact instance is signaled + as the converted signal. + + `condition`: the exception instance that was signaled, and is to + be converted (if it matches an entry in `mapping`). 
+ When converted, it is automatically chained into + an `ApplicationExc` signal. + + Conversions in `mapping` are tried in the order specified; hence, + just like in `with handlers`, place more specific types first. + + If no key in the mapping matches, this delegates to the next outer + signal handler. + """ + for LibraryExc, ApplicationExc in mapping.items(): + if isinstance(condition, LibraryExc): + # Resignal using the same error-handling protocol as the original signal + # (so that e.g. an `error(...)` resignals into an `error(...)` of the new type). + if not hasattr(condition, "__protocol__"): + error(f"Cannot resignal: protocol information missing in condition instance {condition}") + resignaler = condition.__protocol__ + resignaler(ApplicationExc, cause=condition) + # cancel and delegate to the next outer handler + +def resignal_in(body, mapping): + """Remap condition types in an expression. + + Like `unpythonic.excutil.reraise_in` (which see), but for conditions. + + Usage:: + + resignal_in(body, + {LibraryExc: ApplicationExc, + ...}) + + Whenever `body` signals an `exc` for which it holds that + `isinstance(exc, LibraryExc)`, that signal will be transparently + chained into an `ApplicationExc` signal. The automatic conversion + is in effect for the dynamic extent of `body`. + + ``body`` is a thunk (0-argument function). + + ``mapping`` is dict-like, ``{input0: output0, ...}``, where each + ``input`` is either an exception type, + or a tuple of exception types. + It will be matched using `isinstance`. + ``output`` is an exception type or an exception + instance. If an instance, then that exact + instance is signaled as the converted + signal. + + Conversions are tried in the order specified; hence, just like in + `with handlers`, place more specific types first. + + See also `resignal` for a block form. 
+ """ + with handlers((BaseException, partial(_resignal_handler, mapping))): + return body() + +@contextlib.contextmanager +def resignal(mapping): + """Remap condition types. Context manager. + + Like `unpythonic.excutil.reraise` (which see), but for conditions. + + Usage:: + + with resignal({LibraryExc: ApplicationExc, ...}): + body0 + ... + + Whenever the body signals an `exc` for which it holds that + `isinstance(exc, LibraryExc)`, that signal will be transparently + chained into an `ApplicationExc` signal. The automatic conversion + is in effect for the dynamic extent of the `with` block. + + ``mapping`` is dict-like, ``{input0: output0, ...}``, where each + ``input`` is either an exception type, + or a tuple of exception types. + It will be matched using `isinstance`. + ``output`` is an exception type or an exception + instance. If an instance, then that exact + instance is signaled as the converted + signal. + + Conversions are tried in the order specified; hence, just like in + `with handlers`, place more specific types first. + + See also `resignal_in` for an expression form. + """ + with handlers((BaseException, partial(_resignal_handler, mapping))): + yield diff --git a/unpythonic/dialects/__init__.py b/unpythonic/dialects/__init__.py new file mode 100644 index 00000000..644a5cee --- /dev/null +++ b/unpythonic/dialects/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +"""Dialects: Python the way you want it. + +These dialects, i.e. whole-module syntax transformations, are powered by +`mcpyrate`'s dialect subsystem. The user manual is at: + https://github.com/Technologicat/mcpyrate/blob/master/doc/dialects.md + +We provide these dialects mainly to demonstrate how to use that subsystem +to customize Python beyond what a local macro expander can do. + +For examples of how to use these particular dialects, see the unit tests. 
+""" + +# re-exports +from .lispython import * # noqa: F401, F403 +from .listhell import * # noqa: F401, F403 +from .pytkell import * # noqa: F401, F403 diff --git a/unpythonic/dialects/lispython.py b/unpythonic/dialects/lispython.py new file mode 100644 index 00000000..28265c5a --- /dev/null +++ b/unpythonic/dialects/lispython.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +"""Lispython: The love child of Python and Scheme. + +Powered by `mcpyrate` and `unpythonic`. +""" + +__all__ = ["Lispython", "Lispy"] + +__version__ = '2.0.0' + +from mcpyrate.quotes import macros, q # noqa: F401 + +from mcpyrate.dialects import Dialect +from mcpyrate.splicing import splice_dialect + +class Lispython(Dialect): + """**Schemers rejoice!** + + Multiple musings mix in a lambda, + Lament no longer the lack of let. + Languish no longer labelless, lambda, + Linked lists cons and fold. + Tail-call into recursion divine, + The final value always provide. + """ + + def transform_ast(self, tree): # tree is an ast.Module + with q as template: + __lang__ = "Lispython" # noqa: F841, just provide it to user code. + from unpythonic.syntax import (macros, tco, autoreturn, # noqa: F401, F811 + multilambda, quicklambda, namedlambda, fn, + where, + let, letseq, letrec, + dlet, dletseq, dletrec, + blet, bletseq, bletrec, + local, delete, do, do0, + let_syntax, abbrev, block, expr, + cond) + from unpythonic import cons, car, cdr, ll, llist, nil, prod, dyn, Values # noqa: F401, F811 + with autoreturn, quicklambda, multilambda, namedlambda, tco: + __paste_here__ # noqa: F821, just a splicing marker. + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. 
+ if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + + return tree + + +class Lispy(Dialect): + """**Pythonistas rejoice!** + + O language like Lisp, like Python! + Semantic changes sensibly carry, + Python's primary virtue vindicate. + Ire me not with implicit imports, + Let my IDE label mistakes. + """ + + def transform_ast(self, tree): # tree is an ast.Module + with q as template: + __lang__ = "Lispy" # noqa: F841, just provide it to user code. + from unpythonic.syntax import (macros, tco, autoreturn, # noqa: F401, F811 + multilambda, quicklambda, namedlambda) + # The important point is none of these expect the user code to look like + # anything but regular Python, so IDEs won't yell about undefined names; + # just the semantics are slightly different. + # + # Even if the user code uses `fn[]` (to make `quicklambda` actually do anything), + # that macro must be explicitly imported. It works, because `splice_dialect` + # hoists macro-imports from the top level of the user code into the top level + # of the template. + with autoreturn, quicklambda, multilambda, namedlambda, tco: + __paste_here__ # noqa: F821, just a splicing marker. + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. 
+ if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + + return tree diff --git a/unpythonic/dialects/listhell.py b/unpythonic/dialects/listhell.py new file mode 100644 index 00000000..9d1defb6 --- /dev/null +++ b/unpythonic/dialects/listhell.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +"""Listhell: It's not Lisp, it's not Python, it's not Haskell. + +Powered by `mcpyrate` and `unpythonic`. +""" + +__all__ = ["Listhell"] + +__version__ = '2.0.0' + +from mcpyrate.quotes import macros, q # noqa: F401 + +from mcpyrate.dialects import Dialect +from mcpyrate.splicing import splice_dialect + +class Listhell(Dialect): + def transform_ast(self, tree): # tree is an ast.Module + with q as template: + __lang__ = "Listhell" # noqa: F841, just provide it to user code. + from unpythonic.syntax import macros, prefix, q, u, kw, autocurry # noqa: F401, F811 + # Auxiliary syntax elements for the macros + from unpythonic import apply # noqa: F401 + from unpythonic import composerc as compose # compose from Right, Currying # noqa: F401 + with prefix, autocurry: + __paste_here__ # noqa: F821, just a splicing marker. + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. + if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + + return tree diff --git a/unpythonic/dialects/pytkell.py b/unpythonic/dialects/pytkell.py new file mode 100644 index 00000000..f3780794 --- /dev/null +++ b/unpythonic/dialects/pytkell.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +"""Pytkell: Because it's good to have a kell. 
+ +Powered by `mcpyrate` and `unpythonic`. +""" + +__all__ = ["Pytkell"] + +__version__ = '2.0.0' + +from mcpyrate.quotes import macros, q # noqa: F401 + +from mcpyrate.dialects import Dialect +from mcpyrate.splicing import splice_dialect + +class Pytkell(Dialect): + def transform_ast(self, tree): # tree is an ast.Module + with q as template: + __lang__ = "Pytkell" # noqa: F841, just provide it to user code. + from unpythonic.syntax import (macros, lazy, lazyrec, lazify, autocurry, # noqa: F401, F811 + where, + let, letseq, letrec, + dlet, dletseq, dletrec, + blet, bletseq, bletrec, + local, delete, do, do0, + cond, forall) + # Auxiliary syntax elements for the macros. + from unpythonic.syntax import insist, deny # noqa: F401 + # Functions that have a haskelly feel to them. + from unpythonic import (foldl, foldr, scanl, scanr, # noqa: F401 + s, imathify, gmathify, frozendict, + memoize, fupdate, fup, + gmemoize, imemoize, fimemoize, + islice, take, drop, split_at, first, second, nth, last, + flip, rotate) + from unpythonic import composerc as compose # compose from Right, Currying (Haskell's . operator) # noqa: F401 + # This is a bit lispy, but we're not going out of our way to provide + # a haskelly surface syntax for these. + from unpythonic import cons, car, cdr, ll, llist, nil # noqa: F401 + with lazify, autocurry: + __paste_here__ # noqa: F821, just a splicing marker. + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. 
+ if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + + return tree diff --git a/unpythonic/dialects/tests/test_lispy.py b/unpythonic/dialects/tests/test_lispy.py new file mode 100644 index 00000000..8c5f8aed --- /dev/null +++ b/unpythonic/dialects/tests/test_lispy.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +"""Test the Lispy dialect. + +Like Lispython, but more pythonic: nothing is imported implicitly, +except the macros injected by the dialect template (to perform the +whole-module semantic changes at macro expansion time). +""" + +from ...dialects import dialects, Lispy # noqa: F401 + +from ...syntax import macros, test, the # noqa: F401 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations, call_cc, letrec, fn, local, cond # noqa: F401, F811 +from ...syntax import _ # optional, makes IDEs happy +from ...funutil import Values + +def runtests(): + print(f"Hello from {__lang__}!") # noqa: F821, the dialect template defines it. 
+ + # auto-TCO (both in defs and lambdas), implicit return in tail position + with testset("implicit tco, implicit autoreturn"): + def fact(n): + def f(k, acc): + if k == 1: + return acc # "return" still available for early return + f(k - 1, k * acc) + f(n, acc=1) + test[fact(4) == 24] + fact(5000) # no crash (and correct result, since Python uses bignums transparently) + + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + evenp(10000)] # no crash # noqa: F821 + test[t is True] + + # lambdas are named automatically + with testset("implicit namedlambda"): + square = lambda x: x**2 + test[square(3) == 9] + test[square.__name__ == "square"] + + # the underscore (in Lispy, the `fn` macro must be imported explicitly) + cube = fn[_**3] + test[cube(3) == 27] + test[cube.__name__ == "cube"] + + my_mul = fn[_ * _] + test[my_mul(2, 3) == 6] + test[my_mul.__name__ == "my_mul"] + + # lambdas can have multiple expressions and local variables + # + # If you need to return a literal list from a lambda, use an extra set of + # brackets; the outermost brackets always enable multiple-expression mode. + # + with testset("implicit multilambda"): + # In Lispy, the `local` macro must be imported explicitly. + # `local[name << value]` makes a local variable in a multilambda (or in any `do[]` environment). + mylam = lambda x: [local[y << 2 * x], # noqa: F821 + y + 1] # noqa: F821 + test[mylam(10) == 21] + + a = lambda x: [local[t << x % 2], # noqa: F821 + cond[t == 0, "even", # noqa: F821 + t == 1, "odd", + None]] # cond[] requires an else branch + test[a(2) == "even"] + test[a(3) == "odd"] + + # MacroPy #21; namedlambda must be in its own with block in the + # dialect implementation or the particular combination of macros + # invoked by Lispy will fail (uncaught jump, __name__ not set). + # + # With `mcpyrate` this shouldn't matter, but we're keeping the example. 
+ with testset("autonamed letrec lambdas, multiple-expression let body"): + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + [local[x << evenp(100)], # noqa: F821, multi-expression let body is a do[] environment + (x, evenp.__name__, oddp.__name__)]] # noqa: F821 + test[t == (True, "evenp", "oddp")] + + with testset("integration with continuations"): + with continuations: # has TCO; should be skipped by the implicit `with tco` inserted by the dialect + k = None # kontinuation + def setk(*args, cc): + nonlocal k + k = cc # current continuation, i.e. where to go after setk() finishes + Values(*args) # multiple-return-values + def doit(): + lst = ['the call returned'] + *more, = call_cc[setk('A')] + lst + list(more) + test[doit() == ['the call returned', 'A']] + # We can now send stuff into k, as long as it conforms to the + # signature of the assignment targets of the "call_cc". + test[k('again') == ['the call returned', 'again']] + test[k('thrice', '!') == ['the call returned', 'thrice', '!']] + + # We must have some statement here to make the implicit autoreturn happy, + # because the continuations testset is the last one, and the top level of + # a `with continuations` block is not allowed to have a `return`. 
+ pass + +if __name__ == '__main__': + with session(__file__): + runtests() diff --git a/unpythonic/dialects/tests/test_lispython.py b/unpythonic/dialects/tests/test_lispython.py new file mode 100644 index 00000000..4085b07f --- /dev/null +++ b/unpythonic/dialects/tests/test_lispython.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +"""Test the Lispython dialect.""" + +from ...dialects import dialects, Lispython # noqa: F401 + +from ...syntax import macros, test, the # noqa: F401 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations, call_cc # noqa: F401, F811 + +# `unpythonic` is effectively Lispython's stdlib; not everything gets imported by default. +from ...fold import foldl + +# Of course, all of Python's stdlib is available too. +# +# So is **any** Python library; the ability to use arbitrary Python libraries in +# a customized Python-based language is pretty much the whole point of dialects. +# +from operator import mul + +def runtests(): + print(f"Hello from {__lang__}!") # noqa: F821, the dialect template defines it. 
+ + with testset("dialect builtins"): + test[prod((2, 3, 4)) == 24] # noqa: F821, bye missing battery, hello new dialect builtin + test[foldl(mul, 1, (2, 3, 4)) == 24] + + # cons, car, cdr, ll, llist are builtins (for more linked list utils, import them from unpythonic) + c = cons(1, 2) # noqa: F821 + test[tuple(c) == (1, 2)] + test[car(c) == 1] # noqa: F821 + test[cdr(c) == 2] # noqa: F821 + test[ll(1, 2, 3) == llist((1, 2, 3))] # noqa: F821 + + # all unpythonic.syntax let[], letseq[], letrec[] constructs are considered dialect builtins + # (including the decorator versions, let_syntax and abbrev) + x = let[[a << 21] in 2 * a] # noqa: F821 + test[x == 42] + + x = letseq[[a << 1, # noqa: F821 + a << 2 * a, # noqa: F821 + a << 2 * a] in # noqa: F821 + a] # noqa: F821 + test[x == 4] + + # rackety cond + a = lambda x: cond[x < 0, "nope", # noqa: F821 + x % 2 == 0, "even", + "odd"] + test[a(-1) == "nope"] + test[a(2) == "even"] + test[a(3) == "odd"] + + # auto-TCO (both in defs and lambdas), implicit return in tail position + with testset("implicit tco, implicit autoreturn"): + def fact(n): + def f(k, acc): + if k == 1: + return acc # "return" still available for early return + f(k - 1, k * acc) + f(n, acc=1) + test[fact(4) == 24] + fact(5000) # no crash (and correct result, since Python uses bignums transparently) + + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + evenp(10000)] # no crash # noqa: F821 + test[t is True] + + # lambdas are named automatically + with testset("implicit namedlambda"): + square = lambda x: x**2 + test[square(3) == 9] + test[square.__name__ == "square"] + + # the underscore (NOTE: due to this, "fn" is a reserved name in Lispython) + cube = fn[_**3] # noqa: F821 + test[cube(3) == 27] + test[cube.__name__ == "cube"] + + my_mul = fn[_ * _] # noqa: F821 + test[my_mul(2, 3) == 6] + test[my_mul.__name__ == "my_mul"] + + # lambdas can have multiple expressions 
and local variables + # + # If you need to return a literal list from a lambda, use an extra set of + # brackets; the outermost brackets always enable multiple-expression mode. + # + with testset("implicit multilambda"): + mylam = lambda x: [local[y << 2 * x], # noqa: F821, local[name << value] makes a local variable + y + 1] # noqa: F821 + test[mylam(10) == 21] + + a = lambda x: [local[t << x % 2], # noqa: F821 + cond[t == 0, "even", # noqa: F821 + t == 1, "odd", + None]] # cond[] requires an else branch + test[a(2) == "even"] + test[a(3) == "odd"] + + # MacroPy #21; namedlambda must be in its own with block in the + # dialect implementation or the particular combination of macros + # invoked by Lispython will fail (uncaught jump, __name__ not set). + # + # With `mcpyrate` this shouldn't matter, but we're keeping the example. + with testset("autonamed letrec lambdas, multiple-expression let body"): + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + [local[x << evenp(100)], # noqa: F821, multi-expression let body is a do[] environment + (x, evenp.__name__, oddp.__name__)]] # noqa: F821 + test[t == (True, "evenp", "oddp")] + + # actually the multiple-expression environment is an unpythonic.syntax.do[], + # which can be used in any expression position. + with testset("do and do0"): + x = do[local[z << 2], # noqa: F821 + 3 * z] # noqa: F821 + test[x == 6] + + # do0[] is the same, but returns the value of the first expression instead of the last one. + x = do0[local[z << 3], # noqa: F821 + print("hi from do0, z is {}".format(z))] # noqa: F821 + test[x == 3] + + with testset("integration with continuations"): + with continuations: # has TCO; should be skipped by the implicit `with tco` inserted by the dialect + k = None # kontinuation + def setk(*args, cc): + nonlocal k + k = cc # current continuation, i.e. 
where to go after setk() finishes + Values(*args) # multiple-return-values # noqa: F821, Lispython imports Values by default. + def doit(): + lst = ['the call returned'] + *more, = call_cc[setk('A')] + lst + list(more) + test[doit() == ['the call returned', 'A']] + # We can now send stuff into k, as long as it conforms to the + # signature of the assignment targets of the "call_cc". + test[k('again') == ['the call returned', 'again']] + test[k('thrice', '!') == ['the call returned', 'thrice', '!']] + + # We must have some statement here to make the implicit autoreturn happy, + # because the continuations testset is the last one, and the top level of + # a `with continuations` block is not allowed to have a `return`. + pass + +if __name__ == '__main__': + with session(__file__): + runtests() diff --git a/unpythonic/dialects/tests/test_listhell.py b/unpythonic/dialects/tests/test_listhell.py new file mode 100644 index 00000000..8959c9ad --- /dev/null +++ b/unpythonic/dialects/tests/test_listhell.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +"""Test the Listhell dialect.""" + +# from mcpyrate.debug import dialects, StepExpansion +from ...dialects import dialects, Listhell # noqa: F401 + +from ...syntax import macros, test # noqa: F401 +from ...test.fixtures import session, testset + +from ...syntax import macros, let, where, local, delete, do # noqa: F401, F811 +from unpythonic import foldr, cons, nil, ll + +def runtests(): + # Function calls can be made in prefix notation, like in Lisps. + # The first element of a literal tuple is the function to call, + # the rest are its arguments. + (print, f"Hello from {__lang__}!") # noqa: F821, the dialect template defines it. + + x = 42 # can write any regular Python, too + + # quote operator q locally turns off the function-call transformation: + t1 = (q, 1, 2, (3, 4), 5) # q takes effect recursively # noqa: F821, the dialect template defines `q`. 
+ t2 = (q, 17, 23, x) # unlike in Lisps, x refers to its value even in a quote # noqa: F821 + (print, t1, t2) + + # Calls to the test framework are written with pythonic function call notation, + # because if the `prefix` macro isn't working, then writing them in prefix notation + # could cause a crash while testing. + with testset("quoting"): + # unquote operator u locally turns the transformation back on: + t3 = (q, (u, print, 42), (print, 42), "foo", "bar") # noqa: F821 + test[t3 == (q, None, (print, 42), "foo", "bar")] # noqa: F821 + + # quotes nest; call transformation made when quote level == 0 + t4 = (q, (print, 42), (q, (u, u, print, 42)), "foo", "bar") # noqa: F821 + test[t4 == (q, (print, 42), (None,), "foo", "bar")] # noqa: F821 + + # Be careful: + # + # In Listhell, `(x,)` means "call the 0-arg function `x`". + # But if `x` is not callable, `currycall` will return + # the value as-is (needed for interaction with `call_ec` + # and some other replace-def-with-value decorators). + # + # `(q, x)` means "the tuple where the first element is `x`". + test[(x,) == 42] + test[(q, x) == (tuple, [x])] # noqa: F821 + + # give named args with kw(...) [it's syntax, not really a function!]: + with testset("named arguments with kw()"): + def f(*, a, b): + return (q, a, b) # noqa: F821 + # in one kw(...), or... 
+ test[(f, kw(a="hi there", b="foo")) == (q, "hi there", "foo")] # noqa: F821 + # in several kw(...), doesn't matter + test[(f, kw(a="hi there"), kw(b="foo")) == (q, "hi there", "foo")] # noqa: F821 + # in case of duplicate name across kws, rightmost wins + test[(f, kw(a="hi there"), kw(b="foo"), kw(b="bar")) == (q, "hi there", "bar")] # noqa: F821 + + # give *args with unpythonic.apply, like in Lisps: + with testset("starargs with apply()"): + lst = [1, 2, 3] + def g(*args, **kwargs): + return args + (tuple, (sorted, (kwargs.items,))) + test[(apply, g, lst) == (q, 1, 2, 3)] # noqa: F821 + # lst goes last; may have other args first + test[(apply, g, "hi", "ho", lst) == (q, "hi", "ho", 1, 2, 3)] # noqa: F821 + # named args in apply are also fine + test[(apply, g, "hi", "ho", lst, kw(myarg=4)) == (q, "hi", "ho", 1, 2, 3, ('myarg', 4))] # noqa: F821 + + # Function call transformation only applies to tuples in load context + # (i.e. NOT on the LHS of an assignment) + with testset("no transform on LHS of assignment"): + a, b = (q, 100, 200) # noqa: F821 + test[a == 100 and b == 200] + a, b = (q, b, a) # pythonic swap in prefix syntax; must quote RHS # noqa: F821 + test[a == 200 and b == 100] + + with testset("transform of let bindings"): + # the prefix syntax leaves alone the let binding syntax even when using tuples, ((name0, value0), ...) 
+ a = let[(x, 42)][x << x + 1] + test[a == 43] + + # but the RHSs of the bindings are transformed normally: + def double(x): + return 2 * x + a = let[(x, (double, 21))][x << x + 1] + test[a == 43] + + # As of v0.15.0, the preferred let bindings syntax is env-assignment, + # so these examples become: + a = let[x << 42][x << x + 1] + test[a == 43] + + a = let[x << (double, 21)][x << x + 1] + test[a == 43] + + # similarly, the prefix syntax leaves the "body tuple" of a do alone + # (syntax, not semantically a tuple), but recurses into it: + with testset("transform of do body"): + a = do[1, 2, 3] + test[a == 3] + a = do[1, 2, (double, 3)] + test[a == 6] + + # the extra bracket syntax (implicit do) has no danger of confusion, as it's a list, not tuple + a = let[x << 3][[ + 1, + 2, + (double, x)]] + test[a == 6] + + with testset("final example"): + my_map = lambda f: (foldr, (compose, cons, f), nil) # noqa: F821 + test[(my_map, double, (q, 1, 2, 3)) == (ll, 2, 4, 6)] # noqa: F821 + +if __name__ == '__main__': + with (session, __file__): + (runtests,) diff --git a/unpythonic/dialects/tests/test_pytkell.py b/unpythonic/dialects/tests/test_pytkell.py new file mode 100644 index 00000000..3c78cd63 --- /dev/null +++ b/unpythonic/dialects/tests/test_pytkell.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +"""Test the Pytkell dialect.""" + +# from mcpyrate.debug import dialects, StepExpansion +from ...dialects import dialects, Pytkell # noqa: F401 + +from ...syntax import macros, test, the, test_raises # noqa: F401 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations, call_cc, tco # noqa: F401, F811 +from ...funutil import Values +from ...misc import timer + +from types import FunctionType +from operator import add, mul + +def runtests(): + print(f"Hello from {__lang__}!") # noqa: F821, the dialect template defines it. 
+ + # function definitions (both def and lambda) and calls are auto-curried + with testset("implicit autocurry"): + def add3(a, b, c): + return a + b + c + + a = add3(1) + test[isinstance(the[a], FunctionType)] + a = a(2) + test[isinstance(the[a], FunctionType)] + a = a(3) + test[isinstance(the[a], int)] + + # actually partial evaluation so any of these works + test[add3(1)(2)(3) == 6] + test[add3(1, 2)(3) == 6] + test[add3(1)(2, 3) == 6] + test[add3(1, 2, 3) == 6] + + # arguments of a function call are auto-lazified (converted to promises, lazy[]) + with testset("implicit lazify"): + def addfirst2(a, b, c): + # a and b are read, so their promises are forced + # c is not used, so not evaluated either + return a + b + test[addfirst2(1)(2)(1 / 0) == 3] + + # let-bindings are auto-lazified + with test["y is unused, so it should not be evaluated"]: + x = let[[x << 42, # noqa: F821 + y << 1 / 0] in x] # noqa: F821 + return x == 42 # access `x`, to force the promise + + # assignments are not (because they can imperatively update existing names) + with test_raises[ZeroDivisionError]: + a = 1 / 0 + + # so if you want that, use lazy[] manually (it's a builtin in Pytkell) + with test: + a = lazy[1 / 0] # this blows up only when the value is read (name 'a' in Load context) # noqa: F821 + + # manually lazify items in a data structure literal, recursively (see unpythonic.syntax.lazyrec): + with test: + a = lazyrec[(1, 2, 3 / 0)] # noqa: F821 + return a[:-1] == (1, 2) # reading a slice forces only that slice + + # laziness passes through + def g(a, b): + return a # b not used + def f(a, b): + return g(a, b) # b is passed along, but its value is not used + test[f(42, 1 / 0) == 42] + + def f(a, b): + return (a, b) + test[f(1, 2) == (1, 2)] + test[(flip(f))(1, 2) == (2, 1)] # NOTE flip reverses all (doesn't just flip the first two) # noqa: F821 + + # flip reverses only those arguments that are passed *positionally* + test[(flip(f))(1, b=2) == (1, 2)] # b -> kwargs # noqa: F821 + + 
# http://www.cse.chalmers.se/~rjmh/Papers/whyfp.html + with testset("iterables"): + my_sum = foldl(add, 0) # noqa: F821 + my_prod = foldl(mul, 1) # noqa: F821 + my_map = lambda f: foldr(compose(cons, f), nil) # compose is unpythonic.fun.composerc # noqa: F821 + + test[my_sum(range(1, 5)) == 10] + test[my_prod(range(1, 5)) == 24] + test[tuple(my_map((lambda x: 2 * x), (1, 2, 3))) == (2, 4, 6)] + + test[tuple(scanl(add, 0, (1, 2, 3))) == (0, 1, 3, 6)] # noqa: F821 + test[tuple(scanr(add, 0, (1, 2, 3))) == (0, 3, 5, 6)] # NOTE output ordering different from Haskell # noqa: F821 + + with testset("let constructs"): + # let-in + x = let[[a << 21] in 2 * a] # noqa: F821 + test[x == 42] + + x = let[[a << 21, # noqa: F821 + b << 17] in # noqa: F821 + 2 * a + b] # noqa: F821 + test[x == 59] + + # let-where + x = let[2 * a, where[a << 21]] # noqa: F821 + test[x == 42] + + x = let[2 * a + b, # noqa: F821 + where[a << 21, # noqa: F821 + b << 17]] # noqa: F821 + test[x == 59] + + # nondeterministic evaluation (essentially do-notation in the List monad) + # + # pythagorean triples + with testset("nondeterministic evaluation"): + # TODO: This is very slow in Pytkell; investigate whether the cause is `lazify`, `autocurry`, or both. + # + # Running the same code in a macro-enabled IPython (i.e. without Pytkell), there is no noticeable delay + # after you press enter, before it gives the result. 
If you want to try it, you'll need to: + # + # %load_ext mcpyrate.repl.iconsole + # from unpythonic.syntax import macros, forall, test + # from unpythonic import insist + # + pt = forall[z << range(1, 21), # hypotenuse # noqa: F821 + x << range(1, z + 1), # shorter leg # noqa: F821 + y << range(x, z + 1), # longer leg # noqa: F821 + insist(x * x + y * y == z * z), # see also deny() # noqa: F821 + (x, y, z)] # noqa: F821 + test[tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), + (8, 15, 17), (9, 12, 15), (12, 16, 20))] + + with testset("functional update"): + # functional update for sequences + tup1 = (1, 2, 3, 4, 5) + tup2 = fup(tup1)[2:] << (10, 20, 30) # fup(sequence)[idx_or_slice] << sequence_of_values # noqa: F821 + test[tup2 == (1, 2, 10, 20, 30)] + test[tup1 == (1, 2, 3, 4, 5)] + + # immutable dict, with functional update + d1 = frozendict(foo='bar', bar='tavern') # noqa: F821 + d2 = frozendict(d1, bar='pub') # noqa: F821 + test[tuple(sorted(d1.items())) == (('bar', 'tavern'), ('foo', 'bar'))] + test[tuple(sorted(d2.items())) == (('bar', 'pub'), ('foo', 'bar'))] + + # s = mathematical Sequence (const, arithmetic, geometric, power) + with testset("mathematical sequences with s()"): + test[last(take(10000, s(1, ...))) == 1] # noqa: F821 + test[last(take(5, s(0, 1, ...))) == 4] # noqa: F821 + test[last(take(5, s(1, 2, 4, ...))) == (1 * 2 * 2 * 2 * 2)] # 16 # noqa: F821 + test[last(take(5, s(2, 4, 16, ...))) == (((((2)**2)**2)**2)**2)] # 65536 # noqa: F821 + + # s() takes care to avoid roundoff + test[last(take(1001, s(0, 0.001, ...))) == 1] # noqa: F821 + + # iterables returned by s() support infix math + # (to add infix math support to some other iterable, imathify(iterable)) + c = s(1, 3, ...) + s(2, 4, ...) # noqa: F821 + test[tuple(take(5, c)) == (3, 7, 11, 15, 19)] # noqa: F821 + test[tuple(take(5, c)) == (23, 27, 31, 35, 39)] # consumed! 
# noqa: F821 + + # imemoize = memoize Iterable (makes a gfunc, drops math support) + # gmathify returns a new gfunc that adds infix math support + # to generators the original gfunc makes. + # + # see also gmemoize, fimemoize in unpythonic + # + with testset("mathematical sequences utilities"): + mi = lambda x: gmathify(imemoize(x)) # noqa: F821 + a = mi(s(1, 3, ...)) # noqa: F821 + b = mi(s(2, 4, ...)) # noqa: F821 + c = lambda: a() + b() + test[tuple(take(5, c())) == (3, 7, 11, 15, 19)] # noqa: F821 + test[tuple(take(5, c())) == (3, 7, 11, 15, 19)] # now it's a new instance; no recomputation # noqa: F821 + + factorials = mi(scanl(mul, 1, s(1, 2, ...))) # 0!, 1!, 2!, ... # noqa: F821 + test[last(take(6, factorials())) == 120] # noqa: F821 + test[first(drop(5, factorials())) == 120] # noqa: F821 + + squares = s(1, 2, ...)**2 # noqa: F821 + test[last(take(10, squares)) == 100] # noqa: F821 + + harmonic = 1 / s(1, 2, ...) # noqa: F821 + test[last(take(10, harmonic)) == 1 / 10] # noqa: F821 + + # unpythonic's continuations are supported + with testset("integration with continuations"): + with continuations: + k = None # kontinuation + def setk(*args, cc): + nonlocal k + k = cc # current continuation, i.e. where to go after setk() finishes + return Values(*args) # multiple-return-values + def doit(): + lst = ['the call returned'] + *more, = call_cc[setk('A')] + return lst + list(more) + test[doit() == ['the call returned', 'A']] + # We can now send stuff into k, as long as it conforms to the + # signature of the assignment targets of the "call_cc". + test[k('again') == ['the call returned', 'again']] + test[k('thrice', '!') == ['the call returned', 'thrice', '!']] + + # as is unpythonic's tco + with testset("integration with tco"): + with tco: + def fact(n): + def f(k, acc): + if k == 1: + return acc + return f(k - 1, k * acc) + return f(n, acc=1) + test[fact(4) == 24] + + # **CAUTION**: Pytkell is slow, because so much happens at run time. 
On an i7-4710MQ: + # + # - The performance test below, `fact(5000)`, completes in about 500ms. + # + # **Without** Pytkell, using a macro-enabled IPython session: + # + # - `fact(5000)` with the same definition (the `with tco` block above) completes in about 15ms. + # - `prod(range(1, 5001))` completes in about 7ms. (This is `unpythonic.prod`, which uses + # `unpythonic`'s custom fold implementation.) + # - The simplest thing that works: + # n = 1 + # for k in range(1, 5001): + # n *= k + # completes in about 5ms. + print("Performance...") + with timer() as tictoc: + fact(5000) # no crash + print(" Time taken for factorial of 5000: {:g}s".format(tictoc.dt)) + +if __name__ == '__main__': + with session(__file__): + runtests() diff --git a/unpythonic/dispatch.py b/unpythonic/dispatch.py index 1c8d4f46..5f06e414 100644 --- a/unpythonic/dispatch.py +++ b/unpythonic/dispatch.py @@ -1,19 +1,37 @@ # -*- coding: utf-8; -*- -"""A multiple-dispatch decorator for Python. +"""A multiple-dispatch system (a.k.a. multimethods) for Python. + +Terminology: + + - The function that supports multiple call signatures is a *generic function*. + - Its individual implementations are *multimethods*. + +We use the term *multimethod* to distinguish them from the usual sense of *method* +in Python, and because this is multiple dispatch. Somewhat like `functools.singledispatch`, but for multiple dispatch. https://docs.python.org/3/library/functools.html#functools.singledispatch + +Somewhat like `typing.overload`, but for run-time use, not static type-checking. +Here the implementations are given in the multimethod bodies. + + https://docs.python.org/3/library/typing.html#typing.overload """ -__all__ = ["generic", "generic_for", "typed"] +# TODO: Reimplement in the same spirit as `functools.singledispatch`? +# TODO: The complication is, we support `typing` type specifications, not only concrete types. +# TODO: OTOH, `singledispatch` does handle the specific case of ABCs, via the subtype hooks. 
+ +__all__ = ["isgeneric", "generic", "augment", "typed", + "methods", "format_methods", "list_methods"] from functools import partial, wraps from itertools import chain import inspect import typing -from .arity import resolve_bindings, getfunc +from .arity import getfunc, _resolve_bindings from .typecheck import isoftype from .regutil import register_decorator @@ -26,61 +44,76 @@ # TODO: meh, a list instance's __doc__ is not writable. Put this doc somewhere. # -# self_parameter_names.__doc__ = """self/cls parameter names for `@generic`. +# self_parameter_names.__doc__ = """`self`/`cls` parameter names for `@generic`. # # When one of these parameter names appears in the first positional parameter position # of a function decorated with `@generic` (or `@typed`), it is detected as being an # OOP-related `self` or `cls` parameter, triggering special handling. # -# If you use something other than the usual Python naming conventions for the self/cls +# If you use something other than the usual Python naming conventions for the `self`/`cls` # parameter, just append the names you use to this list. # """ +def isgeneric(f): + """Return whether the callable `f` is a generic function. + + If `f` was declared `@generic` (which see), return the string `"generic"`. + If `f` was declared `@typed` (which see), return the string `"typed"`. + Else return `False`. + """ + if hasattr(f, "_method_registry"): + if hasattr(f, "_register"): + return "generic" + return "typed" + return False + +# TODO: We essentially need the fullname because the second and further invocations +# TODO: of `@generic`, for the same generic function, receive an entirely different +# TODO: run-time object - the new multimethod. There is no way to know which existing +# TODO: dispatcher to connect that to, other than having a registry that maps the +# TODO: fullname of each already-existing generic function to its dispatcher object. 
@register_decorator(priority=98) -def generic_for(target): - """Parametric decorator. Add a method to function `target`. +def generic(f): + """Decorator. Make `f` a generic function (in the sense of CLOS or Julia). - Like `@generic`, but the target function on which the method will be - registered is chosen separately, so that you can extend a generic - function previously defined in some other `.py` source file. + Multiple dispatch solves *the expression problem*: + https://en.wikipedia.org/wiki/Expression_problem - Usage:: + Practical use cases: - # example.py - from unpythonic import generic + - Eliminate `if`/`elif`/`elif`... blocks that switch by `isinstance` on + function arguments, and then raise `TypeError` in the final `else`, + by having a central implementation for this machinery. - @generic - def f(x: int): - ... + This not only kills boilerplate, but makes the dispatch extensible, + since the dispatcher lives outside the original function definition. + There is no need to monkey-patch the original to add a new case. + See `@augment`. - # main.py - from unpythonic import generic_for - import example + - Dispatch on an extensible hierarchy of abstract features (called *traits*) + that is separate from the concrete type hierarchy, using the *holy traits* + pattern. For example, "behaves like a number" can be a trait. - @generic_for(example.f) - def f(x: float): - ... - """ - # TODO: maybe needs some more official way to detect if `target` has been declared `@generic`. - if not hasattr(target, "_method_registry"): - raise TypeError(f"{target} is not a generic function, cannot add methods to it.") - return partial(_register_generic, _getfullname(target)) + See `unpythonic/tests/test_dispatch.py` for an example. -@register_decorator(priority=98) -def generic(f): - """Decorator. Make `f` a generic function (in the sense of CLOS or Julia). 
+ - Functions like the builtin `range`, where the *role* of an argument in a + particular position depends on the *number of* arguments passed in the call. + With `@generic`, each case can have its parameters named descriptively. **How to use**: - Just make several function definitions, one for each call signature you - want to support, and decorate each of them with `@generic`. Here - *signature* refers to specific combinations of argument types and/or - different shapes for the argument list. + Make several function definitions, with the same name in the same lexical + scope, one for each call signature you want to support, and decorate each of + them with `@generic`. Here *signature* refers to specific combinations of + argument types and/or different shapes for the argument list. The first definition implicitly creates the generic function (like in Julia). All of the definitions, including the first one, become registered - as *methods* of the *generic function*. + as *multimethods* of the *generic function*. + + The return value of `generic` is the multiple-dispatch dispatcher + for the generic function that was created or modified. A generic function is identified by its *fullname*, defined as "{f.__module__}.{f.__qualname__}". The fullname is computed @@ -95,7 +128,7 @@ def generic(f): @generic def example(x: int, y: int): ... # implementation here - @generic # noqa: F811, registered as a method of the same generic function. + @generic # noqa: F811, registered as a multimethod of the same generic function. def example(x: str, y: int): ... # implementation here @generic # noqa: F811 @@ -114,40 +147,36 @@ def example(): then, nested lambdas are not supported. Be careful that if you later rebind a variable that refers to a generic - function; that will not remove previously existing method definitions. + function, that will not remove previously existing method definitions. 
If you later rebind the same name again, pointing to a new generic function, it will suddenly gain all of the methods of the previous function that had the same fullname. - **Method lookup**: + As of v0.15.0, multimethods cannot be unregistered. - Each method definition must specify type hints **on all of its parameters** - except `**kwargs` (if it has one). Then, at call time, the types of **all** - arguments (except any bound to `**kwargs`), as well as the number of - arguments, are automatically used for *dispatching*, i.e. choosing which - method to call. In other words, multiple parameters participate in - dispatching, thus the term *multiple dispatch*. + **Multimethod lookup**: - **Varargs are supported**. To have the contents of `*args` participate in - dispatching, annotate the parameter as `*args: typing.Tuple[...]`. For the - `...` part, see the documentation of the `typing` module. Both homogeneous - and heterogeneous tuples are supported. + Each method definition must specify type hints **on all of its parameters**. + Then, at call time, the types of **all** arguments, as well as the number + of arguments, are automatically used for *dispatching*, i.e. choosing which + implementation to call. In other words, multiple parameters participate in + dispatching, hence *multiple dispatch*. - **The first method that matches wins, in most-recently-registered order.** - (This is unlike in Julia, which matches the most specific applicable method.) + **Varargs are supported**. Vararg type hint examples:: - In other words, later definitions override earlier ones. So specify the - implementation with the most generic types first, and then move on to the - more specific ones. The mnemonic is, "the function is generally defined - like this, except if the arguments match these particular types..." 
+ - `*args: typing.Tuple[int, ...]` means "any number of `int`s" + - `*args: typing.Tuple[int, float, str]` means "exactly `(int, float, str)`, + in that order" + - `**kwargs: typing.Dict[str, int]` means "all **kwargs are of type `int`". + Note the key type for the `**kwargs` dict is always `str`. - The main point of this feature is to eliminate `if`/`elif`/`elif`... blocks - that switch by `isinstance` on arguments, and then raise `TypeError` - in the final `else`, by implementing this machinery centrally. + **The first multimethod that matches wins, in most-recently-registered order.** + (This is unlike in Julia, which matches the most specific applicable multimethod.) - Another use case of `@generic` are functions like the builtin `range`, where - the *role* of an argument in a particular position depends on the *number of* - arguments passed in the call. + In other words, later multimethod definitions override earlier ones. So specify + the implementation with the most generic types first, and then move on to the + more specific ones. The mnemonic is, "the function is generally defined like + this, except if the arguments match these particular types..." **Differences to tools in the standard library**: @@ -155,28 +184,28 @@ def example(): no public `register` attribute. Instead, generic functions are saved in a global registry. - Unlike `typing.overload`, the implementations are given in the method bodies. + Unlike `typing.overload`, the implementations are given in the multimethod + bodies. **Interaction with OOP**: Beside regular functions, `@generic` can be installed on instance, class - or static methods (in the OOP sense). `self` and `cls` parameters do not + or static *methods* (in the OOP sense). `self` and `cls` parameters do not participate in dispatching, and need no type annotation. 
On instance and class methods, the self-like parameter, beside appearing as the first positional-or-keyword parameter, **must be named** one of `self`, `this`, `cls`, or `klass` to be detected by the ignore mechanism. This limitation is due to implementation reasons; while a class body is being - evaluated, the context needed to distinguish a method (OOP sense) from a - regular function is not yet present. + evaluated, the context needed to distinguish a method from a regular function + is not yet present. When `@generic` is installed on an instance method or on a `@classmethod`, - then at call time, classes are tried in MRO order. **All** generic-function - methods of the OOP method defined in the class currently being looked up - are tested for matches first, **before** moving on to the next class in the - MRO. (This has subtle consequences, related to in which class in the - hierarchy the various generic-function methods for a particular OOP method - are defined.) + then at call time, classes are tried in MRO order. **All** multimethods + of the method defined in the class currently being looked up are tested + for matches first, **before** moving on to the next class in the MRO. + This has subtle consequences, related to in which class in the hierarchy + the various multimethods for a particular method are defined. For *static methods* MRO lookup is not supported. Basically, one of the roles of `cls` or `self` is to define the MRO; a `@staticmethod` doesn't @@ -185,221 +214,81 @@ def example(): To work with OOP inheritance, in the decorator list, `@generic` must be on inside of (i.e. run before) `@classmethod` or `@staticmethod`. + **Interaction with `curry`**: + + Starting with v0.15.0, `curry` supports `@generic`. In the case where the + *number* of positional arguments supplied so far is acceptable for *some* + registered multimethod, but some parameters of that multimethod are still + missing bindings (i.e. 
it is not a full match), `curry` waits for more + arguments (returning the curried function). + + Passing an argument of an invalid type at any step of currying immediately + raises `TypeError`. Here "invalid type" means that for the partial application + constructed so far, no registered multimethod accepts the new argument(s). + **CAUTION**: - To declare a parameter of a method as dynamically typed, explicitly + To declare a parameter of a multimethod as dynamically typed, explicitly annotate it as `typing.Any`; don't just omit the type annotation. Explicit is better than implicit; **this is a feature**. - Dispatching by the contents of the `**kwargs` dictionary is not (yet) - supported. - See the limitations in `unpythonic.typecheck` for which features of the `typing` module are supported and which are not. - At the moment, `@generic` does not work with `curry`. Adding curry support - needs changes to the dispatch logic in `curry`. + Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, + because all arguments of each function call will be wrapped in a promise + (`unpythonic.lazyutil.Lazy`) that carries no type information on its contents. """ - return _register_generic(_getfullname(f), f) + return _setup(_function_fullname(f), f) -# Modeled after `mcpyrate.utils.format_macrofunction`, which does the same thing for macros. -def _getfullname(f): - function, _ = getfunc(f) - if not function.__module__: # At least macros defined in the REPL have `__module__=None`. - return function.__qualname__ - return f"{function.__module__}.{function.__qualname__}" +@register_decorator(priority=98) +def augment(target): + """Parametric decorator. Add a multimethod to generic function `target`. -def _register_generic(fullname, f): - """Register a method for a generic function. 
+ Like `@generic`, but the generic function on which the method will be + registered is chosen separately, so that you can augment a generic + function previously defined in some other `.py` source file. - This is a low-level function; you'll likely want `generic` or `generic_for`. + The return value of `augment` is the multiple-dispatch dispatcher + for the generic function that was modified. - fullname: str, fully qualified name of target function to register - the method on, used as key in the dispatcher registry. + Usage:: - Registering the first method on a given `fullname` makes - that function generic, and creates the dispatcher for it. + # example.py + from unpythonic import generic - f: callable, the new method to register. + @generic + def f(x: int): + ... - Return value is the dispatcher that replaces the original function. - """ - # HACK for cls/self analysis - def name_of_1st_positional_parameter(f): - function, _ = getfunc(f) - params = inspect.signature(function).parameters - poskinds = set((inspect.Parameter.POSITIONAL_ONLY, - inspect.Parameter.POSITIONAL_OR_KEYWORD)) - for param in params.values(): - if param.kind in poskinds: - return param.name - return None - if fullname not in _dispatcher_registry: - # Create the dispatcher. This will replace the original f. - @wraps(f) - def dispatcher(*args, **kwargs): - # `signature` comes from typing.get_type_hints. - # `bindings` is populated in the surrounding scope below. - def match_argument_types(type_signature): - # TODO: handle **kwargs (bindings["kwarg"], bindings["kwarg_name"]) - args_items = bindings["args"].items() - if bindings["vararg_name"]: - vararg_item = (bindings["vararg_name"], bindings["vararg"]) # *args - all_items = tuple(chain(args_items, (vararg_item,))) - else: - all_items = args_items - - for parameter, value in all_items: - assert parameter in type_signature # resolve_bindings should already TypeError when not. 
- expected_type = type_signature[parameter] - if not isoftype(value, expected_type): - return False - return True - - # Dispatch. - def methods(): - # For regular functions, ours is the only registry we need to look at: - relevant_registries = [reversed(dispatcher._method_registry)] - - # But if this dispatcher is installed on an OOP method, we must - # look up generic function methods also in the class's MRO. - # - # For *static methods* MRO is not supported. Basically, one of - # the roles of `cls` or `self` is to define the MRO; a static - # method doesn't have that. - # - # See discussions on interaction between `@staticmethod` and `super` in Python: - # https://bugs.python.org/issue31118 - # https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879 - # - # TODO/FIXME: Not possible to detect self/cls parameters correctly. - # Here we're operating at the wrong abstraction level for that, - # since we see just bare functions. - # - # Let's see if we might have a self/cls parameter, and if so, get its value. - first_param_name = name_of_1st_positional_parameter(f) - if first_param_name in self_parameter_names: - if len(args) < 1: # pragma: no cover, shouldn't happen. - raise TypeError(f"MRO lookup failed: no value provided for self-like parameter {repr(first_param_name)} when calling generic-function OOP method {fullname}") - first_arg_value = args[0] - dynamic_instance = first_arg_value # self/cls - theclass = None - if isinstance(dynamic_instance, type): # cls - theclass = dynamic_instance - elif hasattr(dynamic_instance, "__class__"): # self - theclass = dynamic_instance.__class__ - if theclass is not None: # ignore false positives when possible - for base in theclass.__mro__[1:]: # skip the class itself in the MRO - if hasattr(base, f.__name__): # does this particular super have f? 
- base_oop_method = getattr(base, f.__name__) - base_raw_function, _ = getfunc(base_oop_method) - if hasattr(base_raw_function, "_method_registry"): # it's @generic - base_registry = getattr(base_raw_function, "_method_registry") - relevant_registries.append(reversed(base_registry)) - - return chain.from_iterable(relevant_registries) - for method, signature in methods(): - try: - bindings = resolve_bindings(method, *args, **kwargs) - except TypeError: # arity mismatch, so this method can't be the one the call is looking for. - continue - if match_argument_types(signature): - return method(*args, **kwargs) - - # No match, report error. - # - # TODO: It would be nice to show the type signature of the args actually given, - # TODO: but in the general case this is difficult. We can't just `type(x)`, since - # TODO: the signature may specify something like `Sequence[int]`. Knowing a `list` - # TODO: was passed doesn't help debug that it was `Sequence[str]` when a `Sequence[int]` - # TODO: was expected. The actual value at least implicitly contains the type information. - # - # TODO: Compute closest candidates, like Julia does? (see methods, MethodError) - a = [repr(a) for a in args] - sep = ", " if kwargs else "" - kw = [f"{k}={str(v)}" for k, v in kwargs] - def format_method(method): # Taking a page from Julia and some artistic liberty here. 
- thecallable, type_signature = method - function, _ = getfunc(thecallable) - filename = inspect.getsourcefile(function) - source, firstlineno = inspect.getsourcelines(function) - return f"{type_signature} from {filename}:{firstlineno}" - methods_str = [f" {format_method(x)}" for x in methods()] - candidates = "\n".join(methods_str) - function, _ = getfunc(f) - args_str = ", ".join(a) - kws_str = ", ".join(kw) - msg = (f"No method found matching {function.__qualname__}({args_str}{sep}{kws_str}).\n" - f"Candidate signatures (in order of match attempts):\n{candidates}") - raise TypeError(msg) + # main.py + from unpythonic import augment + import example - dispatcher._method_registry = [] - def register(thecallable): - """Decorator. Register a new method for this generic function. - - The method must have type annotations for all of its parameters; - these are used for dispatching. - - An exception is the `self` or `cls` parameter of an OOP instance - method or class method; that does not participate in dispatching, - and does not need a type annotation. - """ - # Using `inspect.signature` et al., we could auto-`Any` parameters - # that have no type annotation, but that would likely be a footgun. - # So we require a type annotation for each parameter. - # - # One exception: the self/cls parameter of OOP instance methods and - # class methods is not meaningful for dispatching, and we don't - # have a runtime value to auto-populate its expected type when the - # definition runs. So we set it to `typing.Any` in the method's - # expected type signature, which makes the dispatcher ignore it. - - function, kind = getfunc(thecallable) - params = inspect.signature(function).parameters - params_names = [p.name for p in params.values()] - type_signature = typing.get_type_hints(function) - - # In the type signature, auto-`Any` the self/cls parameter, if any. - # - # TODO/FIXME: Not possible to detect self/cls parameters correctly. 
- # - # The `@generic` decorator runs while the class body is being - # evaluated. In that context, an instance method looks just like a - # regular function. - # - # Also if `@generic` runs before `@classmethod` (to place Python's - # implicit `cls` handling outermost), also a class method looks - # just like a regular function to us. - # - # So we HACK, and special-case some suggestive *parameter names* - # when they appear the first position, though **Python itself - # doesn't do that**. For any crazy person not following Python - # naming conventions, our approach won't work. - if len(params_names) >= 1 and params_names[0] in self_parameter_names: - # In Python 3.6+, `dict` preserves insertion order. Make sure - # the `self` parameter appears first, for clearer error messages - # when no matching method is found. - type_signature = {params_names[0]: typing.Any, **type_signature} - - if not all(name in type_signature for name in params_names): - failures = [name for name in params_names if name not in type_signature] - plural = "s" if len(failures) > 1 else "" - wrapped_list = [f"'{x}'" for x in failures] - wrapped_str = ", ".join(wrapped_list) - msg = f"Method definition missing type annotation for parameter{plural}: {wrapped_str}" - raise TypeError(msg) - - dispatcher._method_registry.append((thecallable, type_signature)) - return dispatcher # Replace the callable with the dispatcher for this generic function. - - dispatcher._register = register # save it for use by us later - _dispatcher_registry[fullname] = dispatcher - dispatcher = _dispatcher_registry[fullname] - if hasattr(dispatcher, "_register"): # co-operation with @typed, below - return dispatcher._register(f) - raise TypeError("@typed: cannot register additional methods.") + class MyOwnType: + ... + @augment(example.f) + def f(x: MyOwnType): + ... + + **CAUTION**: Beware of type piracy when you use `@augment`. That is: + + 1. 
For arbitrary input types you don't own, augment only a function you own, OR + 2. Augment a function defined somewhere else only if at least one parameter + (in the call signature you are adding) is of a type you own. + + Satisfying **one** of these conditions is sufficient to avoid type piracy. + + See: + https://lexi-lambda.github.io/blog/2016/02/18/simple-safe-multimethods-in-racket/ + https://en.wikipedia.org/wiki/Action_at_a_distance_(computer_programming) + https://docs.julialang.org/en/v1/manual/style-guide/#Avoid-type-piracy + """ + if not isgeneric(target): + raise TypeError(f"{_function_fullname(target)} is not a generic function, cannot add multimethods to it.") + return partial(_setup, _function_fullname(target)) @register_decorator(priority=98) def typed(f): @@ -412,20 +301,460 @@ def typed(f): Also, unlike a basic `isinstance` check, this allows using features from the `typing` stdlib module in the type specifications. - After a `@typed` function has been created, no more methods can be + Once a `@typed` function has been created, no more multimethods can be attached to it. - `@typed` works with `curry`, because the function has only one call - signature, as usual. - **CAUTION**: - If used with `curry`, argument type errors will only be detected when - `curry` triggers the actual call. To fix this, `curry` would need to - perform some more introspection on the callable, and to actually know - about this dispatch system. It's not high on the priority list. + Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, + because all arguments of each function call will be wrapped in a promise + (`unpythonic.lazyutil.Lazy`) that carries no type information on its contents. """ - # TODO: Fix the epic fail at fail-fast, and update the corresponding test. 
s = generic(f) del s._register # remove the ability to register more methods return s + +def methods(f): + """Print, to stdout, a human-readable list of multimethods currently registered to `f`. + + For introspection in the REPL. This works by calling `list_methods`, which see. + + Example - entering this in an IPython session:: + + from unpythonic import generic, methods + + @generic + def f(x: int): + return "int" + + @generic + def f(x: float): + return "float" + + methods(f) + + the result is: + + Multimethods for @generic __main__.f: + f(x: float) from :1 + f(x: int) from :1 + + This is like the `methods` function of Julia. + """ + print(format_methods(f)) + +def format_methods(f): + """Format, as a string, a human-readable list of multimethods currently registered to `f`. + + One level lower than `methods`; format a human-readable message, but return it + instead of printing it. + + This works by calling `list_methods`, which see. + """ + function, _ = getfunc(f) + multimethods = list_methods(f) + if multimethods: + thecallables = [thecallable for thecallable, type_signature in multimethods] + methods_list = [f" {_format_callable(x)}" for x in thecallables] + methods_str = "\n".join(methods_list) + else: # pragma: no cover, in practice a generic should always have at least one method. + methods_str = " " + return f"Multimethods for @{isgeneric(f)} {_function_fullname(function)}:\n{methods_str}" + +def list_methods(f): + """Return a list of the multimethods currently registered to `f`. + + The multimethods are returned in the order they would be tested by the dispatcher + when the generic function is called. + + The return value is a list, where each item is `(callable, type_signature)`. + Each type signature is in the format returned by `typing.get_type_hints`. + + `f`: a callable that has been declared `@generic` or `@typed`. + + **Interaction with OOP**: + + Bound methods are resolved to the underlying function automatically. 
+ The `self`/`cls` argument is extracted from the `__self__` attribute of + the bound method, enabling linked dispatcher lookups in the MRO. + + **CAUTION**: + + Recall that in Python, instance methods when accessed through the *class* + are just raw functions; the method becomes bound, and thus `self` is set, + when accessed through *an instance* of that class. + + Let `Cat` be a class with an OOP instance method `meow`, and `cat` an + instance of that class. If you call `list_methods(cat.meow)`, you get the + MRO lookup for linked dispatchers, as expected. + + But if you call `list_methods(Cat.meow)` instead, it won't see the MRO, + because the value of the `self` argument isn't set for an unbound method + (which is really just a raw function). + + If `Cat` has a `@classmethod` `iscute`, calling `list_methods(Cat.iscute)` + performs the MRO lookup for linked dispatchers. This is because a class + method is already bound (to the class, so the `cls` argument already has + a value) when it is accessed through the class. + + Finally, note that while that is how `list_methods` works, it is not the + mechanism actually used to determine `self`/`cls` when *calling* the + generic function. There, the value of `self`/`cls` is extracted from the + first positional argument of the call. This is because the dispatcher is + actually installed on the underlying raw function, so it has no access to + the metadata of the bound method (which, as seen from the dispatcher, is + on the outside). + """ + function, _ = getfunc(f) + if not isgeneric(function): + raise TypeError(f"{_function_fullname(function)} is not a generic function, it does not have multimethods.") + + # In case of a bound method (either `Foo.classmeth` or `foo.instmeth`), + # we can get the value for `self`/`cls` argument from its `__self__` attribute. + # + # Otherwise we have a regular function, an unbound method, or a `@staticmethod`; + # in those cases, there's no `self`/`cls`. 
(Technically, an unbound method has + # a parameter to receive it, but no value has been set yet.) + self_or_cls = f.__self__ if hasattr(f, "__self__") else None + return _list_multimethods(function, self_or_cls) + +# -------------------------------------------------------------------------------- + +# Modeled after `mcpyrate.utils.format_macrofunction`, which does the same thing for macros. +def _function_fullname(f): + """Return the full name of the callable `f`, including also its module name.""" + function, _ = getfunc(f) # get the raw function also for OOP methods + if not function.__module__: # At least macros defined in the REPL have `__module__=None`. + return function.__qualname__ + return f"{function.__module__}.{function.__qualname__}" + +def _name_of_1st_positional_parameter(f): + """Return the name, as a string, of the first positional parameter of the callable `f`.""" + function, _ = getfunc(f) # get the raw function also for OOP methods + parameters = inspect.signature(function).parameters + poskinds = set((inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD)) + for param in parameters.values(): + if param.kind in poskinds: + return param.name + return None + +def _list_multimethods(dispatcher, self_or_cls=None): + """List multimethods currently registered to a given dispatcher. + + `self_or_cls`: If `dispatcher` is installed on an instance method + or on a `@classmethod`, set this to perform MRO + lookups to find linked dispatchers. + """ + # TODO: Compute closest candidates, like Julia does? (see `methods`, `MethodError` in Julia) + # TODO: (If we do that, we need to look at the bound arguments. When just listing multimethods + # TODO: in the REPL, the current ordering is probably fine.) 
+ + # For regular functions, ours is the only registry we need to look at: + relevant_registries = [reversed(dispatcher._method_registry)] + + # But if this dispatcher is installed on a method, we must + # look up multimethods also in the class's MRO. + # + # For *static methods* MRO is not supported. Basically, one of + # the roles of `cls` or `self` is to define the MRO; a static + # method doesn't have that. + # + # See discussions on interaction between `@staticmethod` and `super` in Python: + # https://bugs.python.org/issue31118 + # https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879 + if self_or_cls: + if isinstance(self_or_cls, type): + cls = self_or_cls + elif hasattr(self_or_cls, "__class__"): + cls = self_or_cls.__class__ + else: + assert False + + for base in cls.__mro__[1:]: # skip the class itself in the MRO + if hasattr(base, dispatcher.__name__): # does this particular super have f? + base_oop_method = getattr(base, dispatcher.__name__) + base_raw_function, _ = getfunc(base_oop_method) + if isgeneric(base_raw_function): # it's @generic or @typed + base_registry = getattr(base_raw_function, "_method_registry") + relevant_registries.append(reversed(base_registry)) + + return list(chain.from_iterable(relevant_registries)) + +# TODO: move this utility to `unpythonic.fun`? Belongs there, but doing so introduces a circular dependency. +def _format_callable(thecallable): + """Format, as a string, a human-readable description of a callable. + + The returned string includes the call signature, and the source filename + and starting line number. This output format takes a page from Julia, + with some artistic liberty. + """ + # Our `type_signature` is based on `typing.get_type_hints`, + # but for the error message, we need something that formats + # like source code. Hence we use `inspect.signature`. 
# TODO: move this utility to `unpythonic.fun`? Belongs there, but doing so introduces a circular dependency.
def _format_callable(thecallable):
    """Format, as a string, a human-readable description of a callable.

    The returned string includes the call signature, and the source filename
    and starting line number. This output format takes a page from Julia,
    with some artistic liberty.

    If the source location cannot be determined (`inspect` raises `OSError`
    or `TypeError`), a placeholder is reported instead of the
    `filename:lineno` part, so that formatting never prevents registering
    a multimethod or reporting a dispatch failure.
    """
    # Our `type_signature` is based on `typing.get_type_hints`,
    # but for the error message, we need something that formats
    # like source code. Hence we use `inspect.signature`.
    thesignature = inspect.signature(thecallable)
    function, _ = getfunc(thecallable)  # raw function for OOP methods, too
    # TODO: Python 3.8: filename sometimes detected incorrectly
    #   - This is because `inspect.getsourcefile` uses `inspect.getfile`, which looks at
    #     the `co_filename` of the code object. If the function is decorated, then it sees
    #     the source file where the decorator was defined, not the original function.
    # function = inspect.unwrap(function)  # maybe this helps? But now I can't reproduce the bug to test it.
    description = f"{thecallable.__qualname__}{str(thesignature)}"
    try:
        filename = inspect.getsourcefile(function)
        _, firstlineno = inspect.getsourcelines(function)
    except (OSError, TypeError):  # source not retrievable
        return f"{description} from <source not available>"
    return f"{description} from {filename}:{firstlineno}"

def _resolve_multimethod(dispatcher, args, kwargs, *, _partial=False):
    """Return the first matching multimethod on `dispatcher` for the given `args` and `kwargs`.

    Return `None` if no multimethod matches.

    If `_partial` is `True`, allow leaving some parameters of the function unbound,
    and return the first multimethod that matches the given partial `args` and `kwargs`.

    The partial mode is useful for type-checking arguments for partial application of a generic
    function. If any multimethod matches (this function returns something other than `None`),
    then the generic function can accept those partial arguments.

    Note it is possible to dispatch, i.e. determine which multimethod is the one to be
    called, only once we have full (non-partial) `args` and `kwargs`, because in general
    the remaining not-yet-passed `args` or `kwargs` may cause the search to match a
    different multimethod. In partial mode, this function says only that there is
    *at least one* match when given those partial arguments.
    """
    multimethods = _list_multimethods(dispatcher, _extract_self_or_cls(dispatcher, args))
    for thecallable, type_signature in multimethods:
        try:
            bound_arguments = _resolve_bindings(thecallable, args, kwargs, _partial=_partial)
            if not _get_argument_type_mismatches(type_signature, bound_arguments):
                return thecallable
        except TypeError:  # could not accept the given arguments; this isn't the multimethod we're looking for.
            continue
    return None
If that was intended, please use `skip_unannotated=True`.") + expected_type = type_signature[parameter] + if not isoftype(value, expected_type): + mismatches.append((parameter, value, expected_type)) + return mismatches + +def _extract_self_or_cls(thecallable, args): + """From `thecallable` and positional arguments `args`, extract the value of `self`/`cls`, if any. + + Return value is either the value bound that would be bound to `self`/`cls` + (the first positional parameter), or `None`. + """ + # TODO/FIXME: Not possible to detect `self`/`cls` parameters correctly. + # + # Here we're operating at the wrong abstraction level for that, + # since we see just bare functions. In the OOP case, the dispatcher + # is installed on the raw function before it becomes a bound method. + # (That in itself is just as it should be.) + first_param_name = _name_of_1st_positional_parameter(thecallable) + most_likely_an_oop_method = first_param_name in self_parameter_names + + # Let's see if we might have been passed a `self`/`cls` parameter, + # and if so, get its value. (Recall that in Python, it is always + # the first positional parameter.) + if most_likely_an_oop_method: + if len(args) < 1: # pragma: no cover, shouldn't happen. + raise TypeError(f"MRO lookup failed: no value provided for self-like parameter {repr(first_param_name)} for OOP method-like generic function {_function_fullname(thecallable)}") + self_or_cls = args[0] + else: + self_or_cls = None + return self_or_cls + +def _raise_multiple_dispatch_error(dispatcher, args, kwargs, *, candidates, _partial=False): + """Raise a nicely formatted `TypeError` regarding a failed multiple dispatch (no matching multimethod). + + `candidates`: list of `(thecallable, type_signature)` that were attempted, but did not match. + `_partial`: if `True`, report a failure in a *partial application*. + if `False`, report a failure in a *call*. 
+ """ + # For `@typed` functions, which have just one valid call signature, we can easily + # report which args or kwargs failed to match. + if len(candidates) == 1: + # TODO: There's some repeated error-reporting code in `unpythonic.fun`. + thecallable, type_signature = candidates[0] + bound_arguments = _resolve_bindings(thecallable, args, kwargs, _partial=_partial) + mismatches = _get_argument_type_mismatches(type_signature, bound_arguments) + mismatches_list = [f"{parameter}={repr(value)}, expected {expected_type}" + for parameter, value, expected_type in mismatches] + mismatches_str = "; ".join(mismatches_list) + one_multimethod_msg_str = f"\nParameter binding(s) do not match type specification: {mismatches_str}" + else: + one_multimethod_msg_str = "" + + # TODO: It would be nice to show the type signature of the args actually given, + # TODO: but in the general case this is difficult. We can't just `type(x)`, since + # TODO: the signature may specify something like `Sequence[int]`. Knowing a `list` + # TODO: was passed doesn't help debug that it was `Sequence[str]` when a `Sequence[int]` + # TODO: was expected. The actual value at least implicitly contains the type information. + args_list = [repr(x) for x in args] + args_str = ", ".join(args_list) + if _partial and args_str: + args_str += ", ..." + sep = ", " if args and kwargs else "" + kws_list = [f"{k}={repr(v)}" for k, v in kwargs.items()] + kws_str = ", ".join(kws_list) + if _partial and kws_str: + kws_str += ", ..." + if _partial and not args_str and not kws_str: + args_str = "..." 
+ thecallables = [thecallable for thecallable, type_signature in candidates] + methods_list = [f" {_format_callable(x)}" for x in thecallables] + methods_str = "\n".join(methods_list) + op = "partial application" if _partial else "call" + msg = (f"No multiple-dispatch match for the {op} {dispatcher.__qualname__}({args_str}{sep}{kws_str}).\n" + f"Multimethods for @{isgeneric(dispatcher)} {_function_fullname(dispatcher)} (most recent match attempt last):\n{methods_str}" + f"{one_multimethod_msg_str}") + raise TypeError(msg) + +def _setup(fullname, multimethod): + """Register a multimethod for a generic function, creating the generic function if necessary. + + This is a low-level function; you'll likely want `@generic` or `@augment`. + + `fullname`: str, fully qualified name of function to register the multimethod + on, used as key in the dispatcher registry. + + Registering the first multimethod on a given `fullname` makes + that function generic, and creates the dispatcher for it. + + Second and further registrations using the same `fullname` add + the new multimethod to the existing dispatcher. + + `multimethod`: callable, the new multimethod to register. + + Return value is the dispatcher. + """ + if fullname not in _dispatcher_registry: + # Create the dispatcher. This will replace the original function. 
def _setup(fullname, multimethod):
    """Register a multimethod for a generic function, creating the generic function if necessary.

    This is a low-level function; you'll likely want `@generic` or `@augment`.

    `fullname`: str, fully qualified name of function to register the multimethod
                on, used as key in the dispatcher registry.

                The first registration on a given `fullname` makes that function
                generic, and creates the dispatcher for it. Any later registration
                using the same `fullname` adds the new multimethod to the
                existing dispatcher.

    `multimethod`: callable, the new multimethod to register.

    Return value is the dispatcher.
    """
    if fullname not in _dispatcher_registry:
        # No dispatcher yet under this name. Build one; it takes the place
        # of the original function.
        @wraps(multimethod)
        def dispatcher(*args, **kwargs):
            chosen = _resolve_multimethod(dispatcher, args, kwargs)
            if chosen:
                return chosen(*args, **kwargs)
            receiver = _extract_self_or_cls(dispatcher, args)
            attempted = _list_multimethods(dispatcher, receiver)
            _raise_multiple_dispatch_error(dispatcher, args, kwargs, candidates=attempted)

        dispatcher._method_registry = []
        dispatcher._register = partial(_register_to, dispatcher)
        _dispatcher_registry[fullname] = dispatcher

    target = _dispatcher_registry[fullname]
    if isgeneric(target) == "typed":
        raise TypeError("@typed: cannot register additional multimethods.")
    return target._register(multimethod)  # this returns the *dispatcher*

def _register_to(dispatcher, multimethod):
    """Decorator. Register a new `multimethod` to `dispatcher`.

    This is a low-level function used by `_setup`.

    Every parameter of the multimethod must carry a type annotation, since
    the annotations drive the dispatch. The one exception is the `self` or
    `cls` parameter of an OOP instance method or class method, which takes
    no part in dispatching and may be left unannotated.

    Raises `TypeError` if some other parameter lacks an annotation.

    After registering, this returns `dispatcher`.
    """
    # Using `inspect.signature` et al., we could auto-`Any` parameters
    # that have no type annotation, but that would likely be a footgun.
    # So we require a type annotation for each parameter.
    #
    # One exception: the `self`/`cls` parameter of OOP instance methods and
    # class methods is not meaningful for dispatching, and we don't
    # have a runtime value to auto-populate its expected type when the
    # definition runs. So we set it to `typing.Any` in the multimethod's
    # expected type signature, which makes the dispatcher ignore it.
    raw_function, _ = getfunc(multimethod)
    names = [p.name for p in inspect.signature(raw_function).parameters.values()]
    hints = typing.get_type_hints(raw_function)

    # In the type signature, auto-`Any` the `self`/`cls` parameter, if any.
    #
    # TODO/FIXME: Not possible to detect `self`/`cls` parameters correctly.
    #
    # The `@generic` decorator runs while the class body is being
    # evaluated. In that context, an instance method looks just like a
    # regular function.
    #
    # Also if `@generic` runs before `@classmethod` (to place Python's
    # implicit `cls` handling outermost), also a class method looks
    # just like a regular function to us.
    #
    # So we HACK, and special-case some suggestive *parameter names*
    # when they appear in the first position, though **Python itself
    # doesn't do that**. For any crazy person not following Python
    # naming conventions, our approach won't work.
    if names and names[0] in self_parameter_names:
        # `dict` preserves insertion order. Insert the `self` parameter
        # first, for clearer error messages when no matching method is found.
        # An explicit annotation on it, if present, still takes precedence.
        with_self_first = {names[0]: typing.Any}
        with_self_first.update(hints)
        hints = with_self_first

    unannotated = [name for name in names if name not in hints]
    if unannotated:
        plural = "s" if len(unannotated) > 1 else ""
        repr_str = ", ".join(repr(x) for x in unannotated)
        raise TypeError(f"Multimethod definition missing type annotation for parameter{plural}: {repr_str}")

    dispatcher._method_registry.append((multimethod, hints))

    # Update entry point docstring to include docs for the new multimethod,
    # and its call signature.
    new_doc = _format_callable(multimethod)
    if multimethod.__doc__:
        new_doc = new_doc + "\n" + multimethod.__doc__

    is_first = len(dispatcher._method_registry) == 1
    if is_first or not dispatcher.__doc__:
        # Override the original doc of the function that was converted
        # into the dispatcher; this adds the call signature to the top.
        dispatcher.__doc__ = new_doc
    else:
        # Append a separator, then the call signature and doc of the new multimethod.
        dispatcher.__doc__ = dispatcher.__doc__ + "\n\n" + ("-" * 80) + "\n" + new_doc

    return dispatcher  # Replace the multimethod callable with this generic function's dispatcher.
+ call_signature_desc = _format_callable(multimethod) + our_doc = call_signature_desc + if multimethod.__doc__: + our_doc += "\n" + multimethod.__doc__ + + isfirstmultimethod = len(dispatcher._method_registry) == 1 + if isfirstmultimethod or not dispatcher.__doc__: + # Override the original doc of the function that was converted + # into the dispatcher; this adds the call signature to the top. + dispatcher.__doc__ = our_doc + else: + # Add the call signature and doc for the new multimethod. + dispatcher.__doc__ += "\n\n" + ("-" * 80) + "\n" + dispatcher.__doc__ += our_doc + + return dispatcher # Replace the multimethod callable with this generic function's dispatcher. diff --git a/unpythonic/ec.py b/unpythonic/ec.py index 466d65de..303d3d5c 100644 --- a/unpythonic/ec.py +++ b/unpythonic/ec.py @@ -24,31 +24,13 @@ http://www.gigamonkeys.com/book/the-special-operators.html """ -__all__ = ["throw", "catch", "call_ec", - "setescape", "escape"] # old names, pre-0.14.2, will go away in 0.15.0 +__all__ = ["throw", "catch", "call_ec"] -from warnings import warn from functools import wraps from .regutil import register_decorator # from .symbol import gensym -def escape(value, tag=None, allow_catchall=True): # pragma: no cover - """Alias for `throw`, for backward compatibility. - - Will be removed in 0.15.0. - """ - warn("`escape` has been renamed `throw` as in Common Lisp; this alias will be removed in 0.15.0.", FutureWarning) - return throw(value, tag, allow_catchall) - -def setescape(tags=None, catch_untagged=True): # pragma: no cover - """Alias for `catch`, for backward compatibility. - - Will be removed in 0.15.0. - """ - warn("`setescape` has been renamed `catch` as in Common Lisp; this alias will be removed in 0.15.0.", FutureWarning) - return catch(tags, catch_untagged) - def throw(value, tag=None, allow_catchall=True): """Escape to a dynamically surrounding ``@catch``. 
@@ -77,7 +59,7 @@ def throw(value, tag=None, allow_catchall=True): """ raise Escape(value, tag, allow_catchall) -class Escape(Exception): +class Escape(BaseException): """Exception that essentially represents the invocation of an escape continuation. Constructor parameters: see ``throw()``. diff --git a/unpythonic/env.py b/unpythonic/env.py index eac43868..d5538e4a 100644 --- a/unpythonic/env.py +++ b/unpythonic/env.py @@ -55,9 +55,16 @@ class env: "_direct_write", "_reserved_names") _direct_write = ("_env", "_finalized") + # For pickle support, since unpickling calls `__new__` but not `__init__`. + # If `self._env` is not present, `__getattr__` will crash with an infinite loop. So create it as early as possible. + def __new__(cls, **kwargs): + instance = super().__new__(cls) + instance._env = {} + instance._finalized = False # "let" sets this once env setup done + instance.__init__(**kwargs) + return instance + def __init__(self, **bindings): - self._env = {} - self._finalized = False # "let" sets this once env setup done for name, value in bindings.items(): setattr(self, name, value) diff --git a/unpythonic/excutil.py b/unpythonic/excutil.py new file mode 100644 index 00000000..fc9c6b26 --- /dev/null +++ b/unpythonic/excutil.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +"""Exception-related utilities.""" + +__all__ = ["raisef", "tryf", + "equip_with_traceback", + "async_raise", + "reraise_in", "reraise"] + +from contextlib import contextmanager +import sys +import threading +from types import TracebackType + +# For async_raise only. Note `ctypes.pythonapi` is not an actual module; +# you'll get a `ModuleNotFoundError` if you try to import it. +# +# TODO: The "pycapi" PyPI package would allow us to regularly import the C API, +# but right now we don't want introduce dependencies, especially for a minor feature. 
#   https://github.com/brandtbucher/pycapi
if sys.implementation.name == "cpython":
    import ctypes
    PyThreadState_SetAsyncExc = ctypes.pythonapi.PyThreadState_SetAsyncExc
else:  # pragma: no cover, coverage is measured on CPython.
    ctypes = None
    PyThreadState_SetAsyncExc = None


def raisef(exc, *, cause=None):
    """``raise`` as a function, to make it possible for lambdas to raise exceptions.

    Example::

        raisef(ValueError("message"))

    is (almost) equivalent to::

        raise ValueError("message")

    Parameters:
        exc: exception instance, or exception class
            The object to raise. This is whatever you would give as the argument to `raise`.
            Both instances (e.g. `ValueError("oof")`) and classes (e.g. `StopIteration`)
            can be used as `exc`.

        cause: exception instance, or `None`
            If `exc` was triggered as a direct consequence of another exception,
            and you would like to `raise ... from ...`, pass that other exception
            instance as `cause`. The default `None` performs a plain `raise ...`.
    """
    # Test identity, not truthiness: an exception instance may be falsy
    # (e.g. if its class defines `__len__` or `__bool__`), and must still be chained.
    if cause is not None:
        raise exc from cause
    raise exc

def tryf(body, *handlers, elsef=None, finallyf=None):
    """``try``/``except``/``finally`` as a function.

    This allows lambdas to handle exceptions.

    ``body`` is a thunk (0-argument function) that represents
    the body of the ``try`` block.

    ``handlers`` is ``(excspec, handler), ...``, where
                 ``excspec`` is either an exception type,
                             or a tuple of exception types.
                 ``handler`` is a 0-argument or 1-argument
                             function. If it takes an
                             argument, it gets the exception
                             instance.

                 Handlers are tried in the order specified.

    ``elsef`` is a thunk that represents the ``else`` block.

    ``finallyf`` is a thunk that represents the ``finally`` block.

    Upon normal completion, the return value of ``tryf`` is
    the return value of ``elsef`` if that was specified, otherwise
    the return value of ``body``.

    If an exception was caught by one of the handlers, the return
    value of ``tryf`` is the return value of the exception handler
    that ran.

    If ``body`` raises an exception that no handler matches, that exception
    propagates to the caller (after ``finallyf``, if any, has run), just
    like with a ``try`` statement.

    Raises ``TypeError`` if an ``excspec`` is not an exception type or
    a tuple of exception types.

    If you need to share variables between ``body`` and ``finallyf``
    (which is likely, given what a ``finally`` block is intended
    to do), consider wrapping the ``tryf`` in a ``let`` and storing
    your variables there. If you want them to leak out of the ``tryf``,
    you can also just create an ``env`` at an appropriate point,
    and store them there.
    """
    def accepts_arg(f):
        # Imported here, at first use: needed only once a handler actually runs.
        from .arity import arity_includes, UnknownArity
        try:
            if arity_includes(f, 1):
                return True
        except UnknownArity:  # pragma: no cover
            return True  # just assume it
        return False

    def isexceptiontype(exc):
        try:
            if issubclass(exc, BaseException):
                return True
        except TypeError:  # "issubclass() arg 1 must be a class"
            pass
        return False

    # validate handlers
    for excspec, handler in handlers:
        if isinstance(excspec, tuple):  # tuple of exception types
            if not all(isexceptiontype(t) for t in excspec):
                raise TypeError(f"All elements of a tuple excspec must be exception types, got {excspec}")
        elif not isexceptiontype(excspec):  # single exception type
            raise TypeError(f"excspec must be an exception type or tuple of exception types, got {excspec}")

    # run
    try:
        ret = body()
    except BaseException as exception:
        # Even if a class is raised, as in `raise StopIteration`, the `raise` statement
        # converts it into an instance by instantiating with no args. So we need no
        # special handling for the "class raised" case.
        #   https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement
        #   https://stackoverflow.com/questions/19768515/is-there-a-difference-between-raising-exception-class-and-exception-instance/19768732
        exctype = type(exception)
        for excspec, handler in handlers:
            # `issubclass` accepts a single class or a tuple of classes;
            # the handlers were validated above, so this is safe.
            if issubclass(exctype, excspec):
                if accepts_arg(handler):
                    return handler(exception)
                return handler()
        # No handler matched. Don't swallow the exception; let it propagate.
        raise
    else:
        if elsef is not None:
            return elsef()
        return ret
    finally:
        if finallyf is not None:
            finallyf()
def equip_with_traceback(exc, stacklevel=1):  # Python 3.7+
    """Attach a freshly built traceback to the exception instance `exc`, and return `exc`.

    `stacklevel` tells how many of the topmost call stack levels to leave out,
    to cull useless detail:

      - `0`: include everything, `equip_with_traceback` itself too,
      - `1`: include everything up to the caller of this function,
      - and so on.

    Hence, use `1` when calling this directly from your code, and `2` when
    calling from a utility function that should not show up in the trace either.

    If the call stack has fewer than `stacklevel` levels, the traceback
    is set to `None`.

    This is useful mainly in special cases, where `raise` cannot be used for
    some reason, and a manually created exception instance needs a traceback.
    (The `signal` function in the conditions-and-restarts system uses this.)

    Raises `TypeError` if `exc` is not an exception instance or `stacklevel`
    is not an `int`; `ValueError` if `stacklevel` is negative; and
    `NotImplementedError` if the interpreter lacks `sys._getframe`.

    **CAUTION**: The `sys._getframe` function exists in CPython and in PyPy3,
    but for another arbitrary Python implementation this is not guaranteed.

    Based on solution by StackOverflow user Zbyl:
        https://stackoverflow.com/a/54653137

    See also:
        https://docs.python.org/3/library/types.html#types.TracebackType
        https://docs.python.org/3/reference/datamodel.html#traceback-objects
        https://docs.python.org/3/library/sys.html#sys._getframe
    """
    if not isinstance(exc, BaseException):
        raise TypeError(f"exc must be an exception instance; got {type(exc)} with value {repr(exc)}")
    if not isinstance(stacklevel, int):
        raise TypeError(f"stacklevel must be int, got {type(stacklevel)} with value {repr(stacklevel)}")
    if stacklevel < 0:
        raise ValueError(f"stacklevel must be >= 0, got {repr(stacklevel)}")

    try:
        getframe = sys._getframe
    except AttributeError as err:  # pragma: no cover, both CPython and PyPy3 have sys._getframe.
        raise NotImplementedError("Need a Python interpreter which has `sys._getframe`") from err

    # Locate the starting frame; level 0 is the frame of this very function.
    try:
        frame = getframe(stacklevel)
    except ValueError:  # the call stack is not that deep
        frame = None

    # Python 3.7+ allows creating `types.TracebackType` objects in Python code.
    #
    # Walk from the starting frame toward the root of the call stack. Each new
    # traceback entry links, via `tb_next`, to the one made just before it, so
    # the chain points toward the level where the exception occurred. The entry
    # created last, for the root level, is the head of the traceback.
    tb = None
    while frame is not None:
        tb = TracebackType(tb, frame, frame.f_lasti, frame.f_lineno)
        frame = frame.f_back
    return exc.with_traceback(tb)

# TODO: To reduce the risk of spaghetti user code, we could require a non-main thread's entrypoint to declare
# via a decorator that it's willing to accept asynchronous exceptions, and check that mark here, making this
# mechanism strictly opt-in. The decorator could inject an `asyncexc_ok` attribute to the Thread object;
# that's enough to prevent accidental misuse.
# OTOH, having no such mechanism is the simpler design.
def async_raise(thread_obj, exception):
    """Raise an exception in another thread.

    thread_obj: `threading.Thread` object
        The target thread to inject the exception into. Must be running.
    exception: ``Exception``
        The exception to be raised. As with regular `raise`, this may be
        an exception instance or an exception class object.

    No return value. Normal return indicates success.

    If `thread_obj` has no `ident` attribute, raises `TypeError`.

    If the specified `threading.Thread` is not active, or the thread's ident
    was not accepted by the interpreter, raises `ValueError`.

    If the raise operation failed internally, raises `SystemError`.

    If not supported for the Python implementation we're currently running on,
    raises `NotImplementedError`.

    **NOTE**: This currently works only in CPython, because there is no Python-level
    API to achieve what this function needs to do, and PyPy3's C API emulation layer
    `cpyext` doesn't currently (January 2020) implement the function required to do
    this (and the C API functions in `cpyext` are not exposed to the Python level
    anyway, unlike CPython's `ctypes.pythonapi`).

    **CAUTION**: This is **potentially dangerous**. If the async raise
    operation fails, the interpreter may be left in an inconsistent state.

    **NOTE**: The term `async` here has nothing to do with `async`/`await`;
    instead, it refers to an asynchronous exception such as `KeyboardInterrupt`.
        https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity

    In a nutshell, a *synchronous* exception (i.e. the usual kind of exception)
    has an explicit `raise` somewhere in the code that the thread that
    encountered the exception is running. In contrast, an *asynchronous*
    exception **doesn't**, it just suddenly magically materializes from the outside.
    As such, it can in principle happen *anywhere*, with absolutely no hint about
    it in any obvious place in the code.

    **Hence, use this function very, very sparingly, if at all.**

    For example, `unpythonic` only uses this to support remotely injecting a
    `KeyboardInterrupt` into a REPL session running in another thread. So this
    may be interesting mainly if you're developing your own REPL server/client
    pair.

    (Incidentally, that's **not** how `KeyboardInterrupt` usually works.
    Rather, the OS sends a SIGINT, which is then trapped by an OS signal
    handler that runs in the main thread. At that point the magic has already
    happened: the control of the main thread is now inside the signal handler,
    as if the signal handler was called from the otherwise currently innermost
    point on the call stack. All the handler needs to do is to perform a regular
    `raise`, and the exception will propagate correctly.

    REPL sessions running in other threads can't use the standard mechanism,
    because in CPython, OS signal handlers only run in the main thread, and even
    in PyPy3, there is no guarantee *which* thread gets the signal even if you
    use `with __pypy__.thread.signals_enabled` to enable OS signal trapping in
    some of your other threads. Only one thread (including the main thread, plus
    any currently dynamically within a `signals_enabled`) will see the signal;
    which one, is essentially random and not even reproducible.)

    See also:
        https://vorpus.org/blog/control-c-handling-in-python-and-trio/

    The function necessary to perform this magic is actually mentioned right
    there in the official CPython C API docs, but it's not very well known:
        https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc

    Original detective work by Federico Ficarelli and LIU Wei:
        https://gist.github.com/nazavode/84d1371e023bccd2301e
        https://gist.github.com/liuw/2407154
    """
    if not ctypes or not PyThreadState_SetAsyncExc:
        raise NotImplementedError("async_raise not supported on this Python interpreter.")  # pragma: no cover

    if not hasattr(thread_obj, "ident"):
        raise TypeError(f"Expected a thread object, got {type(thread_obj)} with value '{thread_obj}'")

    target_tid = thread_obj.ident
    if target_tid not in {thread.ident for thread in threading.enumerate()}:
        raise ValueError("Invalid thread object, cannot find its ident among currently active threads.")

    # The C API declares the thread id as `unsigned long` (since Python 3.7),
    # so marshal it as such.
    c_tid = ctypes.c_ulong(target_tid)
    affected_count = PyThreadState_SetAsyncExc(c_tid, ctypes.py_object(exception))
    if affected_count == 0:
        raise ValueError("PyThreadState_SetAsyncExc did not accept the thread ident, even though it was among the currently active threads.")  # pragma: no cover

    # TODO: check CPython source code if this case can actually ever happen.
    #
    # The API docs seem to hint that 0 or 1 are the only possible return values.
    # If so, we can remove this `SystemError` case and the "potentially dangerous" caution.
    elif affected_count > 1:  # pragma: no cover
        # Clear the async exception, targeting the same thread identity, and hope for the best.
        # The second argument is a `PyObject *`; `None` is how `ctypes` passes a NULL
        # pointer, which is what the C API expects for clearing the pending exception.
        PyThreadState_SetAsyncExc(c_tid, None)
        raise SystemError("PyThreadState_SetAsyncExc failed, broke the interpreter state.")
def reraise_in(body, mapping):
    """Remap exception types in an expression.

    Use this to convert library exceptions into application exceptions that
    better describe the operation being implemented, at that operation's own
    level of abstraction.

    Usage::

        reraise_in(body,
                   {LibraryExc: ApplicationExc,
                    ...})

    If `body` raises an exception `exc` such that `isinstance(exc, LibraryExc)`,
    it is transparently chained into an `ApplicationExc`. The conversion is in
    effect for the dynamic extent of `body`. Upon normal completion, the return
    value of `body` is returned.

    ``body`` is a thunk (0-argument function).

    ``mapping`` is dict-like, ``{input0: output0, ...}``, where each
                ``input`` is either an exception type,
                          or a tuple of exception types.
                          It will be matched using `isinstance`.
                ``output`` is an exception type or an exception
                           instance. If an instance, then that exact
                           instance is raised as the converted
                           exception.

    Conversions are tried in the order specified; hence, just like in
    `except` clauses, place more specific types first.

    See also `reraise` for a block form.
    """
    try:
        result = body()
    except BaseException as caught:
        _reraise_handler(mapping, caught)
    else:
        return result

@contextmanager
def reraise(mapping):
    """Remap exception types. Context manager.

    Use this to convert library exceptions into application exceptions that
    better describe the operation being implemented, at that operation's own
    level of abstraction.

    Usage::

        with reraise({LibraryExc: ApplicationExc, ...}):
            body0
            ...

    If the body raises an exception `exc` such that `isinstance(exc, LibraryExc)`,
    it is transparently chained into an `ApplicationExc`. The conversion is in
    effect for the dynamic extent of the `with` block.

    ``mapping`` is dict-like, ``{input0: output0, ...}``, where each
                ``input`` is either an exception type,
                          or a tuple of exception types.
                          It will be matched using `isinstance`.
                ``output`` is an exception type or an exception
                           instance. If an instance, then that exact
                           instance is raised as the converted
                           exception.

    Conversions are tried in the order specified; hence, just like in
    `except` clauses, place more specific types first.

    See also `reraise_in` for an expression form.
    """
    try:
        yield
    except BaseException as caught:
        _reraise_handler(mapping, caught)

def _reraise_handler(mapping, libraryexc):
    """Remap an exception instance to another exception type.

    Must be called while `libraryexc` is being handled (i.e. from inside
    an `except` block), since the fallback is a bare `raise`.

    `mapping`: dict-like, `{LibraryExc0: ApplicationExc0, ...}`

        Each `LibraryExc` must be an exception type (or a tuple of them;
        it is passed to `isinstance`).

        Each `ApplicationExc` can be an exception type or an instance.
        If an instance, then that exact instance is raised as the
        converted exception.

    `libraryexc`: the exception instance to convert. It is
                  automatically chained into `ApplicationExc`.

    This function never returns normally. If no key in the mapping
    matches, the original exception `libraryexc` is re-raised.
    """
    for inputs, output in mapping.items():
        if not isinstance(libraryexc, inputs):
            continue
        # First match wins; chain the original as the cause.
        raise output from libraryexc
    raise
+ It must return either a ``Values`` object where the first positional + return value is the ``value`` to be yielded at this iteration, and + anything else is state to be unpacked to the args/kwargs of ``proc`` + at the next iteration; or a bare ``None`` to signify that the sequence ends. If your state is something simple such as one number, see ``unfold1``. Example:: def fibo(a, b): - return (a, b, a + b) + return Values(a, a=b, b=a + b) assert (tuple(take(10, unfold(fibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)) """ - states = inits + state = Values(*inits, **kwinits) while True: - result = proc(*states) + result = proc(*state.rets, **state.kwrets) if result is None: break - value, *states = result + if not isinstance(result, Values): + raise TypeError(f"Expected `None` (to terminate) or a `Values` (to continue), got {type(result)} with value {repr(result)}") + value, *rets = result.rets # unpack the first positional return value, keep the rest + state = Values(*rets, **result.kwrets) yield value # This is **not** how to make a right map; the result is exactly the same diff --git a/unpythonic/fun.py b/unpythonic/fun.py index a0b9c47d..e3afecf7 100644 --- a/unpythonic/fun.py +++ b/unpythonic/fun.py @@ -17,16 +17,36 @@ "to1st", "to2nd", "tokth", "tolast", "to", "withself"] -from functools import wraps, partial +from collections import namedtuple +from functools import wraps, partial as functools_partial +from inspect import signature +from threading import RLock +from typing import get_type_hints -from .arity import arities, resolve_bindings, tuplify_bindings, UnknownArity +from .arity import (_resolve_bindings, tuplify_bindings, _bind) from .fold import reducel +from .dispatch import (isgeneric, _resolve_multimethod, _format_callable, + _get_argument_type_mismatches, _raise_multiple_dispatch_error, + _list_multimethods, _extract_self_or_cls) from .dynassign import dyn, make_dynvar +from .funutil import Values from .regutil import register_decorator from 
.symbol import sym -# we use @passthrough_lazy_args (and handle possible lazy args) to support unpythonic.syntax.lazify. -from .lazyutil import passthrough_lazy_args, islazy, force, force1, maybe_force_args +# We use `@passthrough_lazy_args` and `maybe_force_args` to support unpythonic.syntax.lazify. +from .lazyutil import passthrough_lazy_args, islazy, force, maybe_force_args + +# -------------------------------------------------------------------------------- + +#def memoize_simple(f): # essential idea, without exception handling or thread-safety. +# memo = {} +# @wraps(f) +# def memoized(*args, **kwargs): +# k = tuplify_bindings(resolve_bindings(f, *args, **kwargs)) +# if k not in memo: +# memo[k] = f(*args, **kwargs) +# return memo[k] +# return memoized _success = sym("_success") _fail = sym("_fail") @@ -42,18 +62,35 @@ def memoize(f): **CAUTION**: ``f`` must be pure (no side effects, no internal state preserved between invocations) for this to make any sense. + + Beginning with v0.15.0, `memoize` is thread-safe even when the same memoized + function instance is called concurrently from multiple threads. Exactly one + thread will compute the result. If `f` is recursive, the thread that acquired + the lock is the one that is allowed to recurse into the memoized `f`. """ + # One lock per use site of `memoize`. We use an `RLock` to allow recursive calls + # to the memoized `f` in the thread that acquired the lock. + lock = RLock() memo = {} @wraps(f) def memoized(*args, **kwargs): - k = tuplify_bindings(resolve_bindings(f, *args, **kwargs)) - if k not in memo: - try: - result = (_success, maybe_force_args(f, *args, **kwargs)) - except BaseException as err: - result = (_fail, err) - memo[k] = result # should yell separately if k is not a valid key - kind, value = memo[k] + k = tuplify_bindings(_resolve_bindings(f, args, kwargs, _partial=False)) + try: # EAFP to eliminate TOCTTOU. 
+ kind, value = memo[k] + except KeyError: + # But we still need to be careful to avoid race conditions. + with lock: + if k not in memo: + # We were the first thread to acquire the lock. + try: + result = (_success, maybe_force_args(f, *args, **kwargs)) + except BaseException as err: + result = (_fail, err) + memo[k] = result # should yell separately if k is not a valid key + else: + # Some other thread acquired the lock before us. + pass + kind, value = memo[k] if kind is _fail: raise value return value @@ -61,30 +98,99 @@ def memoized(*args, **kwargs): memoized = passthrough_lazy_args(memoized) return memoized -#def memoize_simple(f): # essential idea, without exception handling -# memo = {} -# @wraps(f) -# def memoized(*args, **kwargs): -# k = tuplify_bindings(resolve_bindings(f, *args, **kwargs)) -# if k not in memo: -# memo[k] = f(*args, **kwargs) -# return memo[k] -# return memoized +# -------------------------------------------------------------------------------- -make_dynvar(curry_context=[]) -@passthrough_lazy_args -def _currycall(f, *args, **kwargs): - """Co-operate with unpythonic.syntax.curry. +# Parameter naming is consistent with `functools.partial`. +# +# Note standard behavior of `functools.partial`: `kwargs` do not disappear from the call +# signature even if partially applied. The same kwarg can be sent multiple times, with the +# latest application winning. We must resist the temptation to override that behavior here, +# because there are other places in the stdlib, particularly `inspect._signature_get_partial` +# (as of Python 3.8), that expect the standard semantics. +def partial(func, *args, **kwargs): + """Type-checking `functools.partial`. - In a ``with autocurry`` block, need to call also when ``f()`` has transformed - to ``curry(f)``, but definitions can be curried as usual. + This is a wrapper that type-checks the arguments against the type annotations + on `func`, and if the type check passes, calls `functools.partial`. 
- Hence we provide this separate mode to curry-and-call even if no args. + Arguments can be passed by position or by name; we compute their bindings + to function parameters like Python itself does. - This mode also no-ops when ``f`` is not inspectable, instead of raising - an ``unpythonic.arity.UnknownArity`` exception. + The type annotations may use features from the `typing` stdlib module. + See `unpythonic.typecheck.isoftype` for details. + + Trying to pass an argument of a type that does not match the corresponding + parameter's type specification raises `TypeError` immediately. + + Any parameter that does not have a type annotation will be ignored in the type check. + + Note the check still occurs at run time, but at the use site of `partial`, + when the partially applied function is constructed. This makes it fail-fast-er + than an `isinstance` check inside the function. + + To conveniently make regular calls of the function type-check arguments, too, + see the decorator `unpythonic.dispatch.typed`. """ - return curry(f, *args, _curry_force_call=True, _curry_allow_uninspectable=True, **kwargs) + # HACK: As of Python 3.8, `typing.get_type_hints` does not know about `functools.partial` objects, + # HACK: but those objects have `args` and `keywords` attributes, so we can extract what we need. + # TODO: Maybe remove this hack if `typing.get_type_hints` gets support for `functools.partial` at some point. + if isinstance(func, functools_partial): + thecallable = func.func + collected_args = func.args + args + collected_kwargs = {**func.keywords, **kwargs} + else: + thecallable = func + collected_args = args + collected_kwargs = kwargs + + if isgeneric(thecallable): # multiple dispatch + # For generic functions, at least one multimethod must match the partial signature + # for the partial application to be valid. 
+ if not _resolve_multimethod(thecallable, collected_args, collected_kwargs, _partial=True): + _raise_multiple_dispatch_error(thecallable, collected_args, collected_kwargs, + candidates=_list_multimethods(thecallable, + _extract_self_or_cls(thecallable, + args)), + _partial=True) + else: # Not `@generic` or `@typed`; just a function that might have type annotations. + # It's not very unpythonic-ic to provide this since we already have `@typed` for this use case, + # but it's much more pythonic, if the type-checking `partial` works properly for code that does + # not opt in to `unpythonic`'s multiple-dispatch subsystem. + # TODO: There's some repeated error-reporting code in `unpythonic.dispatch`. + type_signature = get_type_hints(thecallable) + if type_signature: # TODO: Python 3.8+: use walrus assignment here + # Partial mode: allow leaving some parameters unbound. + bound_arguments = _resolve_bindings(func, collected_args, collected_kwargs, _partial=True) + # Allow having some parameters without type annotations, in which case those parameters + # will not be type-checked. `@generic` requires them for all parameters except + # `self`/`cls`, but type annotations in general have no such requirement. + mismatches = _get_argument_type_mismatches(type_signature, bound_arguments, skip_unannotated=True) + if mismatches: + description = _format_callable(func) + mismatches_list = [f"{parameter}={repr(value)}, expected {expected_type}" + for parameter, value, expected_type in mismatches] + mismatches_str = "; ".join(mismatches_list) + raise TypeError(f"When partially applying {description}:\nParameter binding(s) do not match type specification: {mismatches_str}") + + # `functools.partial` already handles chaining partial applications, so send only the new args/kwargs to it. 
+ return functools_partial(func, *args, **kwargs) + +# -------------------------------------------------------------------------------- + +#def curry_simple(f): # essential idea, without any extra features +# min_arity, _ = arities(f) +# @wraps(f) +# def curried(*args, **kwargs): +# if len(args) < min_arity: +# return curry(partial(f, *args, **kwargs)) +# return f(*args, **kwargs) +# return curried + +make_dynvar(curry_context=[]) + +def iscurried(f): + """Return whether f is a curried function.""" + return hasattr(f, "_is_curried_function") @register_decorator(priority=8) @passthrough_lazy_args @@ -92,16 +198,19 @@ def curry(f, *args, _curry_force_call=False, _curry_allow_uninspectable=False, * """Decorator: curry the function f. Essentially, the resulting function automatically chains partial application - until the minimum positional arity of ``f`` is satisfied, at which point - ``f``is called. + until all parameters of ``f`` are bound, at which point ``f`` is called. - Also more kwargs can be passed at each step, but they do not affect the - decision when the function is called. + For a callable to be curryable, its signature must be inspectable by the stdlib + function `inspect.signature`. In some versions of Python, inspection may fail + for builtin functions or methods such as ``print``, ``range``, ``operator.add``, + or ``list.append``. - For a callable to be curryable, it must be possible to inpect its signature - to determine its minimum and maximum positional arities; builtin functions - such as ``operator.add`` won't work. In such cases ``UnknownArity`` will - be raised. + **CAUTION**: Up to v0.14.3, we looked at positional arity only, and there were + workarounds in place for some of the most common builtins. As of v0.15.0, we + compute argument bindings like Python itself does. Hence we use a different + algorithm, and thus a *different subset* of builtins may have become uninspectable. 
+ + When inspection fails, we raise ``ValueError``, like `inspect.signature` does. **Examples**:: @@ -127,9 +236,9 @@ def foo(a, b, *, c, d): **Passthrough**: - If too many args are given, any extra ones are passed through on the right. - If an intermediate result is callable, it is invoked on the remaining - positional args:: + If too many args or unacceptable kwargs are given, any extra ones are passed + through. Positional args are passed through on the right. If an intermediate + result is callable, it is invoked on the remaining args and kwargs:: map_one = lambda f: (curry(foldr))(composer(cons, to1st(f)), nil) assert curry(map_one)(double, ll(1, 2, 3)) == ll(2, 4, 6) @@ -138,9 +247,6 @@ def foo(a, b, *, c, d): is extra. The result of ``map_one`` is a callable, so it is then invoked on this tuple. - For simplicity, in passthrough, all kwargs are consumed in the first step - for which too many positional args were supplied. - By default, if any passed-through positional args are still remaining when the currently top-level curry context exits, ``curry`` raises ``TypeError``, because such usage often indicates a bug. @@ -177,16 +283,25 @@ def foo(a, b, *, c, d): clip = lambda n1, n2: composel(*with_n((n1, drop), (n2, take))) assert tuple(curry(clip, 5, 10, range(20))) == tuple(range(5, 15)) - **CAUTION**: BUG: `curry` may fail to actually call the function even after - sufficient arguments have been collected, if some of the positional-or-keyword - arguments of the function being curried are passed by name (in the first call). - It seems those arguments don't reduce the expected remaining positional arity, - although they should. See issue #61: - https://github.com/Technologicat/unpythonic/issues/61 + **Kwargs support**: + + As of v0.15.0, `curry` supports passing arguments by name at any step during the currying. 
+ + We collect both `args` and `kwargs` across all steps, and bind arguments to function + parameters the same way Python itself does, so it shouldn't matter whether the function + parameters end up bound by position or name. When all parameters have a binding, the call + triggers. + + That means, for example, that this now works as expected:: + + @curry + def f(x, y): + return x, y + + assert f(y=2)(x=1) == (1, 2) - **Workaround**: if possible, at the definition site for your function, declare - any arguments you plan to pass by name as keyword-only; then they won't affect - the positional arity. + If you notice any semantic differences in parameter binding when using `curry`, when compared + to regular one-step function calls, please file an issue. """ f = force(f) # lazify support: we need the value of f # trivial case first: interaction with call_ec and other replace-def-with-value decorators @@ -197,45 +312,136 @@ def foo(a, b, *, c, d): if args or kwargs or _curry_force_call: return maybe_force_args(f, *args, **kwargs) return f - # TODO: improve: all required name-only args should be present before calling f. - # Difficult, partial() doesn't remove an already-set kwarg from the signature. - try: - min_arity, max_arity = arities(f) - except UnknownArity: # likely a builtin + + def fallback(): # what to do when inspection fails if not _curry_allow_uninspectable: # usual behavior raise # co-operate with unpythonic.syntax.autocurry; don't crash on builtins if args or kwargs or _curry_force_call: return maybe_force_args(f, *args, **kwargs) return f + + # Try to fail-fast with uninspectable builtins, even if no arguments were passed. + # (If we get arguments, there's no landmine, because calling the curried function + # will perform the signature analysis.) + if not (args or kwargs): + try: + signature(f) + except ValueError as err: # inspection failed in inspect.signature()? 
+ msg = err.args[0] + if "no signature found" in msg: + return fallback() + raise + @wraps(f) def curried(*args, **kwargs): outerctx = dyn.curry_context with dyn.let(curry_context=(outerctx + [f])): - if len(args) < min_arity: + # In order to decide what to do when the curried function is called, we must first compute + # the parameter bindings. All of `f`'s parameters must be bound (whether by position or by + # name) before calling `f`. + # + # The parameter binding analysis result is needed for passthrough. + try: + action, analysis = _decide_curry_action(f, args, kwargs) + except ValueError as err: # inspection failed in inspect.signature()? + msg = err.args[0] + if "no signature found" in msg: + return fallback() + raise + + if action is _call: + return maybe_force_args(f, *args, **kwargs) + + elif action == _call_with_passthrough: + # To avoid subtle errors, we must pass the arguments the same way the user did: + # - Any arguments passed to us positionally must be passed through positionally, + # - Any arguments passed to us by name must be passed through by name. + # + # Note the impedance mismatch with our use of `functools.partial`; the `args`/`kwargs` + # here are **NOT** the full `args`/`kwargs`, but only the new ones from this step. + # + # We know these args/kwargs were extra when matched against the function's call signature: + later_args = analysis.extra_args + later_kwargs = analysis.extra_kwargs + # Hence, we should avoid passing **now** any args/kwargs that should be passed later: + if later_args: + now_args = args[:-len(later_args)] + else: + now_args = args + now_kwargs = {k: v for k, v in kwargs.items() if k not in later_kwargs} + + now_result = maybe_force_args(f, *now_args, **now_kwargs) + + # Inspect the return value(s). + # - Inject the appropriate items to `later_args` and `later_kwargs`. + if isinstance(now_result, Values): # multiple-return-values + if now_result.rets: + # `leftmost`, not `first`, for unambiguous stack traces. 
+ leftmost, *others = now_result.rets + + # Extra positional arguments (`later_args`) are passed through *on the right*. + # Hence any further positional return values are inserted before them. + if callable(leftmost): + # If the leftmost return value is a callable, omit it from `later_args`, + # since we will call it. + later_args = tuple(others) + later_args + else: + later_args = (leftmost,) + tuple(others) + later_args + else: + # No positional return values; no changes to `later_args`. + leftmost = None + + # In case of name conflicts, named return values override earlier extra named arguments. + # (This follows the execution order: arguments were passed in, then the function ran.) + # TODO: This way, or allow named arguments to override a named return value? + # TODO: Which choice is more useful practically or mathematically? + if now_result.kwrets: + later_kwargs = {**later_kwargs, **now_result.kwrets} + else: + # The only return value is also the leftmost one. + leftmost = now_result + if callable(leftmost): + pass + else: + later_args = (leftmost,) + later_args + + # If the first positional return value is a callable, curry it and recurse. + # Currying sustains the chain in case the next action is `_call_with_passthrough` + # or `_keep_currying`. + if callable(leftmost): + if not iscurried(leftmost): + leftmost = curry(leftmost) + return maybe_force_args(leftmost, *later_args, **later_kwargs) + + # The first positional return value is not a callable. Pass the return value(s) through + # to the curried procedure waiting in outerctx (e.g. in a curried compose chain). + # + # If there is no outer curry context (i.e. we are the top-level curry context), + # by default it is an error to have any args/kwargs left over, to avoid common + # human error. (To explicitly state such intent, `with dyn.let(curry_context=["whatever"])`.) 
+ if not outerctx: + num_positional_msg = f"{len(later_args)} positional" + num_named_msg = f"{len(later_kwargs)} named" + num_sep = " and " if later_args and later_kwargs else "" + plural = "s" if len(later_args) + len(later_kwargs) != 1 else "" + positional_msg = f"positional: {later_args}" + named_msg = f"named: {later_kwargs}" + sep = "; " if later_args and later_kwargs else "" + raise TypeError(f"Top-level curry context exited with {num_positional_msg}{num_sep}{num_named_msg} argument{plural} remaining; {positional_msg}{sep}{named_msg}") + return Values(*later_args, **later_kwargs) + + elif action is _keep_currying: + # Fail-fast: use our `partial` wrapper to type-check the partial call signature + # when we build the curried function. It delegates to `functools.partial` if the + # type check passes, and else raises a `TypeError` immediately. p = partial(f, *args, **kwargs) if islazy(f): p = passthrough_lazy_args(p) return curry(p) - # passthrough on right, like https://github.com/Technologicat/spicy - if len(args) > max_arity: - now_args, later_args = args[:max_arity], args[max_arity:] - now_result = maybe_force_args(f, *now_args, **kwargs) # use up all kwargs now - now_result = force(now_result) if not isinstance(now_result, tuple) else force1(now_result) - if callable(now_result): - # curry it now, to sustain the chain in case we have - # too many (or too few) args for it. - if not iscurried(now_result): - now_result = curry(now_result) - return now_result(*later_args) - if not outerctx: - raise TypeError(f"Top-level curry context exited with {len(later_args)} arg(s) remaining: {later_args}") - # pass through to the curried procedure waiting in outerctx - # (e.g. 
in a curried compose chain) - if isinstance(now_result, tuple): - return now_result + later_args - return (now_result,) + later_args - return maybe_force_args(f, *args, **kwargs) + + else: # pragma: no cover + assert False, action if islazy(f): curried = passthrough_lazy_args(curried) curried._is_curried_function = True # stash for detection @@ -244,18 +450,132 @@ def curried(*args, **kwargs): return maybe_force_args(curried, *args, **kwargs) return curried -def iscurried(f): - """Return whether f is a curried function.""" - return hasattr(f, "_is_curried_function") +@passthrough_lazy_args +def _currycall(f, *args, **kwargs): + """Co-operate with unpythonic.syntax.autocurry. -#def curry_simple(f): # essential idea, without the extra features -# min_arity, _ = arities(f) -# @wraps(f) -# def curried(*args, **kwargs): -# if len(args) < min_arity: -# return curry(partial(f, *args, **kwargs)) -# return f(*args, **kwargs) -# return curried + In a ``with autocurry`` block, we need to call `f` also when ``f()`` has + transformed to ``curry(f)``, but definitions can be curried as usual. + + Hence we provide this separate mode to curry-and-call even if no args. + + This mode no-ops when ``f`` is not inspectable, instead of raising + an ``unpythonic.arity.UnknownArity`` exception. + """ + return curry(f, *args, _curry_force_call=True, _curry_allow_uninspectable=True, **kwargs) + +# actions during currying +_call = sym("_call") +_call_with_passthrough = sym("_call_with_passthrough") +_keep_currying = sym("_keep_currying") + +_Analysis = namedtuple("_Analysis", ["bound_arguments", "unbound_parameters", "extra_args", "extra_kwargs"]) + +# For performance, it is important to have this function defined once at the top level +# of the module, instead of defining it as a closure each time `curry` is called. +def _decide_curry_action(f, args, kwargs): + """ Internal helper for `curry`. + + The `args` and `kwargs` are those added at this step of currying. 
+ + We detect if `f` is a `functools.partial` object, and automatically extract + any previously supplied `args` and `kwargs` for analysis. + + Return value is `(action, analysis)`. See source code for details. + """ + # `functools.partial()` doesn't remove an already-set kwarg from the signature (as seen by + # `inspect.signature`), but `functools.partial` objects have a `keywords` attribute, which + # contains what we want. + # + # To support kwargs properly, we must compute argument bindings anyway, so we also use the + # `func` and `args` attributes. This allows us to compute the bindings of all arguments + # against the original function. + if isinstance(f, functools_partial): + function = f.func + collected_args = f.args + args + collected_kwargs = {**f.keywords, **kwargs} + else: + function = f + collected_args = args + collected_kwargs = kwargs + + def _bind_arguments(thecallable): + # For this check we look for a complete match, hence `_partial=False`. + bound_arguments, unbound_parameters, (extra_args, extra_kwargs) = _bind(signature(thecallable), + collected_args, + collected_kwargs, + partial=False) + return _Analysis(bound_arguments, unbound_parameters, extra_args, extra_kwargs) + + # `@generic` functions have several call signatures, so we must aggregate the results + # in a sensible way. For non-generics, there's just one call signature. + if not isgeneric(function): + # For non-generics, the curry-time type check occurs when we later call `partial`, + # so we don't need to do that here. We just compute the bindings of arguments to parameters. 
+ analysis = _bind_arguments(function) + if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: + return _call, analysis + elif not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): + return _call_with_passthrough, analysis + assert analysis.unbound_parameters + return _keep_currying, analysis + + # Curry resolver for `@generic`/`@typed` (generic functions, multimethods, multiple dispatch). + # + # Iterate over multimethods, once per step: + # + # 1. If there is an exact match (all parameters bound, type check passes, no extra + # `args`/`kwargs`), call it. + # 2. If there is a complete match (all parameters bound, type check passes), but + # with extra `args`/`kwargs` (that cannot be accepted by the call signature), + # call it, arranging passthrough for the extra `args`/`kwargs`. + # 3. If there is at least one partial match (type check passes for bound arguments, + # unbound parameters remain), keep currying. In this case extra `args`/`kwargs`, + # if any, do not matter. This will fall into case 1 or 2 above after we get + # additional `args`/`kwargs` to complete a match. + # + # If none of the above match, we know at least one parameter got a binding + # that fails the type check. Raise `TypeError`. + # + # In steps 1 and 2, we use the same lookup order as the multiple dispatcher does; + # the first matching multimethod wins. Actual dispatch is still done by the dispatcher; + # we only compute the bindings to determine which case above the call falls into. + # + # `@typed` is a special case of `@generic` with just one multimethod registered. + # The resulting behavior is the same as for a non-generic function, because the + # above algorithm reduces to that. + + # We can't use the public `list_methods` here, because on OOP methods, + # decorators live on the unbound method (raw function). 
Thus we must + # extract `self`/`cls` from the arguments of the call (for linked + # dispatcher lookup in the MRO). + multimethods = _list_multimethods(function, + _extract_self_or_cls(function, + collected_args)) + # Step 1: exact match + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _call, analysis + # Step 2: complete match, with extra args/kwargs + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _call_with_passthrough, analysis + # Step 3: partial match + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if analysis.unbound_parameters: + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _keep_currying, analysis + # No matter which multimethod we pick, at least one parameter gets a binding + # that fails the type check. + _raise_multiple_dispatch_error(function, collected_args, collected_kwargs, + candidates=multimethods, _partial=True) + +# -------------------------------------------------------------------------------- def flip(f): """Decorator: flip (reverse) the positional arguments of f.""" @@ -297,6 +617,8 @@ def rotated(*args, **kwargs): return rotated return rotate_k +# -------------------------------------------------------------------------------- + @passthrough_lazy_args def apply(f, arg0, *more, **kwargs): """Scheme/Racket-like apply. 
@@ -321,54 +643,74 @@ def apply(f, arg0, *more, **kwargs): lst = tuple(more[-1]) return maybe_force_args(f, *(args + lst), **kwargs) +# -------------------------------------------------------------------------------- + # Not marking this as lazy-aware works better with continuations (since this # is the default cont, and return values should be values, not lazy[]) -def identity(*args): +def identity(*args, **kwargs): """Identity function. - Accepts any positional arguments, and returns them. + Accepts any args and kwargs, and returns them. - Packs into a tuple if there is more than one. + Packs into a `Values` if anything other than one positional arg. Example:: - assert identity(1, 2, 3) == (1, 2, 3) + assert identity(1, 2, 3) == Values(1, 2, 3) assert identity(42) == 42 assert identity() is None + + **CAUTION**: Not lazy. In code using `with lazify`, all arguments + to `identity` will be forced. This is due to two reasons: + + 1. `identity` is the default continuation in `with continuations`, + producing the final return value in a continuation-enabled + computation. + + 2. `identity` just returns its arguments. Return values are + never implicitly lazy in `unpythonic`. """ - if not args: + if not args and not kwargs: return None - return args if len(args) > 1 else args[0] + return Values(*args, **kwargs) if kwargs or len(args) > 1 else args[0] # In lazify, return values are always just values, so we have to force args # to compute the return value; as a shortcut, just don't mark this as lazy. -def const(*args): +def const(*args, **kwargs): """Constant function. Returns a function that accepts any arguments (also kwargs) - and returns the args given here (packed into a tuple if more than one). + and returns the args and kwargs given here (packed into a `Values` + if anything other than one positional arg). 
Example:: c = const(1, 2, 3) - assert c(42, "foo") == (1, 2, 3) - assert c("anything") == (1, 2, 3) - assert c() == (1, 2, 3) + assert c(42, "foo") == Values(1, 2, 3) + assert c("anything") == Values(1, 2, 3) + assert c() == Values(1, 2, 3) c = const(42) assert c("anything") == 42 c = const() assert c("anything") is None + + **CAUTION**: Not lazy. In code using `with lazify`, all arguments + to `const` will be forced. This is because the function returned + by `const` just returns the arguments that were supplied to `const`; + return values are never implicitly lazy in `unpythonic`. """ - if not args: + if not args and not kwargs: ret = None else: - ret = args if len(args) > 1 else args[0] + ret = Values(*args, **kwargs) if kwargs or len(args) > 1 else args[0] def constant(*a, **kw): return ret return constant +# -------------------------------------------------------------------------------- + def notf(f): # Racket: negate """Return a function that returns the logical not of the result of f. 
@@ -398,6 +740,7 @@ def andf(*fs): # Racket: conjoin assert andf(lambda x: isinstance(x, int), lambda x: x % 2 == 0)(42) is True assert andf(lambda x: isinstance(x, int), lambda x: x % 2 == 0)(43) is False """ + @passthrough_lazy_args def conjoined(*args, **kwargs): b = True for f in fs: @@ -405,8 +748,6 @@ def conjoined(*args, **kwargs): if not b: return False return b - if all(islazy(f) for f in fs): - conjoined = passthrough_lazy_args(conjoined) return conjoined def orf(*fs): # Racket: disjoin @@ -426,6 +767,7 @@ def orf(*fs): # Racket: disjoin assert orf(isstr, iseven)("foo") is True assert orf(isstr, iseven)(None) is False # neither condition holds """ + @passthrough_lazy_args def disjoined(*args, **kwargs): b = False for f in fs: @@ -433,22 +775,46 @@ def disjoined(*args, **kwargs): if b: return b return False - if all(islazy(f) for f in fs): - disjoined = passthrough_lazy_args(disjoined) return disjoined -def _make_compose1(direction): # "left", "right" +# -------------------------------------------------------------------------------- + +def _make_compose1(direction): + """Make a function that composes functions from an iterable. + + Return value is a function `compose1(fs)` -> `composed(x)`. + + `direction`: str, one of "left", "right". Which way to compose. + + For example, let `fs = (f1, f2, f3)`. + + If `direction == "left"`, `composed` computes f3(f2(f1(x))); + the functions apply leftmost first. + + If `direction == "right"`, `composed` computes f1(f2(f3(x))); + the functions apply rightmost first. + + Standard mathematical function composition notation f1 ∘ f2 ∘ f3 takes rightmost first, + but we refuse the temptation to guess. We provide only explicit `l` and `r` variants + of all the `compose1` utilities. 
+ """ def compose1_two(f, g): # return lambda x: f(g(x)) return lambda x: maybe_force_args(f, maybe_force_args(g, x)) if direction == "right": compose1_two = flip(compose1_two) def compose1(fs): - # direction == "left" (leftmost is innermost): + """Compose one-argument functions from iterable `fs`. + + **CAUTION**: This is a closure. Which way to compose (left or right) + was chosen when this closure instance was created. Please use the + public API functions whose names explicitly state the direction. + """ + # If `direction == "left"` leftmost is innermost: # input: a b c # elt = b -> f, acc = a(x) -> g --> b(a(x)) # elt = c -> f, acc = b(a(x)) -> g --> c(b(a(x))) - # direction == "right" (rightmost is innermost): + # If `direction == "right"`, rightmost is innermost: # input: a b c # elt = b -> g, acc = a(x) -> f --> a(b(x)) # elt = c -> g, acc = a(b(x)) -> f --> a(b(c(x))) @@ -456,8 +822,7 @@ def compose1(fs): # - if fs is empty, we output None # - if fs contains only one item, we output it as-is composed = reducel(compose1_two, fs) # op(elt, acc) - if all(islazy(f) for f in fs): - composed = passthrough_lazy_args(composed) + composed = passthrough_lazy_args(composed) return composed return compose1 @@ -496,30 +861,55 @@ def composel1i(iterable): """Like composel1, but read the functions from an iterable.""" return _compose1_left(iterable) -def _make_compose(direction): # "left", "right" +def _make_compose(direction): + """Make a function that composes functions from an iterable. + + Return value is a function `compose(fs)` -> `composed(*args, **kwargs)`. + + `direction`: str, one of "left", "right". Which way to compose. + + For example, let `fs = (f1, f2, f3)`. + + If `direction == "left"`, `composed` computes f3(f2(f1(...))); + the functions apply leftmost first. + + If `direction == "right"`, `composed` computes f1(f2(f3(...))); + the functions apply rightmost first. 
+ + Standard mathematical function composition notation f1 ∘ f2 ∘ f3 takes rightmost first, + but we refuse the temptation to guess. We provide only explicit `l` and `r` variants + of all the `compose` utilities. + """ def compose_two(f, g): - def composed(*args): + """g is applied first, then f. + + (f ∘ g)(...) ≡ f(g(...)) + """ + def composed(*args, **kwargs): bindings = {} if iscurried(f): - # co-operate with curry: provide a top-level curry context + # Co-operate with curry: provide a top-level curry context # to allow passthrough from the function that is applied first # to the function that is applied second. bindings = {"curry_context": dyn.curry_context + [composed]} with dyn.let(**bindings): - a = maybe_force_args(g, *args) - # we could duck-test, but this is more predictable for the user - # (consider chaining functions that manipulate a generator), and - # tuple specifically is the pythonic multiple-return-values thing. - if isinstance(a, tuple): - return maybe_force_args(f, *a) + a = maybe_force_args(g, *args, **kwargs) + if isinstance(a, Values): + return maybe_force_args(f, *a.rets, **a.kwrets) return maybe_force_args(f, a) return composed if direction == "right": compose_two = flip(compose_two) def compose(fs): + """Compose functions from iterable `fs`. + + **CAUTION**: This is a closure. Which way to compose (left or right) + was chosen when this closure instance was created. Please use the + public API functions whose names explicitly state the direction. + """ + fs = force(fs) composed = reducel(compose_two, fs) # op(elt, acc) - if all(islazy(f) for f in fs): - composed = passthrough_lazy_args(composed) + composed = passthrough_lazy_args(composed) return composed return compose @@ -527,16 +917,15 @@ def compose(fs): _compose_right = _make_compose("right") def composer(*fs): - """Compose functions accepting only positional args. Right to left. + """Compose functions. Right to left. 
This mirrors the standard mathematical convention (f ∘ g)(x) ≡ f(g(x)). - At each step, if the output from a function is a tuple, - it is unpacked to the argument list of the next function. Otherwise, - we assume the output is intended to be fed to the next function as-is. + We support passing both positional and named values. - Especially, generators, namedtuples and any custom classes will **not** be - unpacked, regardless of whether or not they support the iterator protocol. + At each step, if the output from a function is a `Values`, it is unpacked + to the args and kwargs of the next function. Otherwise, we feed the output + to the next function as a single positional argument. """ return composeri(fs) @@ -581,16 +970,20 @@ def composelci(iterable): """Like composelc, but read the functions from an iterable.""" return composeli(map(curry, iterable)) +# -------------------------------------------------------------------------------- + # Helpers to insert one-in-one-out functions into multi-arg compose chains def tokth(k, f): """Return a function to apply f to args[k], pass the rest through. + The output is a `Values`. Named arguments are passed through as-is. + Negative indices also supported. Especially useful in multi-arg compose chains. See ``unpythonic.test.test_fun`` for examples. """ - def apply_f_to_kth_arg(*args): + def apply_f_to_kth_arg(*args, **kwargs): n = len(args) if not n: raise TypeError("Expected at least one argument") @@ -602,7 +995,7 @@ def apply_f_to_kth_arg(*args): out.append(maybe_force_args(f, args[j])) # mth argument if n > m: out.extend(args[m:]) - return tuple(out) + return Values(*out, **kwargs) if islazy(f): apply_f_to_kth_arg = passthrough_lazy_args(apply_f_to_kth_arg) return apply_f_to_kth_arg @@ -646,6 +1039,8 @@ def to(*specs): """ return composeli(tokth(k, f) for k, f in specs) +# -------------------------------------------------------------------------------- + @register_decorator(priority=80) def withself(f): """Decorator. 
Allow a lambda to refer to itself. diff --git a/unpythonic/funutil.py b/unpythonic/funutil.py new file mode 100644 index 00000000..68519e81 --- /dev/null +++ b/unpythonic/funutil.py @@ -0,0 +1,406 @@ +# -*- coding: utf-8 -*- +"""Function call and return value related utilities.""" + +__all__ = ["call", "callwith", + "Values", "valuify"] + +from functools import wraps + +from .lazyutil import passthrough_lazy_args, islazy, maybe_force_args, force +from .regutil import register_decorator +from .symbol import sym + +# HACK: break dependency loop llist -> fun -> funutil -> collections -> llist +_init_done = False +frozendict = sym("frozendict") # doesn't matter what the value is, will be overwritten later +def _init_module(): # called by unpythonic.__init__ when otherwise done + global frozendict, _init_done + from .collections import frozendict + _init_done = True + +# Only the single-argument form (just f) of the "call" decorator is supported by unpythonic.syntax.util.sort_lambda_decorators. +# +# This is as it should be; if given any arguments beside f, the call doesn't conform +# to the decorator API, but is a normal function call. See "callwith" if you need to +# pass arguments and then call f from a decorator position. +@register_decorator(priority=80) +@passthrough_lazy_args +def call(f, *args, **kwargs): + """Call the function f. + + **When used as a decorator**: + + Run the function immediately, then overwrite the definition by its + return value. + + Useful for making lispy not-quite-functions where the def just delimits + a block of code that runs immediately (think call-with-something in Lisps, + but without the something). + + The function will be called with no arguments. If you need to pass + arguments when using ``call`` as a decorator, see ``callwith``. + + **When called normally**: + + ``call(f, *a, **kw)`` is the same as ``f(*a, **kw)``. 
+ + *Why ever use call() normally?* + + - Readability and aesthetics in cases like ``makef(dostuffwith(args))()``, + where ``makef`` is a function factory, and we want to immediately + call its result. + + Rewriting this as ``call(makef(dostuffwith(args)))`` relocates the + odd one out from the mass of parentheses at the end. (A real FP example + would likely have more levels of nesting.) + + - Notational uniformity with ``curry(f, *args, **kwargs)`` for cases + without currying. See ``unpythonic.fun.curry``. + + - For fans of S-expressions. Write Python almost like Lisp! + + Name inspired by "call-with-something", but since here we're calling + without any specific thing, it's just "call". + + Examples:: + + @call + def result(): # this block of code runs immediately + return "hello" + print(result) # "hello" + + # if the return value is of no interest: + @call + def _(): + ... # code with cheeky side effects goes here + + @call + def x(): + a = 2 # many temporaries that help readability... + b = 3 # ...of this calculation, but would just pollute locals... + c = 5 # ...after the block exits + return a * b * c + + @call + def _(): + for x in range(10): + for y in range(10): + if x * y == 42: + return # "multi-break" out of both loops! + ... + + Note that in the multi-break case, ``x`` and ``y`` are no longer in scope + outside the block, since the block is a function. + """ +# return f(*args, **kwargs) + return maybe_force_args(force(f), *args, **kwargs) # support unpythonic.syntax.lazify + +@register_decorator(priority=80) +@passthrough_lazy_args +def callwith(*args, **kwargs): + """Freeze arguments, choose function later. 
+ + **Used as decorator**, this is like ``@call``, but with arguments:: + + @callwith(3) + def result(x): + return x**2 + assert result == 9 + + **Called normally**, this creates a function to apply the given arguments + to a callable to be specified later:: + + def myadd(a, b): + return a + b + def mymul(a, b): + return a * b + apply23 = callwith(2, 3) + assert apply23(myadd) == 5 + assert apply23(mymul) == 6 + + When called normally, the two-step application is mandatory. The first step + stores the given arguments. It returns a function ``f(callable)``. When + ``f`` is called, it calls its ``callable`` argument, passing in the arguments + stored in the first step. + + In other words, ``callwith`` is similar to ``functools.partial``, but without + specializing to any particular function. The function to be called is + given later, in the second step. + + Hence, ``callwith(2, 3)(myadd)`` means "make a function that passes in + two positional arguments, with values ``2`` and ``3``. Then call this + function for the callable ``myadd``". + + But if we instead write``callwith(2, 3, myadd)``, it means "make a function + that passes in three positional arguments, with values ``2``, ``3`` and + ``myadd`` - not what we want in the above example. + + Curry obviously does not help; it will happily pass in all arguments + in one go. If you want to specialize some arguments now and some later, + use ``partial``:: + + from functools import partial + + p1 = partial(callwith, 2) + p2 = partial(p1, 3) + p3 = partial(p2, 4) + apply234 = p3() # actually call callwith, get the function + def add3(a, b, c): + return a + b + c + def mul3(a, b, c): + return a * b * c + assert apply234(add3) == 9 + assert apply234(mul3) == 24 + + If the code above feels weird, it should. Arguments are gathered first, + and the function to which they will be passed is chosen in the last step. 
+ + A pythonic alternative to the above examples is:: + + a = [2, 3] + def myadd(a, b): + return a + b + def mymul(a, b): + return a * b + assert myadd(*a) == 5 + assert mymul(*a) == 6 + + a = [2] + a += [3] + a += [4] + def add3(a, b, c): + return a + b + c + def mul3(a, b, c): + return a * b * c + assert add3(*a) == 9 + assert mul3(*a) == 24 + + Another use case of ``callwith`` is ``map``, if we want to vary the function + instead of the data:: + + m = map(callwith(3), [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) + assert tuple(m) == (6, 9, 3**(1/2)) + + The pythonic alternative here is to use the comprehension notation, + which can already do this:: + + m = (f(3) for f in [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) + assert tuple(m) == (6, 9, 3**(1/2)) + + Inspiration: + + *Function application with $* in + http://learnyouahaskell.com/higher-order-functions + """ + def applyfrozenargsto(f): + return maybe_force_args(force(f), *args, **kwargs) + return applyfrozenargsto + + +class Values: + """Structured multiple-return-values. + + That is, return multiple values positionally and by name. This completes + the symmetry between passing function arguments and returning values + from a function: Python itself allows passing arguments by name, but has + no concept of returning values by name. This class adds that concept. + + Having a `Values` type separate from `tuple` also helps with semantic + accuracy. In `unpythonic` 0.15.0 and later, a `tuple` return value now + means just that - one value that is a `tuple`. It is different from a + `Values` that contains several positional return values (that are meant + to be treated separately e.g. by a function composition utility). + + **When to use**: + + Most of the time, returning a tuple to denote multiple-return-values + and unpacking it is just fine, and that is exactly what `unpythonic` + does internally in many places. 
+ + But the distinction is critically important in function composition, + so that positional return values can be automatically mapped into + positional arguments to the next function in the chain, and named + return values into named arguments. + + Accordingly, various parts of `unpythonic` that deal with function + composition use the `Values` abstraction; particularly `curry`, and + the `compose` and `pipe` families, and the `with continuations` macro. + + **Behavior**: + + `Values` is a duck-type with some features of both sequences and mappings, + but not the full `collections.abc` API of either. + + Each operation that obviously and without ambiguity makes sense only + for the positional or named part, accesses that part. + + The only exception is `__getitem__` (subscripting), which makes sense + for both parts, unambiguously, because the key types differ. If the index + expression is an `int` or a `slice`, it is an index/slice for the + positional part. If it is an `str`, it is a key for the named part. + + If you need to explicitly access either part (and its full API), + use the `rets` and `kwrets` attributes. The names are in analogy + with `args` and `kwargs`. + + `rets` is a `tuple`, and `kwrets` is an `unpythonic.collections.frozendict`. + + `Values` objects can be compared for equality. Two `Values` objects + are equal if both their `rets` and `kwrets` (respectively) are. 
+ + Examples:: + + def f(): + return Values(1, 2, 3) + result = f() + assert isinstance(result, Values) + assert result.rets == (1, 2, 3) + assert not result.kwrets + assert result[0] == 1 + assert result[:-1] == (1, 2) + a, b, c = result # if no kwrets, can be unpacked like a tuple + a, b, c = f() + + def g(): + return Values(x=3) # named return value + result = g() + assert isinstance(result, Values) + assert not result.rets + assert result.kwrets == {"x": 3} # actually a `frozendict` + assert "x" in result # `in` looks in the named part + assert result["x"] == 3 + assert result.get("x", None) == 3 + assert result.get("y", None) is None + assert tuple(result.keys()) == ("x",) # also `values()`, `items()` + + def h(): + return Values(1, 2, x=3) + result = h() + assert isinstance(result, Values) + assert result.rets == (1, 2) + assert result.kwrets == {"x": 3} + a, b = result.rets # positionals can always be unpacked explicitly + assert result[0] == 1 + assert "x" in result + assert result["x"] == 3 + + def silly_but_legal(): + return Values(42) + result = silly_but_legal() + assert result.rets[0] == 42 + assert result.ret == 42 # shorthand for single-value case + + The last example is silly, but legal, because it is preferable to just omit + the `Values` if it is known that there is only one return value. (This also + applies when that value is a `tuple`, when the intent is to return it as a + single `tuple`, in contexts where this distinction matters.) + """ + def __init__(self, *rets, **kwrets): + """Create a `Values` object. + + `rets`: positional return values + `kwrets`: named return values + """ + self.rets = rets + self.kwrets = frozendict(kwrets) + + # Shorthand for one-value case + def _ret(self): + return self.rets[0] + ret = property(fget=_ret, doc="Shorthand for `self.rets[0]`. Read-only.") + + # Iterable + def __iter__(self): + """Values is iterable when there are no `kwrets`; this then iterates over `rets`. 
+ + This is meant to minimize impact on existing code that receives a `tuple` + as a pythonic multiple-return-values idiom. Changing the `return` to + return a `Values` instead requires no changes at the receiving end + (unless you change the sending end to return some named values; + if you do, then it *should* yell, to avoid silently discarding + those named values). + + Note that you can iterate over `rets` or `kwrets` to explicitly state + which you mean; that always works. + """ + if self.kwrets: + raise ValueError(f"Named values present, cannot iterate over all values. Got: {self.kwrets}") + return iter(self.rets) + + # Sequence (no full support: no `__len__`, `__reversed__`, `index`, `count`) + def __getitem__(self, idx): + """Subscripting. + + Indexing by an `int` or `slice` indexes the positional part. + Indexing by an `str` indexes the named part. + + Indexing by any other type raises `TypeError`. + """ + # multi-headed hydra + if isinstance(idx, (int, slice)): + return self.rets[idx] + elif isinstance(idx, str): + return self.kwrets[idx] + raise TypeError(f"Expected either int, slice or str subscript, got {type(idx)} with value {repr(idx)}") + + # Container + def __contains__(self, k): + """The `in` operator, looks in the named part.""" + return k in self.kwrets + + # Mapping (no full support: no `__len__`) + def items(self): + """Items of the named part.""" + return self.kwrets.items() + def keys(self): + """Keys of the named part.""" + return self.kwrets.keys() + def values(self): + """Values of the named part.""" + return self.kwrets.values() + def get(self, k, default=None): + """Dict-like `get` for the named part.""" + return self[k] if k in self else default + + # comparison + def __eq__(self, other): + """Equality comparison. + + Two `Values` objects are equal if both their `rets` and `kwrets` + (respectively) are. 
+ """ + if not isinstance(other, Values): + return False + return other.rets == self.rets and other.kwrets == self.kwrets + def __ne__(self, other): + """Inequality comparison.""" + return not (self == other) + + # no `__len__`, because we have two candidates + + # pretty-printing + def __repr__(self): # pragma: no cover + """Pretty-printing. Eval-able if the contents are.""" + rets_list = [repr(x) for x in self.rets] + rets_str = ", ".join(rets_list) + kwrets_list = [f"{name}={repr(value)}" for name, value in self.kwrets.items()] + kwrets_str = ", ".join(kwrets_list) + sep = ", " if self.rets and self.kwrets else "" + return f"Values({rets_str}{sep}{kwrets_str})" + + +@register_decorator(priority=30) +def valuify(f): + """Decorator. Convert the pythonic tuple-as-multiple-return-values idiom into `Values`. + + If `f` returns `tuple` (exactly, no subclass), convert into `Values`, else pass through. + """ + @wraps(f) + def valuified(*args, **kwargs): + result = f(*args, **kwargs) + if type(result) is tuple: # yes, exactly tuple + result = Values(*result) + return result + if islazy(f): + valuified = passthrough_lazy_args(valuified) + return valuified diff --git a/unpythonic/gmemo.py b/unpythonic/gmemo.py index 30607a5c..ecd14e65 100644 --- a/unpythonic/gmemo.py +++ b/unpythonic/gmemo.py @@ -112,6 +112,7 @@ def __init__(self, g, memo, lock): self.j = 0 # current position in memo def __repr__(self): return f"<_MemoizedGenerator object {self.g.__name__} at 0x{id(self):x}>" + # Support the `collections.abc.Iterable` API def __iter__(self): return self def __next__(self): @@ -131,6 +132,28 @@ def __next__(self): if kind is _fail: raise value return value + # Support a subset of the `collections.abc.Sequence` API for already-computed items + def __len__(self): + return len(self.memo) + def __getitem__(self, k): + if not isinstance(k, (int, slice)): + raise TypeError(f"Expected an int or slice index, got {type(k)} with value {repr(k)}") + length = len(self.memo) + if 
isinstance(k, slice): + # For slices where at least one item raises an exception, we raise the + # exception that is encountered first when walking the slice. + lst = [] + for kind, value in self.memo[k]: + if kind is _fail: + raise value + lst.append(value) + return lst + if k >= length or k < -length: + raise IndexError(f"memoized generator index out of range; got {k}, with {len(self.memo)} items currently available") + kind, value = self.memo[k] + if kind is _fail: + raise value + return value def imemoize(iterable): """Memoize an iterable. @@ -161,8 +184,10 @@ def imemoize(iterable): If you need to take arguments to create the iterable, see ``fimemoize``. """ - # The lambda is the gfunc; decorate it with gmemoize and return that. - return gmemoize(lambda: (yield from iterable)) + @gmemoize + def iterable_as_gfunc(): + yield from iterable + return iterable_as_gfunc @register_decorator(priority=10) def fimemoize(ifactory): diff --git a/unpythonic/it.py b/unpythonic/it.py index d5a434ff..b53caf47 100644 --- a/unpythonic/it.py +++ b/unpythonic/it.py @@ -23,10 +23,9 @@ "flatten", "flatten1", "flatten_in", "iterate", "iterate1", "partition", - "partition_int", "inn", "iindex", "find", "window", "chunked", - "within", "fixpoint", + "within", "interleave", "subset", "powerset", "allsame"] @@ -36,6 +35,8 @@ from itertools import tee, islice, zip_longest, starmap, chain, filterfalse, groupby, takewhile from collections import deque +from .funutil import Values + def rev(iterable): """Reverse an iterable. @@ -499,11 +500,17 @@ def flatten(iterable, pred=None): (or list), and return ``True`` if that tuple/list should be flattened. When ``pred`` returns False, that tuple/list is passed through as-is. - E.g. to flatten only those items that contain only tuples:: + E.g. 
to flatten only those items that contain only lists or tuples:: is_nested = lambda e: all(isinstance(x, (list, tuple)) for x in e) data = (((1, 2), (3, 4)), (5, 6)) assert tuple(flatten(data, is_nested)) == ((1, 2), (3, 4), (5, 6)) + + Even with a predicate, flattening is still performed recursively + in any item that matches the predicate:: + + data = (((1, 2), ((3, 4), ((5, 6), (7, 8))), (9, 10))) + assert tuple(flatten(data, is_nested)) == ((1, 2), (3, 4), (5, 6), (7, 8), (9, 10)) """ return _flatten(iterable, pred, recursive=True) @@ -557,18 +564,26 @@ def iterate1(f, x): yield x x = f(x) -def iterate(f, *args): +def iterate(f, *args, **kwargs): """Multiple-argument version of iterate1. - The function ``f`` should return a tuple or list of as many elements as it - takes positional arguments; this will be unpacked to the argument list in - the next call. + The initial ``args`` and ``kwargs`` are packed into a ``Values`` object, + which we will below denote as ``x``. When calling ``f``, ``x`` is unpacked + to its args/kwargs. + + The function ``f`` must return a ``Values`` object in the same shape + as it takes args and kwargs; this then becomes the new ``x``. - Or in other words, yield args, f(*args), f(*f(*args)), ... + Using this notation, this function behaves exactly like ``iterate1``: + the return value of ``iterate`` is an infinite generator that yields + x, f(x), f(f(x)), ... """ + x = Values(*args, **kwargs) while True: - yield args - args = f(*args) + yield x + x = f(*x.rets, **x.kwrets) + if not isinstance(x, Values): + raise TypeError(f"Expected a `Values`, got {type(x)} with value {repr(x)}") def partition(pred, iterable): """Partition an iterable to entries satifying and not satisfying a predicate. @@ -588,7 +603,7 @@ def partition(pred, iterable): It will eventually run out of memory storing all the odd numbers "to be read later".) 
- Not to be confused with `unpythonic.it.partition_int`, which partitions + Not to be confused with `unpythonic.numutil.partition_int`, which partitions a (small) positive integer to smaller integers, in all possible ways, such that those integers sum to the original one. """ @@ -596,63 +611,6 @@ def partition(pred, iterable): t1, t2 = tee(iterable) return filterfalse(pred, t1), filter(pred, t2) -def partition_int(n, lower=1, upper=None): - """Yield all ordered sequences of smaller positive integers that sum to `n`. - - `n` must be an integer >= 1. - - `lower` is an optional lower limit for each member of the sum. Each member - of the sum must be `>= lower`. - - (Most of the splits are a ravioli consisting mostly of ones, so it is much - faster to not generate such splits than to filter them out from the result. - The default value `lower=1` generates everything.) - - `upper` is, similarly, an optional upper limit; each member of the sum - must be `<= upper`. The default `None` means no upper limit (effectively, - in that case `upper=n`). - - It must hold that `1 <= lower <= upper <= n`. - - Not to be confused with `unpythonic.it.partition`, which partitions an - iterable based on a predicate. - - **CAUTION**: The number of possible partitions grows very quickly with `n`, - so in practice this is only useful for small numbers, or with a lower limit - that is not too much smaller than `n / 2`. A possible use case for this - function is to determine the number of letters to allocate for each - component of an anagram that may consist of several words. 
- - See: - https://en.wikipedia.org/wiki/Partition_(number_theory) - """ - # sanity check the preconditions, fail-fast - if not isinstance(n, int): - raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") - if not isinstance(lower, int): - raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}") - if upper is not None and not isinstance(upper, int): - raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}") - upper = upper if upper is not None else n - if n < 1: - raise ValueError(f"n must be positive; got {n}") - if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper: - raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}") - - def _partition(n): - for k in range(min(n, upper), lower - 1, -1): - m = n - k - if m == 0: - yield (k,) - else: - out = [] - for item in _partition(m): - out.append((k,) + item) - for term in out: - yield term - - return _partition(n) # instantiate the generator - def inn(x, iterable): """Contains-check (``x in iterable``) with automatic termination. @@ -743,8 +701,7 @@ def find(predicate, iterable, default=None): """ return next(filter(predicate, iterable), default) -# TODO: in 0.15.0, maybe switch the argument order of window() for curry-friendliness? -def window(iterable, n=2): +def window(n, iterable): """Sliding length-n window iterator for a general iterable. Acts like ``zip(s, s[1:], ..., s[n-1:])`` for a sequence ``s``, but the input @@ -828,48 +785,12 @@ def within(tol, iterable): (infinite output, or terminating the output early if a part of it looks like a converging sequence; think a local maximum of `cos(x)`). """ - for a, b in window(iterable, n=2): + for a, b in window(2, iterable): yield a if abs(a - b) <= tol: yield b return -def fixpoint(f, x0, tol=0): - """Compute the (arithmetic) fixed point of f, starting from the initial guess x0. 
- - (Not to be confused with the logical fixed point with respect to the - definedness ordering.) - - The fixed point must be attractive for this to work. See the Banach - fixed point theorem. - https://en.wikipedia.org/wiki/Banach_fixed-point_theorem - - If the fixed point is attractive, and the values are represented in - floating point (hence finite precision), the computation should - eventually converge down to the last bit (barring roundoff or - catastrophic cancellation in the final few steps). Hence the default tol - of zero. - - CAUTION: an arbitrary function from ℝ to ℝ **does not** necessarily - have a fixed point. Limit cycles and chaotic behavior of `f` will cause - non-termination. Keep in mind the classic example: - https://en.wikipedia.org/wiki/Logistic_map - - Examples:: - from math import cos, sqrt - from unpythonic import fixpoint, ulp - c = fixpoint(cos, x0=1) - - # Actually "Newton's" algorithm for the square root was already known to the - # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) - def sqrt_newton(n): - def sqrt_iter(x): # has an attractive fixed point at sqrt(n) - return (x + n / x) / 2 - return fixpoint(sqrt_iter, x0=n / 2) - assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) - """ - return last(within(tol, iterate1(f, x0))) - def interleave(*iterables): """Interleave items from several iterables. Generator. @@ -998,7 +919,8 @@ def total_num_items(ld): def allsame(iterable): """Return whether all elements of an iterable are the same. - The test uses `!=` to compare. + The test uses `!=` to compare, and short-circuits at the + first item that is different. If `iterable` is empty, the return value is `True` (like for `all`). diff --git a/unpythonic/lazyutil.py b/unpythonic/lazyutil.py index 22f3e866..7058798c 100644 --- a/unpythonic/lazyutil.py +++ b/unpythonic/lazyutil.py @@ -5,14 +5,14 @@ upon which other regular code is allowed to depend. 
""" -__all__ = ["passthrough_lazy_args", "maybe_force_args", "force1", "force"] +__all__ = ["Lazy", "force1", "force", # intended also for end-users + "islazy", "maybe_force_args", "passthrough_lazy_args"] # mostly for use inside `unpythonic` from .regutil import register_decorator from .dynassign import make_dynvar from .symbol import sym # HACK: break dependency loop llist -> fun -> lazyutil -> collections -> llist -#from .collections import mogrify _init_done = False jump = sym("jump") # doesn't matter what the value is, will be overwritten later def _init_module(): # called by unpythonic.__init__ when otherwise done @@ -33,11 +33,20 @@ def _init_module(): # called by unpythonic.__init__ when otherwise done class Lazy: """Delayed evaluation, with memoization. (A.k.a. *promise* in Racket.)""" - def __init__(self, thunk): - """`thunk`: 0-argument callable to be stored for delayed evaluation.""" + def __init__(self, thunk, *, sourcecode=None): + """Create a `Lazy` promise. + + `thunk`: 0-argument callable to be stored for delayed evaluation. + + `sourcecode`: str, optional, for use by the `lazy[]` macro. + + Source code of the thunk, if available. Used in the `repr`, + for debug purposes. + """ if not callable(thunk): raise TypeError(f"`thunk` must be a callable, got {type(thunk)} with value {repr(thunk)}") self.thunk = thunk + self.sourcecode = sourcecode self.value = _uninitialized self.thunk_returned_normally = _uninitialized @@ -61,6 +70,11 @@ def force(self): else: raise self.value + def __repr__(self): + if self.sourcecode: + return f'' + return f"" + def force1(x): """Force a ``Lazy`` promise. @@ -97,8 +111,12 @@ def islazy(f): return hasattr(f, "_passthrough_lazy_args") or (hasattr(f, "__name__") and f.__name__ == "_let") def maybe_force_args(f, *thunks, **kwthunks): - """Internal. Helps calling strict functions from inside a ``with lazify`` block.""" - if f is jump: # special case to avoid drastic performance hit in strict code + """Internal. 
Helps calling strict functions from inside a ``with lazify`` block. + + If `not islazy(f)`, forces the given args and kwargs, and then calls `f` with them. + If `islazy(f)`, calls `f` without forcing the args/kwargs. + """ + if f is jump: # special case to avoid drastic performance hit in TCO'd strict code target, *argthunks = thunks return jump(force1(target), *argthunks, **kwthunks) if islazy(f): diff --git a/unpythonic/let.py b/unpythonic/let.py index 3a17d50d..3b1a2536 100644 --- a/unpythonic/let.py +++ b/unpythonic/let.py @@ -5,9 +5,9 @@ from functools import wraps -from .misc import call -from .env import env as _envcls from .arity import arity_includes, UnknownArity +from .env import env as _envcls +from .funutil import call def let(body, **bindings): """``let`` expression. @@ -106,25 +106,8 @@ def letrec(body, **bindings): body=lambda e: e.b * e.f(1)) # --> 84 - **CAUTION**: - - Simple values (non-callables) may depend on earlier definitions - in the same letrec **only in Python 3.6 and later**. - - Until Python 3.6, initialization of the bindings occurs - **in an arbitrary order**, because of the ``kwargs`` mechanism. - See PEP 468: - - https://www.python.org/dev/peps/pep-0468/ - - In Python < 3.6, in the first example above, trying to reference ``env.a`` - on the RHS of ``b`` may get either the ``lambda e: ...``, or the value ``1``, - depending on whether the binding ``a`` has been initialized at that point or not. - - If you need left-to-right initialization of bindings in Python < 3.6, - see ``unpythonic.lispylet``. - - The following applies regardless of Python version. + Simple values (non-callables) may depend on earlier definitions + in the same letrec. A callable value may depend on **any** binding, also later ones. This allows mutually recursive functions:: @@ -151,9 +134,9 @@ def letrec(body, **bindings): L = [1, 1, 3, 1, 3, 2, 3, 2, 2, 2, 4, 4, 1, 2, 3] print(u(L)) # [1, 3, 2, 4] - Works also in Python < 3.6, because here ``see`` is a callable. 
Hence, ``e.seen`` - doesn't have to exist when the *definition* of ``see`` is evaluated; it only has to - exist when ``e.see(x)`` is *called*. + Note that ``see`` is a callable. Hence, strictly speaking it doesn't matter + if ``e.seen`` exists when the *definition* of ``see`` is evaluated; it only + has to exist when ``e.see(x)`` is *called*. Parameters: `body`: function diff --git a/unpythonic/lispylet.py b/unpythonic/lispylet.py index 42bb8f50..da9c1516 100644 --- a/unpythonic/lispylet.py +++ b/unpythonic/lispylet.py @@ -5,9 +5,9 @@ from functools import wraps -from .misc import call -from .env import env as _envcls from .arity import arity_includes, UnknownArity +from .env import env as _envcls +from .funutil import call def let(bindings, body): """``let`` expression. diff --git a/unpythonic/mathseq.py b/unpythonic/mathseq.py index 3a78fc04..dc35342e 100644 --- a/unpythonic/mathseq.py +++ b/unpythonic/mathseq.py @@ -19,16 +19,13 @@ """ __all__ = ["s", "imathify", "gmathify", - "m", "mg", # old names, pre-0.14.3, will go away in 0.15.0 - "almosteq", "sadd", "ssub", "sabs", "spos", "sneg", "sinvert", "smul", "spow", "struediv", "sfloordiv", "smod", "sdivmod", "sround", "strunc", "sfloor", "sceil", "slshift", "srshift", "sand", "sxor", "sor", "cauchyprod", "diagonal_reduce", - "fibonacci", "primes"] + "fibonacci", "triangular", "primes"] -from warnings import warn from itertools import repeat, takewhile, count from functools import wraps from operator import (add as primitive_add, mul as primitive_mul, @@ -45,13 +42,13 @@ from .it import take, rev, window from .gmemo import imemoize, gmemoize +from .numutil import almosteq class _NoSuchType: pass # stuff to support float, mpf and SymPy expressions transparently # -from sys import float_info from math import log as math_log, copysign, trunc, floor, ceil try: from mpmath import mpf, almosteq as mpf_almosteq @@ -60,6 +57,11 @@ class _NoSuchType: mpf = _NoSuchType mpf_almosteq = None +try: + import sympy +except 
ImportError: # pragma: no cover, optional at runtime, but installed at development time. + sympy = None + def _numsign(x): """The sign function, for numeric inputs.""" if x == 0: @@ -98,44 +100,6 @@ def sign(x): sign = _numsign _symExpr = _NoSuchType -# TODO: Overhaul `almosteq` in v0.15.0, should work like mpf for consistency. -# TODO: Also move it to `unpythonic.misc`, where `ulp` already is. Or make a `numutil`. -def almosteq(a, b, tol=1e-8): - """Almost-equality that supports several formats. - - The tolerance ``tol`` is used for the builtin ``float`` and ``mpmath.mpf``. - - For ``mpmath.mpf``, we just delegate to ``mpmath.almosteq``, with the given - ``tol``. For ``float``, we use the strategy suggested in: - - https://floating-point-gui.de/errors/comparison/ - - Anything else, for example SymPy expressions, strings, and containers - (regardless of content), is tested for exact equality. - - **CAUTION**: Although placed in ``unpythonic.mathseq``, this function - **does not** support iterables; rather, it is a low-level tool that is - exposed in the public API in the hope it may be useful elsewhere. - """ - if a == b: # infinities and such, plus any non-float type - return True - - if isinstance(a, mpf) and isinstance(b, mpf): - return mpf_almosteq(a, b, tol) - # compare as native float if only one is an mpf - elif isinstance(a, mpf) and isinstance(b, (float, int)): - a = float(a) - elif isinstance(a, (float, int)) and isinstance(b, mpf): - b = float(b) - - if not all(isinstance(x, (float, int)) for x in (a, b)): - return False # non-float type, already determined that a != b - min_normal = float_info.min - max_float = float_info.max - d = abs(a - b) - if a == 0 or b == 0 or d < min_normal: - return d < tol * min_normal - return d / min(abs(a) + abs(b), max_float) < tol def s(*spec): """Create a lazy mathematical sequence. 
@@ -306,6 +270,8 @@ def s(*spec): def is_almost_int(x): try: + if sympy and isinstance(x, sympy.Expr): + x = sympy.N(x) return almosteq(float(round(x)), x) except TypeError: # likely a SymPy expression that didn't simplify to a number return False @@ -353,7 +319,7 @@ def analyze(*spec): # raw spec (part before '...' if any) --> description # Most unrecognized sequences trigger this case. raise SyntaxError(f"Specification did not match any supported formula: '{origspec}'") else: # more elements are optional but must be consistent - data = [analyze(*triplet) for triplet in window(iterable=spec, n=3)] + data = [analyze(*triplet) for triplet in window(3, spec)] seqtypes, x0s, ks = zip(*data) def isconst(xs): first, *rest = xs @@ -454,7 +420,7 @@ def arith(): return imathify(arith() if n is infty else take(n, arith())) elif seqtype == "geom": if isinstance(k, _symExpr) or abs(k) >= 1: - def geoimathify(): + def geom(): j = 0 while True: yield x0 * (k**j) @@ -466,12 +432,12 @@ def geoimathify(): # Note that 1/(1/3) --> 3.0 even for floats, so we don't actually # need to modify the detection algorithm to account for this. kinv = 1 / k - def geoimathify(): + def geom(): j = 0 while True: yield x0 / (kinv**j) j += 1 - return imathify(geoimathify() if n is infty else take(n, geoimathify())) + return imathify(geom() if n is infty else take(n, geom())) else: # seqtype == "power": if isinstance(k, _symExpr) or abs(k) >= 1: def power(): @@ -634,14 +600,6 @@ def __ge__(self, other): def __gt__(self, other): return sgt(self, other) -class m(imathify): # pragma: no cover - """Alias for `imathify`, for backward compatibility. - - Will be removed in 0.15.0.""" - def __init__(self, iterable): - warn("`m` has been renamed `imathify`, which is more descriptive; this alias will be removed in 0.15.0.", FutureWarning) - super().__init__(iterable) - def gmathify(gfunc): """Decorator: make gfunc imathify() the returned generator instances. 
@@ -659,14 +617,6 @@ def mathify(*args, **kwargs): return imathify(gfunc(*args, **kwargs)) return mathify -def mg(gfunc): # pragma: no cover - """Alias for `gmathify`, for backward compatibility. - - Will be removed in 0.15.0. - """ - warn("`mg` has been renamed `gmathify`, which is more descriptive; this alias will be removed in 0.15.0.", FutureWarning) - return gmathify(gfunc) - # ----------------------------------------------------------------------------- # We expose the full set of "imathify" operators also as functions à la the ``operator`` module. # Prefix "s", short for "mathematical Sequence". @@ -946,6 +896,28 @@ def fibos(): a, b = b, a + b return imathify(fibos()) +def triangular(): + """Return the triangular numbers 1, 3, 6, 10, ... as a lazy sequence. + + Etymology:: + + x + x x + x x x + x x x x + ... + """ + # We could just use Gauss's result n * (n + 1) / 2 (which can be proved by induction), + # but this algorithm is trivially correct. + def _triangular(): + s = 1 # running total + r = 2 # places in the next row of the triangle + while True: + yield s + s += r + r += 1 + return imathify(_triangular()) + # See test_gmemo.py for history. This is an FP-ized sieve of Eratosthenes. 
# # This version wins in speed for moderate n (1e5) on typical architectures where diff --git a/unpythonic/misc.py b/unpythonic/misc.py index 32c329d2..b259598f 100644 --- a/unpythonic/misc.py +++ b/unpythonic/misc.py @@ -1,428 +1,24 @@ # -*- coding: utf-8 -*- """Miscellaneous constructs.""" -__all__ = ["call", "callwith", "raisef", "tryf", "equip_with_traceback", - "pack", "namelambda", "timer", +__all__ = ["pack", + "namelambda", + "timer", "getattrrec", "setattrrec", "Popper", "CountingIterator", - "ulp", - "slurp", "async_raise", "callsite_filename", "safeissubclass"] + "slurp", + "callsite_filename", + "safeissubclass"] -from types import LambdaType, FunctionType, CodeType, TracebackType -from time import monotonic from copy import copy from functools import partial -from sys import version_info, float_info -from math import floor, log2 -from queue import Empty -import threading +from itertools import count import inspect - -# For async_raise only. Note `ctypes.pythonapi` is not an actual module; -# you'll get a `ModuleNotFoundError` if you try to import it. -# -# TODO: The "pycapi" PyPI package would allow us to regularly import the C API, -# but right now we don't want introduce dependencies, especially for a minor feature. -# https://github.com/brandtbucher/pycapi -import sys -if sys.implementation.name == "cpython": - import ctypes - PyThreadState_SetAsyncExc = ctypes.pythonapi.PyThreadState_SetAsyncExc -else: # pragma: no cover, coverage is measured on CPython. - ctypes = None - PyThreadState_SetAsyncExc = None +from queue import Empty +from time import monotonic +from types import FunctionType, LambdaType from .regutil import register_decorator -from .lazyutil import passthrough_lazy_args, maybe_force_args, force -from .arity import arity_includes, UnknownArity - -# Only the single-argument form (just f) of the "call" decorator is supported by unpythonic.syntax.util.sort_lambda_decorators. 
-# -# This is as it should be; if given any arguments beside f, the call doesn't conform -# to the decorator API, but is a normal function call. See "callwith" if you need to -# pass arguments and then call f from a decorator position. -@register_decorator(priority=80) -@passthrough_lazy_args -def call(f, *args, **kwargs): - """Call the function f. - - **When used as a decorator**: - - Run the function immediately, then overwrite the definition by its - return value. - - Useful for making lispy not-quite-functions where the def just delimits - a block of code that runs immediately (think call-with-something in Lisps). - - The function will be called with no arguments. If you need to pass - arguments when using ``call`` as a decorator, see ``callwith``. - - **When called normally**: - - ``call(f, *a, **kw)`` is the same as ``f(*a, **kw)``. - - *Why ever use call() normally?* - - - Readability and aesthetics in cases like ``makef(dostuffwith(args))()``, - where ``makef`` is a function factory, and we want to immediately - call its result. - - Rewriting this as ``call(makef(dostuffwith(args)))`` relocates the - odd one out from the mass of parentheses at the end. (A real FP example - would likely have more levels of nesting.) - - - Notational uniformity with ``curry(f, *args, **kwargs)`` for cases - without currying. See ``unpythonic.fun.curry``. - - - For fans of S-expressions. Write Python almost like Lisp! - - Name inspired by "call-with-something", but since here we're calling - without any specific thing, it's just "call". - - Examples:: - - @call - def result(): # this block of code runs immediately - return "hello" - print(result) # "hello" - - # if the return value is of no interest: - @call - def _(): - ... # code with cheeky side effects goes here - - @call - def x(): - a = 2 # many temporaries that help readability... - b = 3 # ...of this calculation, but would just pollute locals... 
- c = 5 # ...after the block exits - return a * b * c - - @call - def _(): - for x in range(10): - for y in range(10): - if x * y == 42: - return # "multi-break" out of both loops! - ... - - Note that in the multi-break case, ``x`` and ``y`` are no longer in scope - outside the block, since the block is a function. - """ -# return f(*args, **kwargs) - return maybe_force_args(force(f), *args, **kwargs) # support unpythonic.syntax.lazify - -@register_decorator(priority=80) -@passthrough_lazy_args -def callwith(*args, **kwargs): - """Freeze arguments, choose function later. - - **Used as decorator**, this is like ``@call``, but with arguments:: - - @callwith(3) - def result(x): - return x**2 - assert result == 9 - - **Called normally**, this creates a function to apply the given arguments - to a callable to be specified later:: - - def myadd(a, b): - return a + b - def mymul(a, b): - return a * b - apply23 = callwith(2, 3) - assert apply23(myadd) == 5 - assert apply23(mymul) == 6 - - When called normally, the two-step application is mandatory. The first step - stores the given arguments. It returns a function ``f(callable)``. When - ``f`` is called, it calls its ``callable`` argument, passing in the arguments - stored in the first step. - - In other words, ``callwith`` is similar to ``functools.partial``, but without - specializing to any particular function. The function to be called is - given later, in the second step. - - Hence, ``callwith(2, 3)(myadd)`` means "make a function that passes in - two positional arguments, with values ``2`` and ``3``. Then call this - function for the callable ``myadd``". - - But if we instead write``callwith(2, 3, myadd)``, it means "make a function - that passes in three positional arguments, with values ``2``, ``3`` and - ``myadd`` - not what we want in the above example. - - Curry obviously does not help; it will happily pass in all arguments - in one go. 
If you want to specialize some arguments now and some later, - use ``partial``:: - - from functools import partial - - p1 = partial(callwith, 2) - p2 = partial(p1, 3) - p3 = partial(p2, 4) - apply234 = p3() # actually call callwith, get the function - def add3(a, b, c): - return a + b + c - def mul3(a, b, c): - return a * b * c - assert apply234(add3) == 9 - assert apply234(mul3) == 24 - - If the code above feels weird, it should. Arguments are gathered first, - and the function to which they will be passed is chosen in the last step. - - A pythonic alternative to the above examples is:: - - a = [2, 3] - def myadd(a, b): - return a + b - def mymul(a, b): - return a * b - assert myadd(*a) == 5 - assert mymul(*a) == 6 - - a = [2] - a += [3] - a += [4] - def add3(a, b, c): - return a + b + c - def mul3(a, b, c): - return a * b * c - assert add3(*a) == 9 - assert mul3(*a) == 24 - - Another use case of ``callwith`` is ``map``, if we want to vary the function - instead of the data:: - - m = map(callwith(3), [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) - assert tuple(m) == (6, 9, 3**(1/2)) - - The pythonic alternative here is to use the comprehension notation, - which can already do this:: - - m = (f(3) for f in [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) - assert tuple(m) == (6, 9, 3**(1/2)) - - Inspiration: - - *Function application with $* in - http://learnyouahaskell.com/higher-order-functions - """ - def applyfrozenargsto(f): - return maybe_force_args(force(f), *args, **kwargs) - return applyfrozenargsto - -def raisef(exc, *args, cause=None, **kwargs): - """``raise`` as a function, to make it possible for lambdas to raise exceptions. - - Example:: - - raisef(ValueError("message")) - - is (almost) equivalent to:: - - raise ValueError("message") - - Parameters: - exc: exception instance, or exception class - The object to raise. This is whatever you would give as the argument to `raise`. - Both instances (e.g. `ValueError("oof")`) and classes (e.g. 
`StopIteration`) - can be used as `exc`. - - cause: exception instance, or `None` - If `exc` was triggered as a direct consequence of another exception, - and you would like to `raise ... from ...`, pass that other exception - instance as `cause`. The default `None` performs a plain `raise ...`. - - *Changed in v0.14.2.* The parameters have changed to match `raise` itself as closely - as possible. Old-style parameters are still supported, but are now deprecated. Support - for them will be dropped in v0.15.0. The old-style parameters are: - - exc: type - The object type to raise as an exception. - - *args: anything - Passed on to the constructor of exc. - - **kwargs: anything - Passed on to the constructor of exc. - """ - if args or kwargs: # old-style parameters - raise exc(*args, **kwargs) - - if cause: - raise exc from cause - else: - raise exc - -def tryf(body, *handlers, elsef=None, finallyf=None): - """``try``/``except``/``finally`` as a function. - - This allows lambdas to handle exceptions. - - ``body`` is a thunk (0-argument function) that represents - the body of the ``try`` block. - - ``handlers`` is ``(excspec, handler), ...``, where - ``excspec`` is either an exception type, - or a tuple of exception types. - ``handler`` is a 0-argument or 1-argument - function. If it takes an - argument, it gets the exception - instance. - - Handlers are tried in the order specified. - - ``elsef`` is a thunk that represents the ``else`` block. - - ``finallyf`` is a thunk that represents the ``finally`` block. - - Upon normal completion, the return value of ``tryf`` is - the return value of ``elsef`` if that was specified, otherwise - the return value of ``body``. - - If an exception was caught by one of the handlers, the return - value of ``tryf`` is the return value of the exception handler - that ran. 
- - If you need to share variables between ``body`` and ``finallyf`` - (which is likely, given what a ``finally`` block is intended - to do), consider wrapping the ``tryf`` in a ``let`` and storing - your variables there. If you want them to leak out of the ``tryf``, - you can also just create an ``env`` at an appropriate point, - and store them there. - """ - def accepts_arg(f): - try: - if arity_includes(f, 1): - return True - except UnknownArity: # pragma: no cover - return True # just assume it - return False - - def isexceptiontype(exc): - try: - if issubclass(exc, BaseException): - return True - except TypeError: # "issubclass() arg 1 must be a class" - pass - return False - - # validate handlers - for excspec, handler in handlers: - if isinstance(excspec, tuple): # tuple of exception types - if not all(isexceptiontype(t) for t in excspec): - raise TypeError(f"All elements of a tuple excspec must be exception types, got {excspec}") - elif not isexceptiontype(excspec): # single exception type - raise TypeError(f"excspec must be an exception type or tuple of exception types, got {excspec}") - - # run - try: - ret = body() - except BaseException as exception: - # Even if a class is raised, as in `raise StopIteration`, the `raise` statement - # converts it into an instance by instantiating with no args. So we need no - # special handling for the "class raised" case. - # https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement - # https://stackoverflow.com/questions/19768515/is-there-a-difference-between-raising-exception-class-and-exception-instance/19768732 - exctype = type(exception) - for excspec, handler in handlers: - if isinstance(excspec, tuple): # tuple of exception types - # this is safe, exctype is always a class at this point. 
- if any(issubclass(exctype, t) for t in excspec): - if accepts_arg(handler): - return handler(exception) - else: - return handler() - else: # single exception type - if issubclass(exctype, excspec): - if accepts_arg(handler): - return handler(exception) - else: - return handler() - else: - if elsef is not None: - return elsef() - return ret - finally: - if finallyf is not None: - finallyf() - -def equip_with_traceback(exc, stacklevel=1): # Python 3.7+ - """Given an exception instance exc, equip it with a traceback. - - `stacklevel` is the starting depth below the top of the call stack, - to cull useless detail: - - `0` means the trace includes everything, also - `equip_with_traceback` itself, - - `1` means the trace includes everything up to the caller, - - And so on. - - So typically, for direct use of this function `stacklevel` should - be `1` (so it excludes `equip_with_traceback` itself, but shows - all stack levels from your code), and for use in a utility function - that itself is called from your code, it should be `2` (so it excludes - the utility function, too). - - The return value is `exc`, with its traceback set to the produced - traceback. - - Python 3.7 and later only. - - When not supported, raises `NotImplementedError`. - - This is useful mainly in special cases, where `raise` cannot be used for - some reason, and a manually created exception instance needs a traceback. - (The `signal` function in the conditions-and-restarts system uses this.) - - **CAUTION**: The `sys._getframe` function exists in CPython and in PyPy3, - but for another arbitrary Python implementation this is not guaranteed. 
- - Based on solution by StackOverflow user Zbyl: - https://stackoverflow.com/a/54653137 - - See also: - https://docs.python.org/3/library/types.html#types.TracebackType - https://docs.python.org/3/reference/datamodel.html#traceback-objects - https://docs.python.org/3/library/sys.html#sys._getframe - """ - if not isinstance(exc, BaseException): - raise TypeError(f"exc must be an exception instance; got {type(exc)} with value {repr(exc)}") - if not isinstance(stacklevel, int): - raise TypeError(f"stacklevel must be int, got {type(stacklevel)} with value {repr(stacklevel)}") - if stacklevel < 0: - raise ValueError(f"stacklevel must be >= 0, got {repr(stacklevel)}") - - try: - getframe = sys._getframe - except AttributeError as err: # pragma: no cover, both CPython and PyPy3 have sys._getframe. - raise NotImplementedError("Need a Python interpreter which has `sys._getframe`") from err - - frames = [] - depth = stacklevel - while True: - try: - frames.append(getframe(depth)) # 0 = top of call stack - depth += 1 - except ValueError: # beyond the root level - break - - # Python 3.7+ allows creating `types.TracebackType` objects in Python code. - try: - tracebacks = [] - nxt = None # tb_next should point toward the level where the exception occurred. - for frame in frames: # walk from top of call stack toward the root - tb = TracebackType(nxt, frame, frame.f_lasti, frame.f_lineno) - tracebacks.append(tb) - nxt = tb - if tracebacks: - tb = tracebacks[-1] # root level - else: - tb = None - except TypeError as err: # Python 3.6 or earlier - raise NotImplementedError("Need Python 3.7 or later to create traceback objects") from err - return exc.with_traceback(tb) # Python 3.7+ def pack(*args): """Multi-argument constructor for tuples. @@ -430,9 +26,6 @@ def pack(*args): In other words, the inverse of tuple unpacking, as a function. E.g. ``pack(a, b, c)`` is the same as ``(a, b, c)``. 
- Or, if we semantically consider a tuple as a representation for multiple - return values, this is the identity function, returning its args. - We provide this because the default constructor `tuple(...)` requires an iterable, and there are use cases where it is useful to be able to say *pack these args into a tuple*. @@ -504,30 +97,7 @@ def rename(f): f.__name__ = name idx = f.__qualname__.rfind('.') f.__qualname__ = f"{f.__qualname__[:idx]}.{name}" if idx != -1 else name - # __code__.co_name is read-only, but there's a types.CodeType constructor - # that we can use to re-create the code object with the new name. - # (This is no worse than what the stdlib's Lib/modulefinder.py already does.) - co = f.__code__ - # https://github.com/ipython/ipython/blob/master/IPython/core/interactiveshell.py - # https://www.python.org/dev/peps/pep-0570/ - # https://docs.python.org/3/library/types.html#types.CodeType - # https://docs.python.org/3/library/inspect.html#types-and-members - if version_info >= (3, 8, 0): # Python 3.8+: positional-only parameters - f.__code__ = CodeType(co.co_argcount, co.co_posonlyargcount, co.co_kwonlyargcount, - co.co_nlocals, co.co_stacksize, co.co_flags, - co.co_code, co.co_consts, co.co_names, - co.co_varnames, co.co_filename, - name, - co.co_firstlineno, co.co_lnotab, co.co_freevars, - co.co_cellvars) - else: - f.__code__ = CodeType(co.co_argcount, co.co_kwonlyargcount, - co.co_nlocals, co.co_stacksize, co.co_flags, - co.co_code, co.co_consts, co.co_names, - co.co_varnames, co.co_filename, - name, - co.co_firstlineno, co.co_lnotab, co.co_freevars, - co.co_cellvars) + f.__code__ = f.__code__.replace(co_name=name) return f return rename @@ -586,6 +156,7 @@ def setattrrec(object, name, value): o = getattr(o, name) setattr(o, name, value) +# TODO: move `Popper` to `unpythonic.it`? class Popper: """Pop-while iterator. @@ -654,10 +225,14 @@ def __next__(self): return self._pop() raise StopIteration +# TODO: move `CountingIterator` to `unpythonic.it`? 
class CountingIterator: """Iterator that counts how many elements it has yielded. - The count stops updating when the original iterable raises StopIteration. + Wraps the original iterator of `iterable`. Simply use + `CountingIterator(iterable)` in place of `iter(iterable)`. + + The count stops updating when the original iterator raises StopIteration. """ def __init__(self, iterable): self._it = iter(iterable) @@ -669,21 +244,6 @@ def __next__(self): self.count += 1 return x -# TODO: move to a new module unpythonic.numutil in v0.15.0. -def ulp(x): # Unit in the Last Place - """Given a float x, return the unit in the last place (ULP). - - This is the numerical value of the least-significant bit, as a float. - For x = 1.0, the ULP is the machine epsilon (by definition of machine epsilon). - - See: - https://en.wikipedia.org/wiki/Unit_in_the_last_place - """ - eps = float_info.epsilon - # m_min = abs. value represented by a mantissa of 1.0, with the same exponent as x has - m_min = 2**floor(log2(abs(x))) - return m_min * eps - def slurp(queue): """Slurp all items currently on a queue.Queue into a list. @@ -703,108 +263,6 @@ def slurp(queue): pass return out - -# TODO: To reduce the risk of spaghetti user code, we could require a non-main thread's entrypoint to declare -# via a decorator that it's willing to accept asynchronous exceptions, and check that mark here, making this -# mechanism strictly opt-in. The decorator could inject an `asyncexc_ok` attribute to the Thread object; -# that's enough to prevent accidental misuse. -# OTOH, having no such mechanism is the simpler design. -def async_raise(thread_obj, exception): - """Raise an exception in another thread. - - thread_obj: `threading.Thread` object - The target thread to inject the exception into. Must be running. - exception: ``Exception`` - The exception to be raised. As with regular `raise`, this may be - an exception instance or an exception class object. - - No return value. Normal return indicates success. 
- - If the specified `threading.Thread` is not active, or the thread's ident - was not accepted by the interpreter, raises `ValueError`. - - If the raise operation failed internally, raises `SystemError`. - - If not supported for the Python implementation we're currently running on, - raises `NotImplementedError`. - - **NOTE**: This currently works only in CPython, because there is no Python-level - API to achieve what this function needs to do, and PyPy3's C API emulation layer - `cpyext` doesn't currently (January 2020) implement the function required to do - this (and the C API functions in `cpyext` are not exposed to the Python level - anyway, unlike CPython's `ctypes.pythonapi`). - - **CAUTION**: This is **potentially dangerous**. If the async raise - operation fails, the interpreter may be left in an inconsistent state. - - **NOTE**: The term `async` here has nothing to do with `async`/`await`; - instead, it refers to an asynchronous exception such as `KeyboardInterrupt`. - https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity - - In a nutshell, a *synchronous* exception (i.e. the usual kind of exception) - has an explicit `raise` somewhere in the code that the thread that - encountered the exception is running. In contrast, an *asynchronous* - exception **doesn't**, it just suddenly magically materializes from the outside. - As such, it can in principle happen *anywhere*, with absolutely no hint about - it in any obvious place in the code. - - **Hence, use this function very, very sparingly, if at all.** - - For example, `unpythonic` only uses this to support remotely injecting a - `KeyboardInterrupt` into a REPL session running in another thread. So this - may be interesting mainly if you're developing your own REPL server/client - pair. - - (Incidentally, that's **not** how `KeyboardInterrupt` usually works. - Rather, the OS sends a SIGINT, which is then trapped by an OS signal - handler that runs in the main thread. 
At that point the magic has already - happened: the control of the main thread is now inside the signal handler, - as if the signal handler was called from the otherwise currently innermost - point on the call stack. All the handler needs to do is to perform a regular - `raise`, and the exception will propagate correctly. - - REPL sessions running in other threads can't use the standard mechanism, - because in CPython, OS signal handlers only run in the main thread, and even - in PyPy3, there is no guarantee *which* thread gets the signal even if you - use `with __pypy__.thread.signals_enabled` to enable OS signal trapping in - some of your other threads. Only one thread (including the main thread, plus - any currently dynamically within a `signals_enabled`) will see the signal; - which one, is essentially random and not even reproducible.) - - See also: - https://vorpus.org/blog/control-c-handling-in-python-and-trio/ - - The function necessary to perform this magic is actually mentioned right - there in the official CPython C API docs, but it's not very well known: - https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc - - Original detective work by Federico Ficarelli and LIU Wei: - https://gist.github.com/nazavode/84d1371e023bccd2301e - https://gist.github.com/liuw/2407154 - """ - if not ctypes or not PyThreadState_SetAsyncExc: - raise NotImplementedError("async_raise not supported on this Python interpreter.") # pragma: no cover - - if not hasattr(thread_obj, "ident"): - raise TypeError(f"Expected a thread object, got {type(thread_obj)} with value '{thread_obj}'") - - target_tid = thread_obj.ident - if target_tid not in {thread.ident for thread in threading.enumerate()}: - raise ValueError("Invalid thread object, cannot find its ident among currently active threads.") - - affected_count = PyThreadState_SetAsyncExc(ctypes.c_long(target_tid), ctypes.py_object(exception)) - if affected_count == 0: - raise ValueError("PyThreadState_SetAsyncExc did 
not accept the thread ident, even though it was among the currently active threads.") # pragma: no cover - - # TODO: check CPython source code if this case can actually ever happen. - # - # The API docs seem to hint that 0 or 1 are the only possible return values. - # If so, we can remove this `SystemError` case and the "potentially dangerous" caution. - elif affected_count > 1: # pragma: no cover - # Clear the async exception, targeting the same thread identity, and hope for the best. - PyThreadState_SetAsyncExc(ctypes.c_long(target_tid), ctypes.c_long(0)) - raise SystemError("PyThreadState_SetAsyncExc failed, broke the interpreter state.") - def callsite_filename(): """Return the filename of the call site, as a string. @@ -814,10 +272,15 @@ def callsite_filename(): This works also in the REPL (where `__file__` is undefined). """ stack = inspect.stack() - frame = stack[1].frame - filename = frame.f_code.co_filename - del frame, stack - return filename + for k in count(start=1): # ignore callsite_filename() itself + framerecord = stack[k] + # ignore our call-helpers + if framerecord.function not in ("maybe_force_args", # lazify + "curried", "curry", "_currycall", # autocurry + "call", "callwith"): # manual use of misc utils + frame = framerecord.frame + filename = frame.f_code.co_filename + return filename def safeissubclass(cls, cls_or_tuple): """Like issubclass, but if `cls` is not a class, swallow the `TypeError` and return `False`.""" diff --git a/unpythonic/net/__init__.py b/unpythonic/net/__init__.py index 2b3ca9c9..a16495b0 100644 --- a/unpythonic/net/__init__.py +++ b/unpythonic/net/__init__.py @@ -10,5 +10,12 @@ """ from .msg import * -from .ptyproxy import * +try: + from .ptyproxy import * +except ModuleNotFoundError: + import logging + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + logger.info("`unpythonic.net.ptyproxy` could not be loaded, the REPL server will not be available. 
Usually this is harmless; most applications do not need the REPL server.") + PTYSocketProxy = None from .util import * diff --git a/unpythonic/net/server.py b/unpythonic/net/server.py index b8b07f1e..a62a3a05 100644 --- a/unpythonic/net/server.py +++ b/unpythonic/net/server.py @@ -32,14 +32,14 @@ With that out of the way, to enable the server in your app:: from unpythonic.net import server - server.start(locals=globals()) + server.start(locals={}) The `locals=...` argument sets the top-level namespace for variables for use by the REPL. It is shared between REPL sessions. Using `locals=globals()` makes the REPL directly use the calling module's top-level scope. If you want a clean environment, where you must access any -modules through `sys.modules`, use `locals={}`. +modules through `sys.modules`, use `locals={}` (recommended). To connect to a running REPL server (with tab completion and Ctrl+C support):: @@ -104,6 +104,11 @@ The `socketserverREPL` package uses the same default, and actually its `repl_tool.py` can talk to this server (but doesn't currently feature remote tab completion). + +The default port for the control channel is 8128, because it's for +*completing* things, and https://en.wikipedia.org/wiki/Perfect_number +This is the first one above 1024, and was already known to Nicomachus +around 100 CE. 
""" # TODO: use logging module instead of server-side print @@ -128,7 +133,8 @@ from code import InteractiveConsole as Console from ..collections import ThreadLocalBox, Shim -from ..misc import async_raise, namelambda +from ..excutil import async_raise +from ..misc import namelambda from ..symbol import sym from .util import ReuseAddrThreadingTCPServer, socketsource diff --git a/unpythonic/numutil.py b/unpythonic/numutil.py new file mode 100644 index 00000000..e70ff29d --- /dev/null +++ b/unpythonic/numutil.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- +"""Low-level utilities for numerics.""" + +__all__ = ["almosteq", "ulp", + "fixpoint", + "partition_int", "partition_int_triangular", "partition_int_custom"] + +from itertools import takewhile +from math import floor, log2 +import sys + +from .it import iterate1, last, within, rev +from .symbol import sym + +# HACK: break dependency loop mathseq -> numutil -> mathseq +_init_done = False +triangular = sym("triangular") # doesn't matter what the value is, will be overwritten later +def _init_module(): # called by unpythonic.__init__ when otherwise done + global triangular, _init_done + from .mathseq import triangular + _init_done = True + +class _NoSuchType: + pass + +try: + from mpmath import mpf, almosteq as mpf_almosteq +except ImportError: # pragma: no cover, optional at runtime, but installed at development time. + # Can't use a gensym here since `mpf` must be a unique *type*. + mpf = _NoSuchType + mpf_almosteq = None + + +# TODO: Overhaul `almosteq` in v0.16.0, should work like mpf for consistency. +def almosteq(a, b, tol=1e-8): + """Almost-equality that supports several formats. + + The tolerance ``tol`` is used for the builtin ``float`` and ``mpmath.mpf``. + + For ``mpmath.mpf``, we just delegate to ``mpmath.almosteq``, with the given + ``tol``. 
For ``float``, we use the strategy suggested in: + + https://floating-point-gui.de/errors/comparison/ + + Anything else, for example SymPy expressions, strings, and containers + (regardless of content), is tested for exact equality. + """ + if a == b: # infinities and such, plus any non-float type + return True + + if isinstance(a, mpf) and isinstance(b, mpf): + return mpf_almosteq(a, b, tol) + # compare as native float if only one is an mpf + elif isinstance(a, mpf) and isinstance(b, (float, int)): + a = float(a) + elif isinstance(a, (float, int)) and isinstance(b, mpf): + b = float(b) + + if not all(isinstance(x, (float, int)) for x in (a, b)): + return False # non-float type, already determined that a != b + min_normal = sys.float_info.min + max_float = sys.float_info.max + d = abs(a - b) + if a == 0 or b == 0 or d < min_normal: + return d < tol * min_normal + return d / min(abs(a) + abs(b), max_float) < tol + + +def ulp(x): # Unit in the Last Place + """Given a float x, return the unit in the last place (ULP). + + This is the numerical value of the least-significant bit, as a float. + For x = 1.0, the ULP is the machine epsilon (by definition of machine epsilon). + + See: + https://en.wikipedia.org/wiki/Unit_in_the_last_place + """ + eps = sys.float_info.epsilon + # m_min = abs. value represented by a mantissa of 1.0, with the same exponent as x has + m_min = 2**floor(log2(abs(x))) + return m_min * eps + + +def fixpoint(f, x0, tol=0): + """Compute the (arithmetic) fixed point of f, starting from the initial guess x0. + + (Not to be confused with the logical fixed point with respect to the + definedness ordering.) + + The fixed point must be attractive for this to work. See the Banach + fixed point theorem. 
+ https://en.wikipedia.org/wiki/Banach_fixed-point_theorem + + If the fixed point is attractive, and the values are represented in + floating point (hence finite precision), the computation should + eventually converge down to the last bit (barring roundoff or + catastrophic cancellation in the final few steps). Hence the default tol + of zero. + + CAUTION: an arbitrary function from ℝ to ℝ **does not** necessarily + have a fixed point. Limit cycles and chaotic behavior of `f` will cause + non-termination. Keep in mind the classic example: + https://en.wikipedia.org/wiki/Logistic_map + + Examples:: + from math import cos, sqrt + from unpythonic import fixpoint, ulp + c = fixpoint(cos, x0=1) + + # Actually "Newton's" algorithm for the square root was already known to the + # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) + # Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy + def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) + assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) + """ + return last(within(tol, iterate1(f, x0))) + + +def partition_int(n, lower=1, upper=None): + """Yield all ordered sequences of smaller positive integers that sum to `n`. + + `n` must be an integer >= 1. + + `lower` is an optional lower limit for each member of the sum. Each member + of the sum must be `>= lower`. + + (Most of the splits are a ravioli consisting mostly of ones, so it is much + faster to not generate such splits than to filter them out from the result. + The default value `lower=1` generates everything.) + + `upper` is, similarly, an optional upper limit; each member of the sum + must be `<= upper`. The default `None` means no upper limit (effectively, + in that case `upper=n`). + + It must hold that `1 <= lower <= upper <= n`. 
+ + Not to be confused with `unpythonic.it.partition`, which partitions an + iterable based on a predicate. + + **CAUTION**: The number of possible partitions grows very quickly with `n`, + so in practice this is only useful for small numbers, or with a lower limit + that is not too much smaller than `n / 2`. A possible use case for this + function is to determine the number of letters to allocate for each + component of an anagram that may consist of several words. + + See: + https://en.wikipedia.org/wiki/Partition_(number_theory) + """ + # sanity check the preconditions, fail-fast + if not isinstance(n, int): + raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") + if not isinstance(lower, int): + raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}") + if upper is not None and not isinstance(upper, int): + raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}") + upper = upper if upper is not None else n + if n < 1: + raise ValueError(f"n must be positive; got {n}") + if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper: + raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}") + + return partition_int_custom(n, range(min(n, upper), lower - 1, -1)) # instantiate the generator + +def partition_int_triangular(n, lower=1, upper=None): + """Like `partition_int`, but allow only triangular numbers in the result. + + Triangular numbers are 1, 3, 6, 10, ... + + This function answers the timeless question: if I have `n` stackable plushies, + what are the possible stack configurations? Example:: + + configurations = partition_int_triangular(78, lower=10) + print(frozenset(tuple(sorted(c)) for c in configurations)) + + Result:: + + frozenset({(10, 10, 10, 10, 10, 28), + (10, 10, 15, 15, 28), + (15, 21, 21, 21), + (21, 21, 36), + (78,)}) + + Here `lower` sets the minimum number of plushies to allocate for one stack. 
+ """ + if not isinstance(n, int): + raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") + if not isinstance(lower, int): + raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}") + if upper is not None and not isinstance(upper, int): + raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}") + upper = upper if upper is not None else n + if n < 1: + raise ValueError(f"n must be positive; got {n}") + if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper: + raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}") + + triangulars_upto_n = takewhile(lambda m: m <= n, + triangular()) + return partition_int_custom(n, rev(filter(lambda m: lower <= m <= upper, + triangulars_upto_n))) + +def partition_int_custom(n, components): + """Partition an integer in a custom way. + + `n`: integer to partition. + `components`: iterable of ints; numbers that are allowed to appear + in the partitioning result. Each number `m` must + satisfy `1 <= m <= n`. + + See `partition_int`, `partition_triangular`. 
+ """ + if not isinstance(n, int): + raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") + if n < 1: + raise ValueError(f"n must be positive; got {n}") + components = tuple(components) + invalid_components = [not isinstance(x, int) for x in components] + if any(invalid_components): + raise TypeError(f"each component must be an integer; got invalid components {invalid_components}") + invalid_components = [not (1 <= x <= n) for x in components] + if any(invalid_components): + raise ValueError(f"each component x must be 1 <= x <= n; got n = {n}, with invalid components {invalid_components}") + def rec(components): + for k in components: + m = n - k + if m == 0: + yield (k,) + else: + out = [] + for item in partition_int_custom(m, tuple(x for x in components if x <= m)): + out.append((k,) + item) + for term in out: + yield term + return rec(components) diff --git a/unpythonic/seq.py b/unpythonic/seq.py index 3400fc4f..3c393184 100644 --- a/unpythonic/seq.py +++ b/unpythonic/seq.py @@ -3,16 +3,19 @@ __all__ = ["begin", "begin0", "lazy_begin", "lazy_begin0", "pipe1", "piped1", "lazy_piped1", - "pipe", "piped", "getvalue", "lazy_piped", "runpipe", "exitpipe", + "pipe", "piped", "lazy_piped", "exitpipe", "pipec", # w/ curry "do", "do0", "assign"] from collections import namedtuple + +from .arity import arity_includes, UnknownArity +from .dynassign import dyn from .env import env from .fun import curry, iscurried -from .dynassign import dyn -from .arity import arity_includes, UnknownArity -from .singleton import Singleton +from .funutil import Values +from .lazyutil import force1, force, maybe_force_args, passthrough_lazy_args +from .symbol import sym # sequence side effects in a lambda def begin(*vals): @@ -23,6 +26,10 @@ def begin(*vals): f = lambda x: begin(print("hi"), 42*x) print(f(1)) # 42 + + **CAUTION**: For regular code only. 
If you use macros, prefer `do[]`; + the macro layer of `unpythonic` recognizes only the `do` constructs + as a sequencing abstraction. """ return vals[-1] if len(vals) else None @@ -34,6 +41,10 @@ def begin0(*vals): # eager, bodys already evaluated when this is called g = lambda x: begin0(23*x, print("hi")) print(g(1)) # 23 + + **CAUTION**: For regular code only. If you use macros, prefer `do0[]`; + the macro layer of `unpythonic` recognizes only the `do` constructs + as a sequencing abstraction. """ return vals[0] if len(vals) else None @@ -46,6 +57,10 @@ def lazy_begin(*bodys): f = lambda x: lazy_begin(lambda: print("hi"), lambda: 42*x) print(f(1)) # 42 + + **CAUTION**: For regular code only. If you use macros, prefer `do[]`; + the macro layer of `unpythonic` recognizes only the `do` constructs + as a sequencing abstraction. """ n = len(bodys) if not n: @@ -67,6 +82,10 @@ def lazy_begin0(*bodys): g = lambda x: lazy_begin0(lambda: 23*x, lambda: print("hi")) print(g(1)) # 23 + + **CAUTION**: For regular code only. If you use macros, prefer `do0[]`; + the macro layer of `unpythonic` recognizes only the `do` constructs + as a sequencing abstraction. """ n = len(bodys) if not n: @@ -81,6 +100,7 @@ def lazy_begin0(*bodys): return out # sequence one-input, one-output functions +@passthrough_lazy_args def pipe1(value0, *bodys): """Perform a sequence of operations on an initial value. @@ -138,22 +158,19 @@ def pipe1(value0, *bodys): # def x(loop, update, acc): # return loop(update(acc)) # return x - x = value0 + + # This is forced when it is passed to an eager body (or when a lazy body uses it), + # but we force it here just for symmetry with the multi-arg version of `pipe`. 
+ x = force1(value0) for update in bodys: - x = update(x) + update = force1(update) + x = maybe_force_args(update, x) return x -class Getvalue(Singleton): # singleton sentinel with a nice repr - """Sentinel; pipe into this to exit a shell-like pipe and return the current value.""" - def __repr__(self): # pragma: no cover - return "" -getvalue = Getvalue() -runpipe = getvalue # same thing as getvalue, but semantically better name for lazy pipes - -# New unified name for v0.15.0; deprecating the separate "getvalue" and "runpipe" as of v0.14.2. -# TODO: Now that we have symbols, in v0.15.0, change this to `sym("exitpipe")` and delete the `Getvalue` class. -exitpipe = getvalue +# Singleton value for exiting the pipe abstraction. +exitpipe = sym("exitpipe") +@passthrough_lazy_args class piped1: """Shell-like piping syntax. @@ -168,10 +185,10 @@ def __or__(self, f): Return a ``piped`` object, for chainability. As the only exception, if ``f`` is the sentinel ``exitpipe``, - return the current value (useful for exiting the pipe). + return the current value (thus exiting the pipe). - A new ``piped`` object is created at each step of piping; the "update" - is purely functional, nothing is overwritten. + A new ``piped`` object is created at each step of piping; + the "update" is purely functional, nothing is overwritten. Examples:: @@ -181,13 +198,15 @@ def __or__(self, f): assert y | inc | exitpipe == 85 assert y | exitpipe == 84 # y is not modified """ + f = force1(f) if f is exitpipe: return self._x cls = self.__class__ - return cls(f(self._x)) # functional update + return cls(maybe_force_args(f, self._x)) # functional update def __repr__(self): # pragma: no cover return f"" +@passthrough_lazy_args class lazy_piped1: """Like piped, but apply the functions later. @@ -210,7 +229,7 @@ def __init__(self, x, *, _funcs=None): The ``_funcs`` parameter is for internal use. 
""" self._x = x - self._funcs = _funcs or () + self._funcs = force(_funcs or ()) def __or__(self, f): """Pipe the value into f; but just plan to do so, don't perform it yet. @@ -219,9 +238,9 @@ def __or__(self, f): Examples:: lst = [1] - def append_succ(l): - l.append(l[-1] + 1) - return l # important, handed to the next function in the pipe + def append_succ(lis): + lis.append(lis[-1] + 1) + return lis # important, handed to the next function in the pipe p = lazy_piped1(lst) | append_succ | append_succ # plan a computation assert lst == [1] # nothing done yet p | exitpipe # run the computation @@ -232,158 +251,232 @@ def append_succ(l): def nextfibo(state): a, b = state fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + return (b, a + b) # new state, handed to the next function in the pipe p = lazy_piped1((1, 1)) # load initial state into a lazy pipe for _ in range(10): # set up pipeline p = p | nextfibo p | exitpipe print(fibos) """ + f = force1(f) if f is exitpipe: # compute now v = self._x for g in self._funcs: - v = g(v) - return v + v = maybe_force_args(g, v) + # In `unpythonic`, return values are never implicitly lazy. + # The final result here is a return value. + # + # It is legal to pipe the initial value immediately to `exitpipe`; + # in that case, in a `with lazify` block, it will be a promise. + return force(v) # just pass on the reference to the original x. cls = self.__class__ - return cls(x=self._x, _funcs=self._funcs + (f,)) + return cls(x=self._x, _funcs=self._funcs + (force1(f),)) def __repr__(self): # pragma: no cover return f"" +@passthrough_lazy_args def pipe(values0, *bodys): """Like pipe1, but with arbitrary number of inputs/outputs at each step. - The only restriction is that each function must take as many positional - arguments as the previous one returns. 
+ The only restriction is that the call and return signatures must match: + each function must take those positional/named arguments the previous one + returns. Use a `Values` object to denote multiple-return-values, and/or + named return values. + + At each step, if the output from a function is a `Values`, it is unpacked + to the args and kwargs of the next function. Otherwise, we feed the output + to the next function as a single positional argument. + + At the beginning of the pipe, `values0` is treated the same way; so to + feed multiple args/kwargs to the first function, use a `Values`. - At each step, if the output from a function is a tuple, - it is unpacked to the argument list of the next function. Otherwise, - we assume the output is intended to be fed to the next function as-is. + If the final return value is a `Values`, and contains only one positional + return value, we unwrap it. Otherwise the `Values` object is returned as-is. If you only need a one-in-one-out chain, ``pipe1`` is faster. Examples:: - a, b = pipe((2, 3), - lambda x, y: (x + 1, 2 * y), - lambda x, y: (x * 2, y + 1)) + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y), + lambda x, y: Values(x * 2, y + 1)) + # If a `Values` object has only positional values, + # it can be unpacked like a tuple. Hence we don't + # see a `Values` wrapper here. 
assert (a, b) == (6, 7) - a, b, c = pipe((2, 3), - lambda x, y: (x + 1, 2 * y, "foo"), - lambda x, y, s: (x * 2, y + 1, f"got {s}")) + a, b, c = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, s: Values(x * 2, y + 1, f"got {s}")) assert (a, b, c) == (6, 7, "got foo") - a, b = pipe((2, 3), - lambda x, y: (x + 1, 2 * y, "foo"), - lambda x, y, s: (x * 2, y + 1, f"got {s}"), - lambda x, y, s: (x + y, s)) + # Can bind arguments of the next step by name, too + a, b, c = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, s="foo"), + lambda x, y, s: Values(x * 2, y + 1, f"got {s}")) + assert (a, b, c) == (6, 7, "got foo") + + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, s: Values(x * 2, y + 1, f"got {s}"), + lambda x, y, s: Values(x + y, s)) assert (a, b) == (13, "got foo") """ - xs = values0 + # We must force `values0` to analyze it, because we treat `Values` objects separately. + # Otherwise, in a `with lazify` block, the lazified `Values` object will get passed as + # one argument to the first body - not what we want. + xs = force1(values0) n = len(bodys) for k, update in enumerate(bodys): islast = (k == n - 1) bindings = {} + update = force1(update) if iscurried(update) and not islast: # co-operate with curry: provide a top-level curry context # to allow passthrough from a pipelined function to the next # (except the last one, since it exits the curry context). bindings = {"curry_context": dyn.curry_context + [update]} with dyn.let(**bindings): - if isinstance(xs, tuple): - xs = update(*xs) + if isinstance(xs, Values): + xs = maybe_force_args(update, *xs.rets, **xs.kwrets) else: - xs = update(xs) - if isinstance(xs, tuple): - return xs if len(xs) > 1 else xs[0] + xs = maybe_force_args(update, xs) + if isinstance(xs, Values): + return xs if xs.kwrets or len(xs.rets) > 1 else xs[0] return xs +@passthrough_lazy_args def pipec(values0, *bodys): """Like pipe, but curry each function before piping. 
Useful with the passthrough in ``curry``. Each function only needs to declare as many of the (leftmost) arguments as it needs to access or modify:: - a, b = pipec((1, 2), - lambda x: x + 1, # extra args passed through on the right - lambda x, y: (x * 2, y + 1)) + a, b = pipec(Values(1, 2), + # extra values passed through by curry, positionals on the right + lambda x: x + 1, + lambda x, y: Values(x * 2, y + 1)) assert (a, b) == (4, 3) """ return pipe(values0, *map(curry, bodys)) +@passthrough_lazy_args class piped: - """Like piped1, but for any number of inputs/outputs at each step.""" - def __init__(self, *xs): - """Set up a pipe and load the initial values xs into it.""" - self._xs = xs + """Like piped1, but for any number of inputs/outputs at each step. + + The only restriction is that the call and return signatures must match: + each function must take those positional/named arguments the previous one + returns. Use a `Values` object to denote multiple-return-values, and/or + named return values. + """ + def __init__(self, *xs, **kws): + """Set up a pipe and load the initial values xs and kws into it. + + The inputs are automatically packed into a `Values`. + """ + self._xs = Values(*xs, **kws) def __or__(self, f): """Pipe the values through the function f. + If the data currently in the pipe is a `Values`, it is unpacked + to the args and kwargs of `f`. Otherwise, we feed the data to `f` + as a single positional argument. + Example:: - f = lambda x, y: (2*x, y+1) - g = lambda x, y: (x+1, 2*y) - x = piped(2, 3) | f | g | exitpipe # --> (5, 8) + f = lambda x, y: Values(2*x, y+1) + g = lambda x, y: Values(x+1, 2*y) + x = piped(2, 3) | f | g | exitpipe # --> Values(5, 8) + + If the final return value is a `Values`, and contains only one positional + return value, we unwrap it. Otherwise the `Values` object is returned as-is. 
""" + f = force1(f) xs = self._xs + assert isinstance(xs, Values) # __init__ ensures this if f is exitpipe: - return xs if len(xs) > 1 else xs[0] + return xs if xs.kwrets or len(xs.rets) > 1 else xs[0] cls = self.__class__ - assert isinstance(xs, tuple) # __init__ ensures this - newxs = f(*xs) - if isinstance(newxs, tuple): - return cls(*newxs) + newxs = maybe_force_args(f, *xs.rets, **xs.kwrets) + if isinstance(newxs, Values): + return cls(*newxs.rets, **newxs.kwrets) return cls(newxs) def __repr__(self): # pragma: no cover return f"" +@passthrough_lazy_args class lazy_piped: """Like lazy_piped1, but for any number of inputs/outputs at each step. + The only restriction is that the call and return signatures must match: + each function must take those positional/named arguments the previous one + returns. Use a `Values` object to denote multiple-return-values, and/or + named return values. + Examples:: p1 = lazy_piped(2, 3) - p2 = p1 | (lambda x, y: (x + 1, 2 * y, "foo")) - p3 = p2 | (lambda x, y, s: (x * 2, y + 1, f"got {s}")) - p4 = p3 | (lambda x, y, s: (x + y, s)) + p2 = p1 | (lambda x, y: Values(x + 1, 2 * y, "foo")) + p3 = p2 | (lambda x, y, s: Values(x * 2, y + 1, f"got {s}")) + p4 = p3 | (lambda x, y, s: Values(x + y, s)) # nothing done yet! - assert (p4 | exitpipe) == (13, "got foo") + assert (p4 | exitpipe) == Values(13, "got foo") # lazy pipe as an unfold fibos = [] def nextfibo(a, b): # now two arguments fibos.append(a) - return (b, a + b) # two return values, still expressed as a tuple + return Values(a=b, b=(a + b)) # can return by name too p = lazy_piped(1, 1) for _ in range(10): p = p | nextfibo - p | exitpipe - print(fibos) + assert p | exitpipe == Values(a=89, b=144) # run; check final state + assert fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] """ - def __init__(self, *xs, _funcs=None): - """Set up a lazy pipe and load the initial values xs into it. 
+ def __init__(self, *xs, _funcs=None, **kws): + """Set up a lazy pipe and load the initial values xs and kws into it. + + The inputs are automatically packed into a `Values`. The ``_funcs`` parameter is for internal use. """ - self._xs = xs - self._funcs = _funcs or () + self._xs = Values(*xs, **kws) + self._funcs = force(_funcs or ()) def __or__(self, f): - """Pipe the values into f; but just plan to do so, don't perform it yet.""" + """Pipe the values into f; but just plan to do so, don't perform it yet. + + When f is `exitpipe`, perform the planned computation. + + When the computation is performed, when this `f` is reached, if the data + currently in the pipe is a `Values`, it is unpacked to the args and kwargs + of `f`. Otherwise, we feed the data to `f` as a single positional argument. + + If the final return value is a `Values`, and contains only one positional + return value, we unwrap it. Otherwise the `Values` object is returned as-is. + """ + f = force1(f) if f is exitpipe: # compute now vs = self._xs for g in self._funcs: - if isinstance(vs, tuple): - vs = g(*vs) + if isinstance(vs, Values): + vs = maybe_force_args(g, *vs.rets, **vs.kwrets) else: - vs = g(vs) - if isinstance(vs, tuple): - return vs if len(vs) > 1 else vs[0] + vs = maybe_force_args(g, vs) + if isinstance(vs, Values): + ret = vs if vs.kwrets or len(vs.rets) > 1 else vs[0] else: - return vs + ret = vs + # In `unpythonic`, return values are never implicitly lazy. + # The final result here is a return value. + # + # It is legal to pipe the initial value immediately to `exitpipe`; + # in that case, in a `with lazify` block, it will be a promise + # (or a `Values` of several promises). + return force(ret) # just pass on the references to the original xs. 
cls = self.__class__ - return cls(*self._xs, _funcs=self._funcs + (f,)) + return cls(*self._xs.rets, _funcs=self._funcs + (force1(f),), **self._xs.kwrets) def __repr__(self): # pragma: no cover return f"" diff --git a/unpythonic/slicing.py b/unpythonic/slicing.py index 3aea8e8e..11884073 100644 --- a/unpythonic/slicing.py +++ b/unpythonic/slicing.py @@ -22,6 +22,9 @@ def islice(iterable): start or stop will force the iterable, because that is the only way to know its length. + The desired elements are held in an internal buffer until they are yielded + by iterating over the `islice`. + - A single index (negative also allowed) is interpreted as a length-1 islice starting at that index. The slice is then immediately evaluated and the item is returned. diff --git a/unpythonic/syntax/__init__.py b/unpythonic/syntax/__init__.py index f3b24930..e4e9257d 100644 --- a/unpythonic/syntax/__init__.py +++ b/unpythonic/syntax/__init__.py @@ -4,16 +4,11 @@ Requires `mcpyrate`. """ -from mcpyrate import parametricmacro -from mcpyrate.expander import MacroExpander -from mcpyrate.utils import extract_bindings - -from ..dynassign import make_dynvar, dyn - # -------------------------------------------------------------------------------- -# This module contains the macro interface and docstrings; the submodules -# contain the actual syntax transformers (regular functions that process ASTs) -# that implement the macros. +# This module only re-exports the macro interfaces so the macros can be imported +# by `from unpythonic.syntax import macros, ...`. The submodules contain the actual +# macro interfaces (and their docstrings), as well as the syntax transformers +# (i.e. regular functions that process ASTs) that implement the macros. 
# -------------------------------------------------------------------------------- # -------------------------------------------------------------------------------- @@ -63,2662 +58,58 @@ # def mymacrointerface(tree, *, expander, *kw): # # perform your outside-in processing here # -# tree = expander.visit(tree) # recurse explicitly +# tree = expander.visit_recursively(tree) # recurse explicitly # # # perform your inside-out processing here # # return tree # -# If the line `tree = expander.visit(tree)` is omitted, the macro expands outside-in. +# If the line `tree = expander.visit_recursively(tree)` is omitted, the macro expands outside-in. # Note this default is different from MacroPy's! -# -# There are further cleanups of the macro layer possible with `mcpyrate`. For example: -# -# - Quasiquotes no longer auto-expand macros in the quoted code. `letseq` could use hygienic *macro* -# capture and just return an unexpanded `let` and another `letseq` (with one fewer binding), -# similarly to how Racket implements `let*`. See `unpythonic.syntax.simplelet` for a demo. -# -# - The macro interfaces and their docstrings could live inside the implementation modules, and this -# module could just re-export them. (A function being a macro is a feature of its *use site* where -# it is imported, by `from xxx import macros, ...`; `mcpyrate` has no macro registry.) -# -# - Many macros could perhaps run in the outside-in pass. Some need a redesign for their AST analysis, -# but much of that has been sufficiently abstracted (e.g. `unpythonic.syntax.letdoutil`) so that this -# is mainly a case of carefully changing the analysis mode at all appropriate use sites. -# -# However, 0.15.0 is the initial version that runs on `mcpyrate`, and the focus is to just get this running. -# Cleanups can be done in a future release. - -# TODO: Fix remaining failures and errors detected by test suite. 
- -# TODO: `make_isxpred` is now obsolete because `mcpyrate` does not rename hygienic captures of run-time values. Make it explicit at the use sites what they want, and remove `make_isxpred`. (E.g. `curry` wants to match both `curryf` and `currycall`, exactly. Some use sites want to match only a single thing.) - -# TODO: locref could be an ASTMarker anywhere that needs a source location reference; extract `.body` if so. - -# TODO: Brackets: use "with test[...]" instead of "with test(...)" in the test modules - -# TODO: Remove any unused `expander` kwargs from the macro interface - -# TODO: Drop `# pragma: no cover` from macro tests as appropriate, since `mcpyrate` reports coverage correctly. -# TODO: Test the q[t[...]] implementation in do0[] - -# TODO: With `mcpyrate` we could start looking at values, not names, when the aim is to detect hygienically captured `unpythonic` constructs. See use sites of `isx`; refer to `mcpyrate.quotes.is_captured_value` and `mcpyrate.quotes.lookup_value`. - -# TODO: With `mcpyrate`, we could move the macro interface functions to -# TODO: the submodules, and have just re-exports here. - -# TODO: macro docs: "first pass" -> "outside in"; "second pass" -> "inside out" -# TODO: Some macros look up others; convert lookups to mcpyrate style (accounting for as-imports) -# TODO: or hygienic macro references (`h[...]`), as appropriate. +# TODO: 0.16: With `mcpyrate` we could start looking at values, not names, when the aim is to detect hygienically captured `unpythonic` constructs. See use sites of `isx`; refer to `mcpyrate.quotes.is_captured_value` and `mcpyrate.quotes.lookup_value`. -# TODO: `isx` and `getname` from `unpythonic.syntax.nameutil` should probably live in `mcpyrate` instead +# TODO: 0.16: Consider using run-time compiler access in macro tests, like `mcpyrate` itself does. This compartmentalizes testing so that the whole test module won't crash on a macro-expansion error. 
-# TODO: `mcpyrate` does not auto-expand macros in quasiquoted code. -# - Consider when we should expand macros in quoted code and when not -# - Consider what changes this implies for other macros that read the partially expanded output -# (some things may change from expanded to unexpanded, facilitating easier analysis but requiring -# code changes) +# TODO: 0.16: Return a compile-time marker from all block macros? Currently only macros that need to emit a marker for a specific reason (for working together with some specific macro) do so, namely `autoref` and `continuations`. -# TODO: Consider using run-time compiler access in macro tests, like `mcpyrate` itself does. This compartmentalizes testing so that the whole test module won't crash on a macro-expansion error. +# TODO: 0.16: move `scoped_transform` to `mcpyrate` as `ScopedASTTransformer` and `ScopedASTVisitor`. -# TODO: Change decorator macro invocations to use [] instead of () to pass macro arguments. Requires Python 3.9. +# TODO: 0.16: Add call-macros to `mcpyrate`. This allows the whole expression of `kw()`/`where()` to be detected as a macro invocation. (First, think whether this is a good idea.) -# TODO: Check expansion order of several macros in the same `with` statement +# TODO: 0.16: Something like `unpythonic.syntax.nameutil` should probably live in `mcpyrate` instead. -# TODO: grep for any remaining mentions of "macropy" +# TODO: 0.16: AST pattern matching for `mcpyrate`? Would make destructuring easier. A writable representation (auto-viewify) is a pain to build, though... -# TODO: Upgrade anaphoric if's `it` into a `mcpyrate` magic variable that errors out at compile time when it appears in an invalid position (i.e. outside any `aif`). Basically, take the `aif` from `mcpyrate`. -# TODO: also let_syntax block, expr -# TODO: also kw() in unpythonic.syntax.prefix +# Parenthesis syntax for decorator macro arguments is deprecated; bracket syntax is preferred. 
+# Parenthesis syntax is kept for backward compatibility. -# TODO: let_syntax block, expr: syntactic consistency: change parentheses to brackets +from .autocurry import * # noqa: F401, F403 +from .autoref import * # noqa: F401, F403 +from .dbg import * # noqa: F401, F403 +from .forall import * # noqa: F401, F403 +from .ifexprs import * # noqa: F401, F403 +from .lambdatools import * # noqa: F401, F403 +from .lazify import * # noqa: F401, F403 +from .letdo import * # noqa: F401, F403 +from .letsyntax import * # noqa: F401, F403 +from .nb import * # noqa: F401, F403 +from .prefix import * # noqa: F401, F403 +from .tailtools import * # noqa: F401, F403 +from .testingtools import * # noqa: F401, F403 -# TODO: grep codebase for "0.15", may have some pending interface changes that don't have their own GitHub issue (e.g. parameter ordering of `unpythonic.it.window`) - -# TODO: ansicolor: `mcpyrate` already depends on Colorama anyway (and has a *nix-only fallback capability). -# TODO: `unpythonic` only needs the colorizer in the *macro-enabled* test framework; so we don't really need -# TODO: to provide our own colorizer; we can use the one from `mcpyrate`. (It would be different if regular code needed it.) - -# TODO: with mcpyrate, do we really need to set `ctx` in our macros? (does our macro code need it?) - -# TODO: The HasThon test (grep for it), when putting the macros in the wrong order on purpose, -# TODO: confuses the call site filename detector of the test framework. Investigate. - -# TODO: Move dialect examples from `pydialect` into a new package, `unpythonic.dialects`. -# TODO: `mcpyrate` now provides the necessary infrastructure, while `unpythonic` has the macros -# TODO: needed to make interesting things happen. Update docs accordingly for both projects. 
- -# Syntax transformers and internal utilities -from .autoref import autoref as _autoref -from .autocurry import autocurry as _autocurry -from .dbg import dbg_block as _dbg_block, dbg_expr as _dbg_expr -from .forall import forall as _forall -from .ifexprs import aif as _aif, cond as _cond -from .lambdatools import (multilambda as _multilambda, - namedlambda as _namedlambda, - f as _f, - envify as _envify) -from .lazify import lazy as _lazy, lazify as _lazify, lazyrec as _lazyrec -from .letdo import (local as _local, delete as _delete, - do as _do, do0 as _do0, - let as _let, letseq as _letseq, letrec as _letrec, - dlet as _dlet, dletseq as _dletseq, dletrec as _dletrec, - blet as _blet, bletseq as _bletseq, bletrec as _bletrec) -from .letdoutil import (UnexpandedLetView as _UnexpandedLetView, - canonize_bindings as _canonize_bindings) -from .letsyntax import (let_syntax_expr as _let_syntax_expr, - let_syntax_block as _let_syntax_block) -from .nb import nb as _nb -from .prefix import prefix as _prefix -from .tailtools import (autoreturn as _autoreturn, tco as _tco, - continuations as _continuations) -from .testingtools import (test_expr as _test_expr, - test_expr_signals as _test_expr_signals, - test_expr_raises as _test_expr_raises, - test_block as _test_block, - test_block_signals as _test_block_signals, - test_block_raises as _test_block_raises, - fail_expr as _fail_expr, - error_expr as _error_expr, - warn_expr as _warn_expr) +# -------------------------------------------------------------------------------- +# Initialization code, not really meant for export. -# Re-exports (for client code that uses us) -from .dbg import dbgprint_block, dbgprint_expr # noqa: F401, re-export for re-use in a decorated variant. 
-from .forall import insist, deny # noqa: F401 -from .ifexprs import it # noqa: F401 -from .letdoutil import where # noqa: F401 -from .lazify import force, force1 # noqa: F401 -from .letsyntax import block, expr # noqa: F401 -from .prefix import q, u, kw # noqa: F401 # TODO: bad names, `mcpyrate` uses them too. -from .tailtools import call_cc # noqa: F401 -from .testingtools import the # noqa: F401 +from ..dynassign import make_dynvar as _make_dynvar # We use `dyn` to pass the `expander` parameter to the macro implementations. class _NoExpander: - def visit(self, tree): + def __getattr__(self, k): # Make the dummy error out whenever we attempt to do anything with it. raise NotImplementedError("Macro expander instance has not been set in `dyn`.") -make_dynvar(_macro_expander=_NoExpander()) - -# ----------------------------------------------------------------------------- - -# The "kw" we have here is the parameter from mcpyrate; the "kw" we export (that -# flake8 thinks conflicts with this) is the runtime stub for our `prefix` macro. -@parametricmacro -def autoref(tree, *, args, syntax, expander, **kw): # noqa: F811 - """Implicitly reference attributes of an object. - - Example:: - - e = env(a=1, b=2) - c = 3 - with autoref[e]: - a - b - c - - The transformation is applied in ``Load`` context only. ``Store`` and ``Del`` - are not redirected. - - Useful e.g. with the ``.mat`` file loader of SciPy. - - **CAUTION**: `autoref` is essentially the `with` construct of JavaScript - (which is completely different from Python's meaning of `with`), which is - nowadays deprecated. See: - - https://www.ecma-international.org/ecma-262/6.0/#sec-with-statement - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with - https://2ality.com/2011/06/with-statement.html - - **CAUTION**: The auto-reference `with` construct was deprecated in JavaScript - **for security reasons**. 
Since the autoref'd object **will hijack all name - lookups**, use `with autoref` only with an object you trust! - - **CAUTION**: `with autoref` also complicates static code analysis or makes it - outright infeasible, for the same reason. It is impossible to statically know - whether something that looks like a bare name in the source code is actually - a true bare name, or a reference to an attribute of the autoref'd object. - That status can also change at any time, since the lookup is dynamic, and - attributes can be added and removed dynamically. - """ - if syntax != "block": - raise SyntaxError("autoref is a block macro only") - if not args: - raise SyntaxError("autoref requires an argument, the object to be auto-referenced") - - if "optional_vars" in kw: - target = kw["optional_vars"] - else: - target = None - - tree = expander.visit(tree) - - return _autoref(block_body=tree, args=args, asname=target) - -# ----------------------------------------------------------------------------- - -def aif(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Anaphoric if. - - Usage:: - - aif[test, then, otherwise] - - aif[[pre, ..., test], - [post_true, ..., then], # "then" branch - [post_false, ..., otherwise]] # "otherwise" branch - - Inside the ``then`` and ``otherwise`` branches, the magic identifier ``it`` - (which is always named literally ``it``) refers to the value of ``test``. - - This expands into a ``let`` and an expression-form ``if``. - - Each part may consist of multiple expressions by using brackets around it; - those brackets create a `do` environment (see `unpythonic.syntax.do`). - - To represent a single expression that is a literal list, use extra - brackets: ``[[1, 2, 3]]``. - """ - if syntax != "expr": - raise SyntaxError("aif is an expr macro only") - - # Expand outside-in, but the implicit do[] needs the expander. 
- with dyn.let(_macro_expander=expander): - return _aif(tree) - -def cond(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Lispy cond; like "a if p else b", but has "elif". - - Usage:: - - cond[test1, then1, - test2, then2, - ... - otherwise] - - cond[[pre1, ..., test1], [post1, ..., then1], - [pre2, ..., test2], [post2, ..., then2], - ... - [postn, ..., otherwise]] - - This allows human-readable multi-branch conditionals in an expression position. - - Each part may consist of multiple expressions by using brackets around it; - those brackets create a `do` environment (see `unpythonic.syntax.do`). - - To represent a single expression that is a literal list, use extra - brackets: ``[[1, 2, 3]]``. - """ - if syntax != "expr": - raise SyntaxError("cond is an expr macro only") - - # Expand outside-in, but the implicit do[] needs the expander. - with dyn.let(_macro_expander=expander): - return _cond(tree) - -# ----------------------------------------------------------------------------- - -def autocurry(tree, *, syntax, expander, **kw): # technically a list of trees, the body of the with block # noqa: F811 - """[syntax, block] Automatic currying. - - Usage:: - - from unpythonic.syntax import macros, autocurry - - with autocurry: - ... - - All **function calls** and **function definitions** (``def``, ``lambda``) - *lexically* inside the ``with autocurry`` block are automatically curried. - - **CAUTION**: Some builtins are uninspectable or may report their arities - incorrectly; in those cases, ``curry`` may fail, occasionally in mysterious - ways. - - The function ``unpythonic.arity.arities``, which ``unpythonic.fun.curry`` - internally uses, has a workaround for the inspectability problems of all - builtins in the top-level namespace (as of Python 3.7), but e.g. methods - of builtin types are not handled. 
- - Lexically inside a ``with autocurry`` block, the auto-curried function calls - will skip the curry if the function is uninspectable, instead of raising - ``TypeError`` as usual. - - Example:: - - from unpythonic.syntax import macros, autocurry - from unpythonic import foldr, composerc as compose, cons, nil, ll - - with autocurry: - def add3(a, b, c): - return a + b + c - assert add3(1)(2)(3) == 6 - assert add3(1, 2)(3) == 6 - assert add3(1)(2, 3) == 6 - assert add3(1, 2, 3) == 6 - - mymap = lambda f: foldr(compose(cons, f), nil) - double = lambda x: 2 * x - assert mymap(double, ll(1, 2, 3)) == ll(2, 4, 6) - - # The definition was auto-curried, so this works here too. - assert add3(1)(2)(3) == 6 - """ - if syntax != "block": - raise SyntaxError("autocurry is a block macro only") - - tree = expander.visit(tree) - - return _autocurry(block_body=tree) - -# ----------------------------------------------------------------------------- - -@parametricmacro -def let(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Introduce expression-local variables. - - This is sugar on top of ``unpythonic.lispylet.let``. - - Usage:: - - let[(k0, v0), ...][body] - let[(k0, v0), ...][[body0, ...]] - - where ``body`` is an expression. The names bound by ``let`` are local; - they are available in ``body``, and do not exist outside ``body``. - - Alternative haskelly syntax is also available:: - - let[((k0, v0), ...) in body] - let[((k0, v0), ...) in [body0, ...]] - let[body, where((k0, v0), ...)] - let[[body0, ...], where((k0, v0), ...)] - - For a body with multiple expressions, use an extra set of brackets, - as shown above. This inserts a ``do``. Only the outermost extra brackets - are interpreted specially; all others in the bodies are interpreted - as usual, as lists. - - Note that in the haskelly syntax, the extra brackets for a multi-expression - body should enclose only the ``body`` part. - - Each ``name`` in the same ``let`` must be unique. 
- - Assignment to let-bound variables is supported with syntax such as ``x << 42``. - This is an expression, performing the assignment, and returning the new value. - - In a multiple-expression body, also an internal definition context exists - for local variables that are not part of the ``let``; see ``do`` for details. - - Technical points: - - - In reality, the let-bound variables live in an ``unpythonic.env``. - This macro performs the magic to make them look (and pretty much behave) - like lexical variables. - - - Compared to ``unpythonic.lispylet.let``, the macro version needs no quotes - around variable names in bindings. - - - The body is automatically wrapped in a ``lambda e: ...``. - - - For all ``x`` in bindings, the macro transforms lookups ``x --> e.x``. - - - Lexical scoping is respected (so ``let`` constructs can be nested) - by actually using a unique name (gensym) instead of just ``e``. - - - In the case of a multiple-expression body, the ``do`` transformation - is applied first to ``[body0, ...]``, and the result becomes ``body``. - """ - if syntax != "expr": - raise SyntaxError("let is an expr macro only") - - # The `let[]` family of macros expands inside out. - with dyn.let(_macro_expander=expander): - return _destructure_and_apply_let(tree, args, expander, _let) - -@parametricmacro -def letseq(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Let with sequential binding (like Scheme/Racket let*). - - Like ``let``, but bindings take effect sequentially. Later bindings - shadow earlier ones if the same name is used multiple times. - - Expands to nested ``let`` expressions. - """ - if syntax != "expr": - raise SyntaxError("letseq is an expr macro only") - - with dyn.let(_macro_expander=expander): - return _destructure_and_apply_let(tree, args, expander, _letseq) - -@parametricmacro -def letrec(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Let with mutually recursive binding. 
- - Like ``let``, but bindings can see other bindings in the same ``letrec``. - - Each ``name`` in the same ``letrec`` must be unique. - - The definitions are processed sequentially, left to right. A definition - may refer to any previous definition. If ``value`` is callable (lambda), - it may refer to any definition, including later ones. - - This is useful for locally defining mutually recursive functions. - """ - if syntax != "expr": - raise SyntaxError("letrec is an expr macro only") - - with dyn.let(_macro_expander=expander): - return _destructure_and_apply_let(tree, args, expander, _letrec) - -# NOTE: At the macro interface, the invocations `let()[...]` (empty args) -# and `let[...]` (no args) were indistinguishable in MacroPy. This was a -# problem, because it might be that the user wrote the body but simply -# forgot to put anything in the parentheses. (There's `do[]` if you need -# a `let` without making any bindings.) -# -# In `mcpyrate`, `let()[...]` is a syntax error. The preferred syntax, -# when using macro arguments, is `let[...][...]`. When this is not -# possible (in decorator position up to Python 3.8), then `let(...)[...]` -# is acceptable. But empty brackets/parentheses are not accepted. Thus, -# we will have an empty `args` list only when there are no brackets/parentheses -# for the macro arguments part. -# -# So when `args` is empty, this function assumes haskelly let syntax -# `let[(...) in ...]` or `let[..., where(...)]`. In these cases, -# both the bindings and the body reside inside the brackets (i.e., -# in the AST contained in the `tree` argument). -# -# Since the brackets/parentheses must be deleted when no macro arguments -# are given, this is now the correct assumption to make. -# -# But note that if needed elsewhere, `mcpyrate` has the `invocation` kwarg -# in the macro interface that gives a copy of the whole macro invocation -# node (so we could see the exact original syntax). 
-# -# allow_call_in_name_position: used by let_syntax to allow template definitions. -def _destructure_and_apply_let(tree, args, macro_expander, let_expander_function, allow_call_in_name_position=False): - with dyn.let(_macro_expander=macro_expander): # implicit do (extra bracket notation) needs this. - if args: - bs = _canonize_bindings(args, allow_call_in_name_position=allow_call_in_name_position) - return let_expander_function(bindings=bs, body=tree) - # haskelly syntax, let[(...) in ...], let[..., where(...)] - view = _UnexpandedLetView(tree) # note "tree" here is only the part inside the brackets - return let_expander_function(bindings=view.bindings, body=view.body) - -# ----------------------------------------------------------------------------- -# Decorator versions, for "let over def". - -@parametricmacro -def dlet(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] Decorator version of let, for 'let over def'. - - Example:: - - @dlet[(x, 0)] - def count(): - x << x + 1 - return x - assert count() == 1 - assert count() == 2 - - **CAUTION**: function arguments, local variables, and names declared as - ``global`` or ``nonlocal`` in a given lexical scope shadow names from the - ``let`` environment *for the entirety of that lexical scope*. (This is - modeled after Python's standard scoping rules.) - - **CAUTION**: assignment to the let environment is ``name << value``; - the regular syntax ``name = value`` creates a local variable in the - lexical scope of the ``def``. - """ - if syntax != "decorator": - raise SyntaxError("dlet is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _dlet) - -@parametricmacro -def dletseq(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] Decorator version of letseq, for 'letseq over def'. - - Expands to nested function definitions, each with one ``dlet`` decorator. 
- - Example:: - - @dletseq[(x, 1), - (x, x+1), - (x, x+2)] - def g(a): - return a + x - assert g(10) == 14 - """ - if syntax != "decorator": - raise SyntaxError("dletseq is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _dletseq) - -@parametricmacro -def dletrec(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] Decorator version of letrec, for 'letrec over def'. - - Example:: - - @dletrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), - (oddp, lambda x: (x != 0) and evenp(x - 1))] - def f(x): - return evenp(x) - assert f(42) is True - assert f(23) is False - - Same cautions apply as to ``dlet``. - """ - if syntax != "decorator": - raise SyntaxError("dletrec is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _dletrec) - -@parametricmacro -def blet(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] def --> let block. - - Example:: - - @blet[(x, 21)] - def result(): - return 2*x - assert result == 42 - """ - if syntax != "decorator": - raise SyntaxError("blet is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _blet) - -@parametricmacro -def bletseq(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] def --> letseq block. - - Example:: - - @bletseq[(x, 1), - (x, x+1), - (x, x+2)] - def result(): - return x - assert result == 4 - """ - if syntax != "decorator": - raise SyntaxError("bletseq is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _bletseq) - -@parametricmacro -def bletrec(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, decorator] def --> letrec block. 
- - Example:: - - @bletrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), - (oddp, lambda x: (x != 0) and evenp(x - 1))] - def result(): - return evenp(42) - assert result is True - - Because names inside a ``def`` have mutually recursive scope, - an almost equivalent pure Python solution (no macros) is:: - - from unpythonic.misc import call - - @call - def result(): - evenp = lambda x: (x == 0) or oddp(x - 1) - oddp = lambda x: (x != 0) and evenp(x - 1) - return evenp(42) - assert result is True - """ - if syntax != "decorator": - raise SyntaxError("bletrec is a decorator macro only") - - args = expander.visit(args) - tree = expander.visit(tree) - - return _destructure_and_apply_let(tree, args, expander, _bletrec) - -# ----------------------------------------------------------------------------- -# Imperative code in expression position. - -def local(tree, *, syntax, invocation, **kw): # noqa: F811 - """[syntax] Declare a local name in a "do". - - Usage:: - - local[name << value] - - Only meaningful in a ``do[...]``, ``do0[...]``, or an implicit ``do`` - (extra bracket syntax). - - The declaration takes effect starting from next item in the ``do``, i.e. - the item that comes after the ``local[]``. It will not shadow nonlocal - variables of the same name in any earlier items of the same ``do``, and - in the item making the definition, the old bindings are still in effect - on the RHS. - - This means that if you want, you can declare a local ``x`` that takes its - initial value from a nonlocal ``x``, by ``local[x << x]``. Here the ``x`` - on the RHS is the nonlocal one (since the declaration has not yet taken - effect), and the ``x`` on the LHS is the name given to the new local variable - that only exists inside the ``do``. Any references to ``x`` in any further - items in the same ``do`` will point to the local ``x``. 
- """ - if syntax != "expr": - raise SyntaxError("local is an expr macro only") # pragma: no cover - return _local(tree) - -def delete(tree, *, syntax, invocation, **kw): # noqa: F811 - """[syntax] Delete a previously declared local name in a "do". - - Usage:: - - delete[name] - - Only meaningful in a ``do[...]``, ``do0[...]``, or an implicit ``do`` - (extra bracket syntax). - - The deletion takes effect starting from the next item; hence, the - deleted local variable will no longer shadow nonlocal variables of - the same name in any later items of the same `do`. - - Note ``do[]`` supports local variable deletion, but the ``let[]`` - constructs don't, by design. - """ - if syntax != "expr": - raise SyntaxError("delete is an expr macro only") # pragma: no cover - return _delete(tree) - -def do(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Stuff imperative code into an expression position. - - Return value is the value of the last expression inside the ``do``. - See also ``do0``. - - Usage:: - - do[body0, ...] - - Example:: - - do[local[x << 42], - print(x), - x << 23, - x] - - This is sugar on top of ``unpythonic.seq.do``, but with some extra features. - - - To declare and initialize a local name, use ``local[name << value]``. - - The operator ``local`` is syntax, not really a function, and it - only exists inside a ``do``. There is also an operator ``delete`` - to delete a previously declared local name in the ``do``. - - Both ``local`` and ``delete``, if used, should be imported as macros. - - - By design, there is no way to create an uninitialized variable; - a value must be given at declaration time. Just use ``None`` - as an explicit "no value" if needed. - - - Names declared within the same ``do`` must be unique. Re-declaring - the same name is an expansion-time error. - - - To assign to an already declared local name, use ``name << value``. 
- - **local name declarations** - - A ``local`` declaration comes into effect in the expression following - the one where it appears. Thus:: - - result = [] - let((lst, []))[do[result.append(lst), # the let "lst" - local[lst << lst + [1]], # LHS: do "lst", RHS: let "lst" - result.append(lst)]] # the do "lst" - assert result == [[], [1]] - - **Syntactic ambiguity** - - These two cases cannot be syntactically distinguished: - - - Just one body expression, which is a literal tuple or list, - - - Multiple body expressions, represented as a literal tuple or list. - - ``do`` always uses the latter interpretation. - - Whenever there are multiple expressions in the body, the ambiguity does not - arise, because then the distinction between the sequence of expressions itself - and its items is clear. - - Examples:: - - do[1, 2, 3] # --> tuple, 3 - do[(1, 2, 3)] # --> tuple, 3 (since in Python, the comma creates tuples; - # parentheses are only used for disambiguation) - do[[1, 2, 3]] # --> list, 3 - do[[[1, 2, 3]]] # --> list containing a list, [1, 2, 3] - do[([1, 2, 3],)] # --> tuple containing a list, [1, 2, 3] - do[[1, 2, 3],] # --> tuple containing a list, [1, 2, 3] - do[[(1, 2, 3)]] # --> list containing a tuple, (1, 2, 3) - do[((1, 2, 3),)] # --> tuple containing a tuple, (1, 2, 3) - do[(1, 2, 3),] # --> tuple containing a tuple, (1, 2, 3) - - It is possible to use ``unpythonic.misc.pack`` to create a tuple from - given elements: ``do[pack(1, 2, 3)]`` is interpreted as a single-item body - that creates a tuple (by calling a function). - - Note the outermost brackets belong to the ``do``; they don't yet create a list. - - In the *use brackets to denote a multi-expr body* syntax (e.g. ``multilambda``, - ``let`` constructs), the extra brackets already create a list, so in those - uses, the ambiguity does not arise. The transformation inserts not only the - word ``do``, but also the outermost brackets. 
For example:: - - let[(x, 1), - (y, 2)][[ - [x, y]]] - - transforms to:: - - let[(x, 1), - (y, 2)][do[[ # "do[" is inserted between the two opening brackets - [x, y]]]] # and its closing "]" is inserted here - - which already gets rid of the ambiguity. - - **Notes** - - Macros are expanded in an inside-out order, so a nested ``let`` shadows - names, if the same names appear in the ``do``:: - - do[local[x << 17], - let[(x, 23)][ - print(x)], # 23, the "x" of the "let" - print(x)] # 17, the "x" of the "do" - - The reason we require local names to be declared is to allow write access - to lexically outer environments from inside a ``do``:: - - let[(x, 17)][ - do[x << 23, # no "local[...]"; update the "x" of the "let" - local[y << 42], # "y" is local to the "do" - print(x, y)]] - - With the extra bracket syntax, the latter example can be written as:: - - let[(x, 17)][[ - x << 23, - local[y << 42], - print(x, y)]] - - It's subtly different in that the first version has the do-items in a tuple, - whereas this one has them in a list, but the behavior is exactly the same. - - Python does it the other way around, requiring a ``nonlocal`` statement - to re-bind a name owned by an outer scope. - - The ``let`` constructs solve this problem by having the local bindings - declared in a separate block, which plays the role of ``local``. 
- """ - if syntax != "expr": - raise SyntaxError("do is an expr macro only") - with dyn.let(_macro_expander=expander): - return _do(tree) - -def do0(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Like do, but return the value of the first expression.""" - if syntax != "expr": - raise SyntaxError("do0 is an expr macro only") - with dyn.let(_macro_expander=expander): - return _do0(tree) - -# ----------------------------------------------------------------------------- - -# TODO: change the block() construct to block[], for syntactic consistency -@parametricmacro -def let_syntax(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Introduce local **syntactic** bindings. - - **Expression variant**:: - - let_syntax[(lhs, rhs), ...][body] - let_syntax[(lhs, rhs), ...][[body0, ...]] - - Alternative haskelly syntax:: - - let_syntax[((lhs, rhs), ...) in body] - let_syntax[((lhs, rhs), ...) in [body0, ...]] - - let_syntax[body, where((lhs, rhs), ...)] - let_syntax[[body0, ...], where((lhs, rhs), ...)] - - **Block variant**:: - - with let_syntax: - with block as xs: # capture a block of statements - bare name - ... - with block(a, ...) as xs: # capture a block of statements - template - ... - with expr as x: # capture a single expression - bare name - ... - with expr(a, ...) as x: # capture a single expression - template - ... - body0 - ... - - A single expression can be a ``do[]`` if multiple expressions are needed. - - The bindings are applied **at macro expansion time**, substituting - the expression on the RHS for each instance of the corresponding LHS. - Each substitution gets a fresh copy. - - This is useful to e.g. locally abbreviate long function names at macro - expansion time (with zero run-time overhead), or to splice in several - (possibly parametric) instances of a common pattern. - - In the expression variant, ``lhs`` may be: - - - A bare name (e.g. ``x``), or - - - A simple template of the form ``f(x, ...)``. 
The names inside the - parentheses declare the formal parameters of the template (that can - then be used in the body). - - In the block variant: - - - The **as-part** specifies the name of the LHS. - - - If a template, the formal parameters are declared on the ``block`` - or ``expr``, not on the as-part (due to syntactic limitations). - - **Templates** - - To make parametric substitutions, use templates. - - Templates support only positional arguments, with no default values. - - Even in block templates, parameters are always expressions (because they - use the function-call syntax at the use site). - - In the body of the ``let_syntax``, a template is used like a function call. - Just like in an actual function call, when the template is substituted, - any instances of its formal parameters on its RHS get replaced by the - argument values from the "call" site; but ``let_syntax`` performs this - at macro-expansion time. - - Note each instance of the same formal parameter gets a fresh copy of the - corresponding argument value. - - **Substitution order** - - This is a two-step process. In the first step, we apply template substitutions. - In the second step, we apply bare name substitutions to the result of the - first step. (So RHSs of templates may use any of the bare-name definitions.) - - Within each step, the substitutions are applied **in the order specified**. - So if the bindings are ``((x, y), (y, z))``, then ``x`` transforms to ``z``. - But if the bindings are ``((y, z), (x, y))``, then ``x`` transforms to ``y``, - and only an explicit ``y`` at the use site transforms to ``z``. - - **Notes** - - Inspired by Racket's ``let-syntax`` and ``with-syntax``, see: - https://docs.racket-lang.org/reference/let.html - https://docs.racket-lang.org/reference/stx-patterns.html - - **CAUTION**: This is essentially a toy macro system inside the real - macro system, implemented with the real macro system. - - The usual caveats of macro systems apply. 
Especially, we support absolutely - no form of hygiene. Be very, very careful to avoid name conflicts. - - ``let_syntax`` is meant only for simple local substitutions where the - elimination of repetition can shorten the code and improve readability. - - If you need to do something complex, prefer writing a real macro directly - in `mcpyrate`. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("let_syntax is an expr and block macro only") - - tree = expander.visit(tree) - - if syntax == "expr": - return _destructure_and_apply_let(tree, args, expander, _let_syntax_expr, allow_call_in_name_position=True) - else: # syntax == "block": - return _let_syntax_block(block_body=tree) - -@parametricmacro -def abbrev(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Exactly like ``let_syntax``, but expands outside in. - - Because this variant expands before any macros in the body, it can locally - rename other macros, e.g.:: - - abbrev[(m, macrowithverylongname)][ - m[tree1] if m[tree2] else m[tree3]] - - **CAUTION**: Because ``abbrev`` expands outside-in, and does not respect - boundaries of any nested ``abbrev`` invocations, it will not lexically scope - the substitutions. Instead, the outermost ``abbrev`` expands first, and then - any inner ones expand with whatever substitutions they have remaining. - - If the same name is used on the LHS in two or more nested ``abbrev``, - any inner ones will likely raise an error (unless the outer substitution - just replaces a name with another), because also the names on the LHS - in the inner ``abbrev`` will undergo substitution when the outer - ``abbrev`` expands. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("abbrev is an expr and block macro only") - - # DON'T expand inner macro invocations first - outside-in ordering is the default, so we simply do nothing. 
- - if syntax == "expr": - return _destructure_and_apply_let(tree, args, expander, _let_syntax_expr, allow_call_in_name_position=True) - else: - return _let_syntax_block(block_body=tree) - -# ----------------------------------------------------------------------------- - -def forall(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Nondeterministic evaluation. - - Fully based on AST transformation, with real lexical variables. - Like Haskell's do-notation, but here specialized for the List monad. - - Example:: - - # pythagorean triples - pt = forall[z << range(1, 21), # hypotenuse - x << range(1, z+1), # shorter leg - y << range(x, z+1), # longer leg - insist(x*x + y*y == z*z), - (x, y, z)] - assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), - (8, 15, 17), (9, 12, 15), (12, 16, 20)) - """ - if syntax != "expr": - raise SyntaxError("forall is an expr macro only") - - tree = expander.visit(tree) - - return _forall(exprs=tree) - -# ----------------------------------------------------------------------------- - -def multilambda(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Supercharge your lambdas: multiple expressions, local variables. - - For all ``lambda`` lexically inside the ``with multilambda`` block, - ``[...]`` denotes a multiple-expression body with an implicit ``do``:: - - lambda ...: [expr0, ...] --> lambda ...: do[expr0, ...] - - Only the outermost set of brackets around the body of a ``lambda`` denotes - a multi-expression body; the rest are interpreted as lists, as usual. - - Examples:: - - with multilambda: - echo = lambda x: [print(x), x] - assert echo("hi there") == "hi there" - - count = let[(x, 0)][ - lambda: [x << x + 1, - x]] - assert count() == 1 - assert count() == 2 - - mk12 = lambda: [[1, 2]] - assert mk12() == [1, 2] - - For local variables, see ``do``. - """ - if syntax != "block": - raise SyntaxError("multilambda is a block macro only") - - # Expand outside in. 
- # multilambda should expand first before any let[], do[] et al. that happen - # to be inside the block, to avoid misinterpreting implicit lambdas - # generated by those constructs. - with dyn.let(_macro_expander=expander): # implicit do (extra bracket notation) needs this. - return _multilambda(block_body=tree) - -def namedlambda(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Name lambdas implicitly. - - Lexically inside a ``with namedlambda`` block, any literal ``lambda`` - that is assigned to a name using one of the supported assignment forms - is named to have the name of the LHS of the assignment. The name is - captured at macro expansion time. - - Naming modifies the original function object. - - We support: - - - Single-item assignments to a local name, ``f = lambda ...: ...`` - - - Named expressions (a.k.a. walrus operator, Python 3.8+), - ``f := lambda ...: ...`` - - - Assignments to unpythonic environments, ``f << (lambda ...: ...)`` - - - Let bindings, ``let[(f, (lambda ...: ...)) in ...]``, using any - let syntax supported by unpythonic (here using the haskelly let-in - just as an example). - - Support for other forms of assignment might or might not be added in a - future version. - - Example:: - - with namedlambda: - f = lambda x: x**3 # assignment: name as "f" - - let[(x, 42), (g, None), (h, None)][[ - g << (lambda x: x**2), # env-assignment: name as "g" - h << f, # still "f" (no literal lambda on RHS) - (g(x), h(x))]] - - foo = let[(f7, lambda x: x) in f7] # let-binding: name as "f7" - - The naming is performed using the function ``unpythonic.misc.namelambda``, - which will update ``__name__``, ``__qualname__`` and ``__code__.co_name``. - """ - if syntax != "block": - raise SyntaxError("namedlambda is a block macro only") - - # Two-pass macro. We pass in the expander to allow the macro to decide when to recurse. 
- with dyn.let(_macro_expander=expander): - return _namedlambda(block_body=tree) - -def f(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Underscore notation (quick lambdas) for Python. - - Usage:: - - f[body] - - The ``f[]`` macro creates a lambda. Each underscore in ``body`` - introduces a new parameter. - - Example:: - - func = f[_ * _] - - expands to:: - - func = lambda a0, a1: a0 * a1 - - The underscore is interpreted magically by ``f[]``; but ``_`` itself - is not a macro, and has no special meaning outside ``f[]``. The underscore - does **not** need to be imported for ``f[]`` to recognize it. - - The macro does not descend into any nested ``f[]``. - """ - if syntax != "expr": - raise SyntaxError("f is an expr macro only") - - # This macro expands outside in, but needs `expander` for introspection. - with dyn.let(_macro_expander=expander): - return _f(tree) - -def quicklambda(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Make ``f`` quick lambdas expand first. - - To be able to transform correctly, the block macros in ``unpythonic.syntax`` - that transform lambdas (e.g. ``multilambda``, ``tco``) need to see all - ``lambda`` definitions written with Python's standard ``lambda``. - - However, the ``f`` macro uses the syntax ``f[...]``, which (to the analyzer) - does not look like a lambda definition. This macro changes the expansion - order, forcing any ``f[...]`` lexically inside the block to expand before - any other macros do. - - Any expression of the form ``f[...]``, where ``f`` is any name bound in the - current macro expander to the macro `unpythonic.syntax.f`, is understood as - a quick lambda. (In plain English, this respects as-imports of the macro ``f``.) 
- - Example - a quick multilambda:: - - from unpythonic.syntax import macros, multilambda, quicklambda, f, local - - with quicklambda, multilambda: - func = f[[local[x << _], - local[y << _], - x + y]] - assert func(1, 2) == 3 - - (This is of course rather silly, as an unnamed argument can only be mentioned - once. If we're giving names to them, a regular ``lambda`` is shorter to write. - The point is, this combo is now possible.) - """ - if syntax != "block": - raise SyntaxError("quicklambda is a block macro only") - - # This macro expands outside in. - # - # In `mcpyrate`, expander instances are cheap - so we create a second expander - # to which we register only the `f` macro, under whatever names it appears in - # the original expander. Thus it leaves all other macros alone. This is the - # official `mcpyrate` way to immediately expand only some particular macros - # inside the current macro invocation. - bindings = extract_bindings(expander.bindings, f) - return MacroExpander(bindings, expander.filename).visit(tree) - -def envify(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Make formal parameters live in an unpythonic env. - - The purpose is to allow overwriting formals using unpythonic's - expression-assignment ``name << value``. The price is that the references - to the arguments are copied into an env whenever an envified function is - entered. - - Example - PG's accumulator puzzle (http://paulgraham.com/icad.html):: - - with envify: - def foo(n): - return lambda i: n << n + i - - Or even shorter:: - - with autoreturn, envify: - def foo(n): - lambda i: n << n + i - """ - if syntax != "block": - raise SyntaxError("envify is a block macro only") - - # Two-pass macro. 
- with dyn.let(_macro_expander=expander): - return _envify(block_body=tree) - -# ----------------------------------------------------------------------------- - -def autoreturn(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Implicit "return" in tail position, like in Lisps. - - Each ``def`` function definition lexically within the ``with autoreturn`` - block is examined, and if the last item within the body is an expression - ``expr``, it is transformed into ``return expr``. - - If the last item is an if/elif/else block, the transformation is applied - to the last item in each of its branches. - - If the last item is a ``with`` or ``async with`` block, the transformation - is applied to the last item in its body. - - If the last item is a try/except/else/finally block, the rules are as follows. - If an ``else`` clause is present, the transformation is applied to the last - item in it; otherwise, to the last item in the ``try`` clause. Additionally, - in both cases, the transformation is applied to the last item in each of the - ``except`` clauses. The ``finally`` clause is not transformed; the intention - is it is usually a finalizer (e.g. to release resources) that runs after the - interesting value is already being returned by ``try``, ``else`` or ``except``. - - Example:: - - with autoreturn: - def f(): - "I'll just return this" - assert f() == "I'll just return this" - - def g(x): - if x == 1: - "one" - elif x == 2: - "two" - else: - "something else" - assert g(1) == "one" - assert g(2) == "two" - assert g(42) == "something else" - - **CAUTION**: If the final ``else`` is omitted, as often in Python, then - only the ``else`` item is in tail position with respect to the function - definition - likely not what you want. - - So with ``autoreturn``, the final ``else`` should be written out explicitly, - to make the ``else`` branch part of the same if/elif/else block. 
- - **CAUTION**: ``for``, ``async for``, ``while`` are currently not analyzed; - effectively, these are defined as always returning ``None``. If the last item - in your function body is a loop, use an explicit return. - - **CAUTION**: With ``autoreturn`` enabled, functions no longer return ``None`` - by default; the whole point of this macro is to change the default return - value. - - The default return value is ``None`` only if the tail position contains - a statement (because in a sense, a statement always returns ``None``). - """ - if syntax != "block": - raise SyntaxError("autoreturn is a block macro only") - - # Expand outside in. Any nested macros should get clean standard Python, - # not having to worry about implicit "return" statements. - return _autoreturn(block_body=tree) - -def tco(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Implicit tail-call optimization (TCO). - - Examples:: - - with tco: - evenp = lambda x: (x == 0) or oddp(x - 1) - oddp = lambda x: (x != 0) and evenp(x - 1) - assert evenp(10000) is True - - with tco: - def evenp(x): - if x == 0: - return True - return oddp(x - 1) - def oddp(x): - if x != 0: - return evenp(x - 1) - return False - assert evenp(10000) is True - - This is based on a strategy similar to MacroPy's tco macro, but using - the TCO machinery from ``unpythonic.tco``. - - This recursively handles also builtins ``a if p else b``, ``and``, ``or``; - and from ``unpythonic.syntax``, ``do[]``, ``let[]``, ``letseq[]``, ``letrec[]``, - when used in computing a return value. (``aif[]`` and ``cond[]`` also work.) - - Note only calls **in tail position** will be TCO'd. Any other calls - are left as-is. Tail positions are: - - - The whole return value, if it is just a single call. - - - Both ``a`` and ``b`` branches of ``a if p else b`` (but not ``p``). - - - The last item in an ``and``/``or``. If these are nested, only the - last item in the whole expression involving ``and``/``or``. E.g. 
in:: - - (a and b) or c - a and (b or c) - - in either case, only ``c`` is in tail position, regardless of the - values of ``a``, ``b``. - - - The last item in a ``do[]``. - - - In a ``do0[]``, this is the implicit item that just returns the - stored return value. - - - The argument of a call to an escape continuation. The ``ec(...)`` call - itself does not need to be in tail position; escaping early is the - whole point of an ec. - - All function definitions (``def`` and ``lambda``) lexically inside the block - undergo TCO transformation. The functions are automatically ``@trampolined``, - and any tail calls in their return values are converted to ``jump(...)`` - for the TCO machinery. - - Note in a ``def`` you still need the ``return``; it marks a return value. - But see ``autoreturn``:: - - with autoreturn, tco: - def evenp(x): - if x == 0: - True - else: - oddp(x - 1) - def oddp(x): - if x != 0: - evenp(x - 1) - else: - False - assert evenp(10000) is True - - **CAUTION**: regarding escape continuations, only basic uses of ecs created - via ``call_ec`` are currently detected as being in tail position. Any other - custom escape mechanisms are not supported. (This is mainly of interest for - lambdas, which have no ``return``, and for "multi-return" from a nested - function.) - - *Basic use* is defined as either of these two cases:: - - # use as decorator - @call_ec - def result(ec): - ... - - # use directly on a literal lambda - result = call_ec(lambda ec: ...) - - When macro expansion of the ``with tco`` block starts, names of escape - continuations created **anywhere lexically within** the ``with tco`` block - are captured. Lexically within the block, any call to a function having - any of the captured names, or as a fallback, one of the literal names - ``ec``, ``brk``, ``throw`` is interpreted as invoking an escape - continuation. - """ - if syntax != "block": - raise SyntaxError("tco is a block macro only") - - # Two-pass macro. 
- with dyn.let(_macro_expander=expander): - return _tco(block_body=tree) - -def continuations(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] call/cc for Python. - - This allows saving the control state and then jumping back later - (in principle, any time later). Some possible use cases: - - - Tree traversal (possibly a cartesian product of multiple trees, with the - current position in each tracked automatically). - - - McCarthy's amb operator. - - - Generators. (Python already has those, so only for teaching.) - - This is a very loose pythonification of Paul Graham's continuation-passing - macros, which implement continuations by chaining closures and passing the - continuation semi-implicitly. For details, see chapter 20 in On Lisp: - - http://paulgraham.com/onlisp.html - - Continuations are most readily implemented when the program is written in - continuation-passing style (CPS), but that is unreadable for humans. - The purpose of this macro is to partly automate the CPS transformation, so - that at the use site, we can write CPS code in a much more readable fashion. - - A ``with continuations`` block implies TCO; the same rules apply as in a - ``with tco`` block. Furthermore, ``with continuations`` introduces the - following additional rules: - - - Functions which make use of continuations, or call other functions that do, - must be defined within a ``with continuations`` block, using the usual - ``def`` or ``lambda`` forms. - - - All function definitions in a ``with continuations`` block, including - any nested definitions, have an implicit formal parameter ``cc``, - **even if not explicitly declared** in the formal parameter list. - - If declared explicitly, ``cc`` must be in a position that can accept a - default value. - - This means ``cc`` must be declared either as by-name-only:: - - with continuations: - def myfunc(a, b, *, cc): - ... - - f = lambda *, cc: ... 
- - or as the last parameter that has no default:: - - with continuations: - def myfunc(a, b, cc): - ... - - f = lambda cc: ... - - Then the continuation machinery will automatically set the default value - of ``cc`` to the default continuation (``identity``), which just returns - its arguments. - - The most common use case for explicitly declaring ``cc`` is that the - function is the target of a ``call_cc[]``; then it helps readability - to make the ``cc`` parameter explicit. - - - A ``with continuations`` block will automatically transform all - function definitions and ``return`` statements lexically contained - within the block to use the continuation machinery. - - - ``return somevalue`` actually means a tail-call to ``cc`` with the - given ``somevalue``. - - Multiple values can be returned as a ``tuple``. Tupleness is tested - at run-time. - - Any tuple return value is automatically unpacked to the positional - args of ``cc``. To return multiple things as one without the implicit - unpacking, use a ``list``. - - - An explicit ``return somefunc(arg0, ..., k0=v0, ...)`` actually means - a tail-call to ``somefunc``, with its ``cc`` automatically set to our - ``cc``. Hence this inserts a call to ``somefunc`` before proceeding - with our current continuation. (This is most often what we want when - making a tail-call from a continuation-enabled function.) - - Here ``somefunc`` **must** be a continuation-enabled function; - otherwise the TCO chain will break and the result is immediately - returned to the top-level caller. - - (If the call succeeds at all; the ``cc`` argument is implicitly - filled in and passed by name. Regular functions usually do not - accept a named parameter ``cc``, let alone know what to do with it.) - - - Just like in ``with tco``, a lambda body is analyzed as one big - return-value expression. This uses the exact same analyzer; for example, - ``do[]`` (including any implicit ``do[]``) and the ``let[]`` expression - family are supported. 
- - - Calls from functions defined in one ``with continuations`` block to those - defined in another are ok; there is no state or context associated with - the block. - - - Much of the language works as usual. - - Any non-tail calls can be made normally. Regular functions can be called - normally in any non-tail position. - - Continuation-enabled functions behave as regular functions when - called normally; only tail calls implicitly set ``cc``. A normal call - uses ``identity`` as the default ``cc``. - - - For technical reasons, the ``return`` statement is not allowed at the - top level of the ``with continuations:`` block. (Because a continuation - is essentially a function, ``return`` would behave differently based on - whether it is placed lexically before or after a ``call_cc[]``.) - - If you absolutely need to terminate the function surrounding the - ``with continuations:`` block from inside the block, use an exception - to escape; see ``call_ec``, ``catch``, ``throw``. - - **Capturing the continuation**: - - Inside a ``with continuations:`` block, the ``call_cc[]`` statement - captures a continuation. (It is actually a macro, for technical reasons.) - - For various possible program topologies that continuations may introduce, see - the clarifying pictures under ``doc/`` in the source distribution. - - Syntax:: - - x = call_cc[func(...)] - *xs = call_cc[func(...)] - x0, ... = call_cc[func(...)] - x0, ..., *xs = call_cc[func(...)] - call_cc[func(...)] - - Conditional variant:: - - x = call_cc[f(...) if p else g(...)] - *xs = call_cc[f(...) if p else g(...)] - x0, ... = call_cc[f(...) if p else g(...)] - x0, ..., *xs = call_cc[f(...) if p else g(...)] - call_cc[f(...) if p else g(...)] - - Assignment targets: - - - To destructure a multiple-values (from a tuple return value), - use a tuple assignment target (comma-separated names, as usual). - - - The last assignment target may be starred. It is transformed into - the vararg (a.k.a. 
``*args``) of the continuation function. - (It will capture a whole tuple, or any excess items, as usual.) - - - To ignore the return value (useful if ``func`` was called only to - perform its side-effects), just omit the assignment part. - - Conditional variant: - - - ``p`` is any expression. If truthy, ``f(...)`` is called, and if falsey, - ``g(...)`` is called. - - - Each of ``f(...)``, ``g(...)`` may be ``None``. A ``None`` skips the - function call, proceeding directly to the continuation. Upon skipping, - all assignment targets (if any are present) are set to ``None``. - The starred assignment target (if present) gets the empty tuple. - - - The main use case of the conditional variant is for things like:: - - with continuations: - k = None - def setk(cc): - global k - k = cc - def dostuff(x): - call_cc[setk() if x > 10 else None] # capture only if x > 10 - ... - - To keep things relatively straightforward, a ``call_cc[]`` is only - allowed to appear **at the top level** of: - - - the ``with continuations:`` block itself - - a ``def`` or ``async def`` - - Nested defs are ok; here *top level* only means the top level of the - *currently innermost* ``def``. - - If you need to place ``call_cc[]`` inside a loop, use ``@looped`` et al. - from ``unpythonic.fploop``; this has the loop body represented as the - top level of a ``def``. - - Multiple ``call_cc[]`` statements in the same function body are allowed. - These essentially create nested closures. - - **Main differences to Scheme and Racket**: - - Compared to Scheme/Racket, where ``call/cc`` will capture also expressions - occurring further up in the call stack, our ``call_cc`` may be need to be - placed differently (further out, depending on what needs to be captured) - due to the delimited nature of the continuations implemented here. - - Scheme and Racket implicitly capture the continuation at every position, - whereas we do it explicitly, only at the use sites of the ``call_cc`` macro. 
- - Also, since there are limitations to where a ``call_cc[]`` may appear, some - code may need to be structured differently to do some particular thing, if - porting code examples originally written in Scheme or Racket. - - Unlike ``call/cc`` in Scheme/Racket, ``call_cc`` takes **a function call** - as its argument, not just a function reference. Also, there's no need for - it to be a one-argument function; any other args can be passed in the call. - The ``cc`` argument is filled implicitly and passed by name; any others are - passed exactly as written in the client code. - - **Technical notes**: - - The ``call_cc[]`` statement essentially splits its use site into *before* - and *after* parts, where the *after* part (the continuation) can be run - a second and further times, by later calling the callable that represents - the continuation. This makes a computation resumable from a desired point. - - The return value of the continuation is whatever the original function - returns, for any ``return`` statement that appears lexically after the - ``call_cc[]``. - - The effect of ``call_cc[]`` is that the function call ``func(...)`` in - the brackets is performed, with its ``cc`` argument set to the lexically - remaining statements of the current ``def`` (at the top level, the rest - of the ``with continuations`` block), represented as a callable. - - The continuation itself ends there (it is *delimited* in this particular - sense), but it will chain to the ``cc`` of the function it appears in. - This is termed the *parent continuation* (**pcc**), stored in the internal - variable ``_pcc`` (which defaults to ``None``). - - Via the use of the pcc, here ``f`` will maintain the illusion of being - just one function, even though a ``call_cc`` appears there:: - - def f(*, cc): - ... - call_cc[g(1, 2, 3)] - ... - - The continuation is a closure. 
For its pcc, it will use the value the - original function's ``cc`` had when the definition of the continuation - was executed (for that particular instance of the closure). Hence, calling - the original function again with its ``cc`` set to something else will - produce a new continuation instance that chains into that new ``cc``. - - The continuation's own ``cc`` will be ``identity``, to allow its use just - like any other function (also as argument of a ``call_cc`` or target of a - tail call). - - When the pcc is set (not ``None``), the effect is to run the pcc first, - and ``cc`` only after that. This preserves the whole captured tail of a - computation also in the presence of nested ``call_cc`` invocations (in the - above example, this would occur if also ``g`` used ``call_cc``). - - Continuations are not accessible by name (their definitions are named by - gensym). To get a reference to a continuation instance, stash the value - of the ``cc`` argument somewhere while inside the ``call_cc``. - - The function ``func`` called by a ``call_cc[func(...)]`` is (almost) the - only place where the ``cc`` argument is actually set. There it is the - captured continuation. Roughly everywhere else, ``cc`` is just ``identity``. - - Tail calls are an exception to this rule; a tail call passes along the current - value of ``cc``, unless overridden manually (by setting the ``cc=...`` kwarg - in the tail call). - - When the pcc is set (not ``None``) at the site of the tail call, the - machinery will create a composed continuation that runs the pcc first, - and ``cc`` (whether current or manually overridden) after that. This - composed continuation is then passed to the tail call as its ``cc``. - - **Tips**: - - - Once you have a captured continuation, one way to use it is to set - ``cc=...`` manually in a tail call, as was mentioned. Example:: - - def main(): - call_cc[myfunc()] # call myfunc, capturing the current cont... - ... 
# ...which is the rest of "main" - - def myfunc(cc): - ourcc = cc # save the captured continuation (sent by call_cc[]) - def somefunc(): - return dostuff(..., cc=ourcc) # and use it here - somestack.append(somefunc) - - In this example, when ``somefunc`` is eventually called, it will tail-call - ``dostuff`` and then proceed with the continuation ``myfunc`` had - at the time when that instance of the ``somefunc`` closure was created. - (This pattern is essentially how to build the ``amb`` operator.) - - - Instead of setting ``cc``, you can also overwrite ``cc`` with a captured - continuation inside a function body. That overrides the continuation - for the rest of the dynamic extent of the function, not only for a - particular tail call:: - - def myfunc(cc): - ourcc = cc - def somefunc(): - cc = ourcc - return dostuff(...) - somestack.append(somefunc) - - - A captured continuation can also be called manually; it's just a callable. - - The assignment targets, at the ``call_cc[]`` use site that spawned this - particular continuation, specify its call signature. All args are - positional, except the implicit ``cc``, which is by-name-only. - - - Just like in Scheme/Racket's ``call/cc``, the values that get bound - to the ``call_cc[]`` assignment targets on second and further calls - (when the continuation runs) are the arguments given to the continuation - when it is called (whether implicitly or manually). - - - Setting ``cc`` to ``unpythonic.fun.identity``, while inside a ``call_cc``, - will short-circuit the rest of the computation. In such a case, the - continuation will not be invoked automatically. A useful pattern for - suspend/resume. - - - However, it is currently not possible to prevent the rest of the tail - of a captured continuation (the pcc) from running, apart from manually - setting ``_pcc`` to ``None`` before executing a ``return``. Note that - doing that is not strictly speaking supported (and may be subject to - change in a future version). 
- - - When ``call_cc[]`` appears inside a function definition: - - - It tail-calls ``func``, with its ``cc`` set to the captured - continuation. - - - The return value of the function containing one or more ``call_cc[]`` - statements is the return value of the continuation. - - - When ``call_cc[]`` appears at the top level of ``with continuations``: - - - A normal call to ``func`` is made, with its ``cc`` set to the captured - continuation. - - - In this case, if the continuation is called later, it always - returns ``None``, because the use site of ``call_cc[]`` is not - inside a function definition. - - - If you need to insert just a tail call (no further statements) before - proceeding with the current continuation, no need for ``call_cc[]``; - use ``return func(...)`` instead. - - The purpose of ``call_cc[func(...)]`` is to capture the current - continuation (the remaining statements), and hand it to ``func`` - as a first-class value. - - - To combo with ``multilambda``, use this ordering:: - - with multilambda, continuations: - ... - - - Some very limited comboability with ``call_ec``. May be better to plan - ahead, using ``call_cc[]`` at the appropriate outer level, and then - short-circuit (when needed) by setting ``cc`` to ``identity``. - This avoids the need to have both ``call_cc`` and ``call_ec`` at the - same time. - - - ``unpythonic.ec.call_ec`` can be used normally **lexically before any** - ``call_cc[]``, but (in a given function) after at least one ``call_cc[]`` - has run, the ``ec`` ceases to be valid. This is because our ``call_cc[]`` - actually splits the function into *before* and *after* parts, and - **tail-calls** the *after* part. - - (Wrapping the ``def`` in another ``def``, and placing the ``call_ec`` - on the outer ``def``, does not help either, because even the outer - function has exited by the time *the continuation* is later called - the second and further times.) 
- - Usage of ``call_ec`` while inside a ``with continuations`` block is:: - - with continuations: - @call_ec - def result(ec): - print("hi") - ec(42) - print("not reached") - assert result == 42 - - result = call_ec(lambda ec: do[print("hi"), - ec(42), - print("not reached")]) - - Note the signature of ``result``. Essentially, ``ec`` is a function - that raises an exception (to escape to a dynamically outer context), - whereas the implicit ``cc`` is the closure-based continuation handled - by the continuation machinery. - - See the ``tco`` macro for details on the ``call_ec`` combo. - """ - if syntax != "block": - raise SyntaxError("continuations is a block macro only") - - # Two-pass macro. - with dyn.let(_macro_expander=expander): - return _continuations(block_body=tree) - -# ----------------------------------------------------------------------------- - -@parametricmacro -def nb(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Ultralight math notebook. - - Auto-print top-level expressions, auto-assign last result as _. - - A custom print function can be supplied as an argument. - - Example:: - - with nb: - 2 + 3 - 42 * _ - - from sympy import * - with nb[pprint]: - x, y = symbols("x, y") - x * y - 3 * _ - """ - if syntax != "block": - raise SyntaxError("nb is a block macro only") - - # Expand outside in. This macro is so simple and orthogonal the - # ordering doesn't matter. This is cleaner. - return _nb(body=tree, args=args) - -# ----------------------------------------------------------------------------- - -@parametricmacro -def dbg(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Debug-print expressions including their source code. 
- - **Expression variant**: - - Example:: - - dbg[25 + 17] # --> [file.py:100] (25 + 17): 42 - - The transformation is:: - - dbg[expr] --> dyn.dbgprint_expr(k, v, filename=__file__, lineno=xxx) - - where ``k`` is the source code of the expression and ``v`` is its value, - and `dyn` is `unpythonic.dynassign.dyn` (hygienically captured, so you - don't need to import it just to use the `dbg[]` macro). - - ``xxx`` is the original line number before macro expansion, if available - in the AST node of the expression, otherwise ``None``. (Some macros might - not care about inserting line numbers, because MacroPy fixes any missing - line numbers at the end; this is why it might be missing at some locations - in any specific macro-enabled program.) - - A default implementation of the debug printer is provided and automatically - assigned as the default value for `dyn.dbgprint_expr`. - - To customize the debug printing, set your custom printer function to the - dynvar ``dbgprint_expr``, using `with dyn.let(dbgprint_expr=...)`. - - The custom function, beside performing any printing/logging as a side effect, - **must** return the value ``v``, so that surrounding an expression with - ``dbg[...]`` does not alter its value. - - If you want to use the default implementation as part of your customized one - (e.g. if you want to decorate that with some logging code), it is available as - `unpythonic.syntax.dbgprint_expr`. - - **Block variant**: - - Lexically within the block, any call to ``print`` (alternatively, if specified, - the optional custom print function), prints both the expression source code - and the corresponding value. - - A custom print function can be supplied as an argument. To implement a - custom print function, see the default implementation ``dbgprint_block`` - for the signature. - - If you want to use the default implementation as part of your customized one, - it is available as `unpythonic.syntax.dbgprint_block`. 
- - Examples:: - - with dbg: - x = 2 - print(x) # --> [file.py:100] x: 2 - - with dbg: - x = 2 - y = 3 - print(x, y) # --> [file.py:100] x: 2, y: 3 - print(x, y, sep="\n") # --> [file.py:100] x: 2 - # [file.py:100] y: 3 - - prt = lambda *args, **kwargs: print(*args) - with dbg[prt]: - x = 2 - prt(x) # --> ('x',) (2,) - print(x) # --> 2 - - with dbg[prt]: - x = 2 - y = 17 - prt(x, y, 1 + 2) # --> ('x', 'y', '(1 + 2)'), (2, 17, 3)) - - **CAUTION**: The source code is back-converted from the AST representation; - hence its surface syntax may look slightly different to the original (e.g. - extra parentheses). See ``mcpyrate.unparse``. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("dbg is an expr and block macro only") - - tree = expander.visit(tree) - - if syntax == "expr": - return _dbg_expr(tree) - else: # syntax == "block": - return _dbg_block(body=tree, args=args) - -# ----------------------------------------------------------------------------- - -def lazify(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Call-by-need for Python. - - In a ``with lazify`` block, function arguments are evaluated only when - actually used, at most once each, and in the order in which they are - actually used. Promises are automatically forced on access. - - Automatic lazification applies to arguments in function calls and to - let-bindings, since they play a similar role. **No other binding forms - are auto-lazified.** - - Automatic lazification uses the ``lazyrec[]`` macro, which recurses into - certain types of container literals, so that the lazification will not - interfere with unpacking. See its docstring for details. - - Comboing with other block macros in ``unpythonic.syntax`` is supported, - including ``curry`` and ``continuations``. - - Silly contrived example:: - - with lazify: - def my_if(p, a, b): - if p: - return a # b never evaluated in this code path... - else: - return b # a never evaluated in this code path... 
- - # ...hence the divisions by zero here are never performed. - assert my_if(True, 23, 1/0) == 23 - assert my_if(False, 1/0, 42) == 42 - - Note ``my_if`` is a run-of-the-mill runtime function, not a macro. Only the - ``with lazify`` is imbued with any magic. - - Like ``with continuations``, no state or context is associated with a - ``with lazify`` block, so lazy functions defined in one block may call - those defined in another. Calls between lazy and strict code are also - supported (in both directions), without requiring any extra effort. - - Evaluation of each lazified argument is guaranteed to occur at most once; - the value is cached. Order of evaluation of lazy arguments is determined - by the (dynamic) order in which the lazy code actually uses them. - - Essentially, the above code expands into:: - - from unpythonic.syntax import macros, lazy - from unpythonic.syntax import force - - def my_if(p, a, b): - if force(p): - return force(a) - else: - return force(b) - assert my_if(lazy[True], lazy[23], lazy[1/0]) == 23 - assert my_if(lazy[False], lazy[1/0], lazy[42]) == 42 - - plus some clerical details to allow lazy and strict code to be mixed. - - Just passing through a lazy argument to another lazy function will - not trigger evaluation, even when it appears in a computation inlined - to the argument list:: - - with lazify: - def g(a, b): - return a - def f(a, b): - return g(2*a, 3*b) - assert f(21, 1/0) == 42 - - The division by zero is never performed, because the value of ``b`` is - not needed to compute the result (worded less magically, that promise is - never forced in the code path that produces the result). 
Essentially, - the above code expands into:: - - from unpythonic.syntax import macros, lazy - from unpythonic.syntax import force - - def g(a, b): - return force(a) - def f(a, b): - return g(lazy[2*force(a)], lazy[3*force(b)]) - assert f(lazy[21], lazy[1/0]) == 42 - - This relies on the magic of closures to capture f's ``a`` and ``b`` into - the promises. - - But be careful; **assignments are not auto-lazified**, so the following does - **not** work:: - - with lazify: - def g(a, b): - return a - def f(a, b): - c = 3*b # not in an arglist, b gets evaluated! - return g(2*a, c) - assert f(21, 1/0) == 42 - - To avoid that, explicitly wrap the computation into a ``lazy[]``. For why - assignment RHSs are not auto-lazified, see the section on pitfalls below. - - In calls, bare references (name, subscript, attribute) are detected and for - them, re-thunking is skipped. For example:: - - def g(a): - return a - def f(a): - return g(a) - assert f(42) == 42 - - expands into:: - - def g(a): - return force(a) - def f(a): - return g(a) # <-- no lazy[force(a)] since "a" is just a name - assert f(lazy[42]) == 42 - - When resolving references, subscripts and attributes are forced just enough - to obtain the containing object from a promise, if any; for example, the - elements of a list ``lst`` will not be evaluated just because the user code - happens to use ``lst.append(...)``; this only forces the object ``lst`` - itself. - - A ``lst`` appearing by itself evaluates the whole list. Similarly, ``lst[0]`` - by itself evaluates only the first element, and ``lst[:-1]`` by itself - evaluates all but the last element. The index expression in a subscript is - fully forced, because its value is needed to determine which elements of the - subscripted container are to be accessed. - - **Mixing lazy and strict code** - - Lazy code is allowed to call strict functions and vice versa, without - requiring any additional effort. 
- - Keep in mind what this implies: when calling a strict function, any arguments - given to it will be evaluated! - - In the other direction, when calling a lazy function from strict code, the - arguments are evaluated by the caller before the lazy code gets control. - The lazy code gets just the evaluated values. - - If you have, in strict code, an argument expression you want to pass lazily, - use syntax like ``f(lazy[...], ...)``. If you accidentally do this in lazy - code, it shouldn't break anything; ``with lazify`` detects any argument - expressions that are already promises, and just passes them through. - - **Forcing promises manually** - - This is mainly useful if you ``lazy[]`` or ``lazyrec[]`` something explicitly, - and want to compute its value outside a ``with lazify`` block. - - We provide the functions ``force1`` and ``force``. - - Using ``force1``, if ``x`` is a ``lazy[]`` promise, it will be forced, - and the resulting value is returned. If ``x`` is not a promise, - ``x`` itself is returned, à la Racket. - - The function ``force``, in addition, descends into containers (recursively). - When an atom ``x`` (i.e. anything that is not a container) is encountered, - it is processed using ``force1``. - - Mutable containers are updated in-place; for immutables, a new instance is - created. Any container with a compatible ``collections.abc`` is supported. - (See ``unpythonic.collections.mogrify`` for details.) In addition, as - special cases ``unpythonic.collections.box`` and ``unpythonic.llist.cons`` - are supported. - - **Tips, tricks and pitfalls** - - You can mix and match bare data values and promises, since ``force(x)`` - evaluates to ``x`` when ``x`` is not a promise. 
- - So this is just fine:: - - with lazify: - def f(x): - x = 2*21 # assign a bare data value - print(x) # the implicit force(x) evaluates to x - f(17) - - If you want to manually introduce a promise, use ``lazy[]``:: - - from unpythonic.syntax import macros, lazify, lazy - - with lazify: - def f(x): - x = lazy[2*21] # assign a promise - print(x) # the implicit force(x) evaluates the promise - f(17) - - If you have a container literal and want to lazify it recursively in a - position that does not auto-lazify, use ``lazyrec[]`` (see its docstring - for details):: - - from unpythonic.syntax import macros, lazify, lazyrec - - with lazify: - def f(x): - return x[:-1] - lst = lazyrec[[1, 2, 3/0]] - assert f(lst) == [1, 2] - - For non-literal containers, use ``lazy[]`` for each item as appropriate:: - - def f(lst): - lst.append(lazy["I'm lazy"]) - lst.append(lazy["Don't call me lazy, I'm just evaluated later!"]) - - Keep in mind, though, that ``lazy[]`` will introduce a lambda, so there's - the usual pitfall:: - - from unpythonic.syntax import macros, lazify, lazy - - with lazify: - lst = [] - for x in range(3): # DANGER: only one "x", mutated imperatively - lst.append(lazy[x]) # all these closures capture the same "x" - print(lst[0]) # 2 - print(lst[1]) # 2 - print(lst[2]) # 2 - - So to capture the value instead of the name, use the usual workaround, - the wrapper lambda (here written more readably as a let, which it really is):: - - from unpythonic.syntax import macros, lazify, lazy, let - - with lazify: - lst = [] - for x in range(3): - lst.append(let[(y, x) in lazy[y]]) - print(lst[0]) # 0 - print(lst[1]) # 1 - print(lst[2]) # 2 - - Be careful not to ``lazy[]`` or ``lazyrec[]`` too much:: - - with lazify: - a = 10 - a = lazy[2*a] # 20, right? - print(a) # crash! - - Why does this example crash? 
The expanded code is:: - - with lazify: - a = 10 - a = lazy[2*force(a)] - print(force(a)) - - The ``lazy[]`` sets up a promise, which will force ``a`` *at the time when - the containing promise is forced*, but at that time the name ``a`` points - to a promise, which will force... - - The fundamental issue is that ``a = 2*a`` is an imperative update; if you - need to do that, just let Python evaluate the RHS normally (i.e. use the - value the name ``a`` points to *at the time when the RHS runs*). - - Assigning a lazy value to a new name evaluates it, because any read access - triggers evaluation:: - - with lazify: - def g(x): - y = x # the "x" on the RHS triggers the implicit force - print(y) # bare data value - f(2*21) - - Inspired by Haskell, Racket's (delay) and (force), and lazy/racket. - - **Combos** - - Introducing the *HasThon* programming language (it has 100% more Thon than - popular brands):: - - with autocurry, lazify: # or continuations, autocurry, lazify if you want those - def add2first(a, b, c): - return a + b - assert add2first(2)(3)(1/0) == 5 - - def f(a, b): - return a - assert let[((c, 42), - (d, 1/0)) in f(c)(d)] == 42 - assert letrec[((c, 42), - (d, 1/0), - (e, 2*c)) in f(e)(d)] == 84 - - assert letrec[((c, 42), - (d, 1/0), - (e, 2*c)) in [local[x << f(e)(d)], - x/4]] == 21 - - Works also with continuations. Rules: - - - Also continuations are transformed into lazy functions. - - - ``cc`` built by chain_conts is treated as lazy, **itself**; then it's - up to the continuations chained by it to decide whether to force their - arguments. - - - The default continuation ``identity`` is strict, so that return values - from a continuation-enabled computation will be forced. 
- - Example:: - - with continuations, lazify: - k = None - def setk(*args, cc): - nonlocal k - k = cc - return args[0] - def doit(): - lst = ['the call returned'] - *more, = call_cc[setk('A', 1/0)] - return lst + [more[0]] - assert doit() == ['the call returned', 'A'] - assert k('again') == ['the call returned', 'again'] - assert k('thrice', 1/0) == ['the call returned', 'thrice'] - - For a version with comments, see ``unpythonic/syntax/test/test_lazify.py``. - - **CAUTION**: Call-by-need is a low-level language feature that is difficult - to bolt on after the fact. Some things might not work. - - **CAUTION**: The functions in ``unpythonic.fun`` are lazify-aware (so that - e.g. curry and compose work with lazy functions), as are ``call`` and - ``callwith`` in ``unpythonic.misc``, but the rest of ``unpythonic`` is not. - - **CAUTION**: Argument passing by function call, and let-bindings are - currently the only binding constructs to which auto-lazification is applied. - """ - if syntax != "block": - raise SyntaxError("lazify is a block macro only") - - # Two-pass macro. - with dyn.let(_macro_expander=expander): - return _lazify(body=tree) - -# The `lazy` macro comes from `demo/promise.py` in `mcpyrate`. -def lazy(tree, *, syntax, **kw): # noqa: F811 - """[syntax, expr] Delay an expression (lazy evaluation). - - This macro injects a lambda to delay evaluation, and encapsulates - the result into a *promise* (an `unpythonic.lazyutil.Lazy` object). - - In Racket, this operation is known as `delay`. - """ - if syntax != "expr": - raise SyntaxError("lazy is an expr macro only") - - # Expand outside in. Ordering shouldn't matter here. - return _lazy(tree) - -def lazyrec(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Delay items in a container literal, recursively. 
- - Essentially, this distributes ``lazy[]`` into the items inside a literal - ``list``, ``tuple``, ``set``, ``frozenset``, ``unpythonic.collections.box`` - or ``unpythonic.llist.cons``, and into the values of a literal ``dict`` or - ``unpythonic.collections.frozendict``. - - Because this is a macro and must work by names only, only this fixed set of - container types is supported. - - The container itself is not lazified, only the items inside it are, to keep - the lazification from interfering with unpacking. This allows things such as - ``f(*lazyrec[(1*2*3, 4*5*6)])`` to work as expected. - - See also ``lazy[]`` (the effect on each item) and ``unpythonic.syntax.force`` - (the inverse of ``lazyrec[]``). - - For an atom, ``lazyrec[]`` has the same effect as ``lazy[]``:: - - lazyrec[dostuff()] --> lazy[dostuff()] - - For a container literal, ``lazyrec[]`` descends into it:: - - lazyrec[(2*21, 1/0)] --> (lazy[2*21], lazy[1/0]) - lazyrec[{'a': 2*21, 'b': 1/0}] --> {'a': lazy[2*21], 'b': lazy[1/0]} - - Constructor call syntax for container literals is also supported:: - - lazyrec[list(2*21, 1/0)] --> [lazy[2*21], lazy[1/0]] - - Nested container literals (with any combination of known types) are - processed recursively, for example:: - - lazyrec[((2*21, 1/0), (1+2+3, 4+5+6))] --> ((lazy[2*21], lazy[1/0]), - (lazy[1+2+3], lazy[4+5+6])) - """ - if syntax != "expr": - raise SyntaxError("lazyrec is an expr macro only") - - # Expand outside in. Ordering shouldn't matter here. - return _lazyrec(tree) - -# ----------------------------------------------------------------------------- - -def prefix(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, block] Write Python like Lisp: the first item is the operator. - - Example:: - - with prefix: - (print, "hello world") - t1 = (q, 1, 2, (3, 4), 5) - x = 42 - t2 = (q, 17, 23, x) - (print, t1, t2) - - Lexically inside a ``with prefix``: - - - A bare ``q`` at the head of a tuple is the quote operator. 
It increases - the quote level by one. - - It actually just tells the macro that this tuple (and everything in it, - recursively) is not a function call. - - Variables can be used as usual, there is no need to unquote them. - - - A bare ``u`` at the head of a tuple is the unquote operator, which - decreases the quote level by one. In other words, in:: - - with prefix: - t = (q, 1, 2, (u, print, 3), (print, 4), 5) - (print, t) - - the third item will call ``print(3)`` and evaluate to its return value - (in this case ``None``, since it's ``print``), whereas the fourth item - is a tuple with the two items ``(, 4)``. - - - Quote/unquote operators are parsed from the start of the tuple until - no more remain. Then any remaining items are either returned quoted - (if quote level > 0), or evaluated as a function call and replaced - by the return value. - - - How to pass named args:: - - from unpythonic.misc import call - - with prefix: - (f, kw(myarg=3)) # ``kw(...)`` (syntax, not really a function!) - call(f, myarg=3) # in a call(), kwargs are ok - f(myarg=3) # or just use Python's usual function call syntax - - One ``kw`` operator may include any number of named args (and **only** - named args). The tuple may have any number of ``kw`` operators. - - All named args are collected from ``kw`` operators in the tuple - when writing the final function call. If the same kwarg has been - specified by multiple ``kw`` operators, the rightmost definition wins. - - **Note**: Python itself prohibits having repeated named args in the **same** - ``kw`` operator, because it uses the function call syntax. If you get a - `SyntaxError: keyword argument repeated` with no useful traceback, - check any recent ``kw`` operators you have added in prefix blocks. - - A ``kw(...)`` operator in a quoted tuple (not a function call) is an error. - - Current limitations: - - - passing ``*args`` and ``**kwargs`` not supported. - - Workarounds: ``call(...)``; Python's usual function call syntax. 
- - - For ``*args``, to keep it lispy, maybe you want ``unpythonic.fun.apply``; - this allows syntax such as ``(apply, f, 1, 2, lst)``. - - **CAUTION**: This macro is experimental, not intended for production use. - """ - if syntax != "block": - raise SyntaxError("prefix is a block macro only") - - # Expand outside in. Any nested macros should get clean standard Python, - # not having to worry about tuples possibly denoting function calls. - return _prefix(block_body=tree) - -# ----------------------------------------------------------------------------- - -@parametricmacro -def test(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Make a test assertion. For writing automated tests. - - **Testing overview**: - - Use the `test[]`, `test_raises[]`, `test_signals[]`, `fail[]`, `error[]` - and `warn[]` macros inside a `with testset()`, as appropriate. - - See `testset` and `session` in the module `unpythonic.test.fixtures`, - as well as the docstrings of any constructs exported from that module. - - See below for tips and tricks. - - Finally, see the unit tests of `unpythonic` itself for examples. - - **Expression variant**: - - Syntax:: - - test[expr] - test[expr, message] - - The test succeeds if `expr` evaluates to truthy. The `message` - is used in forming the error message if the test fails or errors. - - If you want to assert just that an expression runs to completion - normally, and don't care about the return value:: - - from unpythonic.test.fixtures import returns_normally - - test[returns_normally(expr)] - test[returns_normally(expr), message] - - This can be useful for testing functions with side effects; sometimes - what is important is that the function completes normally. - - What `test[expr]` captures for reporting as "result" in the failure - message, if the test fails: - - - If a `the[...]` mark is present, the subexpression marked as `the[...]`. - At most one `the[]` may appear in a single `test[...]`. 
- - Else if `expr` is a comparison, the LHS (leftmost term in case of - a chained comparison). So e.g. `test[x < 3]` needs no annotation - to do the right thing. This is a common use case, hence automatic. - - Else the whole `expr`. - - The `the[...]` mark is useful in tests involving comparisons:: - - test[lower_limit < the[computeitem(...)]] - test[lower_limit < the[computeitem(...)] < upper_limit] - test[myconstant in the[computeset(...)]] - - If your interesting part is on the LHS, `the[]` is optional, although - allowed (to explicitly document intent). These have the same effect:: - - test[the[computeitem(...)] in myitems] - test[computeitem(...) in myitems] - - The `the[...]` mark passes the value through, and does not affect the - evaluation order of user code. - - The `the[]` mark can be imported as a macro from this module, so that - its appearance in your source code won't confuse `flake8`, and you'll - get a nice macro-expansion-time error if it accidentally appears outside - a `test[]` or `with test:`. - - **Block variant**: - - A test that requires statements (e.g. assignments) can be written as a - `with test` block:: - - with test: - body0 - ... - return expr # optional - - with test(message): - body0 - ... - return expr # optional - - The test block is automatically lifted into a function, so it introduces - **a local scope**. Use the `nonlocal` or `global` declarations if you need - to mutate something defined on the outside. - - If there is a `return` at the top level of the block, that is the return - value from the test; it is what will be asserted. - - If there is no `return`, the test asserts that the block completes normally, - just like a `test[returns_normally(...)]` does for an expression. - - The asymmetry in syntax reflects the asymmetry between expressions and - statements in Python. 
Likewise, the fact that `with test` requires `return` - to return a value, but `test[...]` doesn't, is similar to the difference - between `def` and `lambda`. - - In the block variant, the "result" capture rules apply to the return value - designated by `return`. To override, the `the[]` mark can be used for - capturing the value of any one expression inside the block. The mark - doesn't have to be in the `return`. - - At most one `the[]` may appear in the same `with test` block. - - **Failure and error signaling**: - - Upon a test failure, `test[]` will *signal* a `TestFailure` using the - *cerror* (correctable error) protocol, via unpythonic's condition - system, which is a pythonification of Common Lisp's condition system. - See `unpythonic.conditions`. - - If a test fails to run to completion due to an uncaught exception or an - unhandled signal (e.g. an `error` or `cerror` condition), `TestError` - is signaled instead, so the caller can easily tell apart which case - occurred. - - Finally, when a `warn[]` runs, `TestWarning` is signaled. - - These condition types are defined in `unpythonic.test.fixtures`. - They inherit from `TestingException`, defined in the same module. - Beside the human-readable message, these exception types contain - attributes with programmatically inspectable information about - what happened. See the docstring of `TestingException`. - - *Signaling* a condition, instead of *raising* an exception, allows the - surrounding code (inside the test framework) to install a handler that - invokes the `proceed` restart (if there is such in scope), so upon a test - failure or error, the test suite resumes. - - **Disabling the signal barrier**: - - As implied above, `test[]` (likewise `with test:`) forms a barrier that - alerts the user about uncaught signals, and stops those signals from - propagating further. 
If your `with handlers` block that needs to see - the signal is outside the `test` invocation, or if allowing a signal to - go uncaught is part of normal operation (e.g. `warn` signals are often - not caught, because the only reason to do so is to muffle the warning), - use a `with catch_signals(False):` block (from the module - `unpythonic.test.fixtures`) to disable the signal barrier:: - - from unpythonic.test.fixtures import catch_signals - - with catch_signals(False): - test[...] - - Another way to avoid catching signals that should not be caught by the - test framework is to rearrange the `test[]` so that the expression being - asserted cannot result in an uncaught signal. For example, save the result - of a computation into a variable first, and then use it in the `test[]`, - instead of invoking that computation inside the `test[]`. See - `unpythonic.test.test_conditions` for examples. - - Exceptions are always caught by `test[]`, because exceptions do not support - resumption; unlike with signals, the inner level of the call stack is already - destroyed by the time the exception is caught by the test construct. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("test is an expr and block macro only") - - # Two-pass macros. - with dyn.let(_macro_expander=expander): - if syntax == "expr": - if args: - raise SyntaxError("test[] in expression mode does not take macro arguments") - return _test_expr(tree) - else: # syntax == "block": - return _test_block(block_body=tree, args=args) - -@parametricmacro -def test_signals(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Like `test`, but expect the expression to signal a condition. - - "Signal" as in `unpythonic.conditions.signal` and its sisters. - - Syntax:: - - test_signals[exctype, expr] - test_signals[exctype, expr, message] - - with test_signals[exctype]: - body0 - ... - - with test_signals[exctype, message]: - body0 - ... 
- - Example:: - - test_signals[ValueError, myfunc()] - test_signals[ValueError, myfunc(), "failure message"] - - The test succeeds, if `expr` signals a condition of type `exctype`, and the - signal propagates into the (implicit) handler inside the `test_signals[]` - construct. - - If `expr` returns normally, the test fails. - - If `expr` signals some other type of condition, or raises an exception, the - test errors. - - **Differences to `test[]`, `with test`**: - - As the focus of this construct is on signaling vs. returning normally, the - `the[]` mark is not supported. The block variant does not support `return`. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("test_signals is an expr and block macro only") - - # Two-pass macros. - with dyn.let(_macro_expander=expander): - if syntax == "expr": - if args: - raise SyntaxError("test_signals[] in expression mode does not take macro arguments") - return _test_expr_signals(tree) - else: # syntax == "block": - return _test_block_signals(block_body=tree, args=args) - -@parametricmacro -def test_raises(tree, *, args, syntax, expander, **kw): # noqa: F811 - """[syntax, expr/block] Like `test`, but expect the expression to raise an exception. - - Syntax:: - - test_raises[exctype, expr] - test_raises[exctype, expr, message] - - with test_raises[exctype]: - body0 - ... - - with test_raises[exctype, message]: - body0 - ... - - Example:: - - test_raises[TypeError, issubclass(1, int)] - test_raises[ValueError, myfunc()] - test_raises[ValueError, myfunc(), "failure message"] - - The test succeeds, if `expr` raises an exception of type `exctype`, and the - exception propagates into the (implicit) handler inside the `test_raises[]` - construct. - - If `expr` returns normally, the test fails. - - If `expr` signals a condition, or raises some other type of exception, the - test errors. - - **Differences to `test[]`, `with test`**: - - As the focus of this construct is on raising vs. 
returning normally, the - `the[]` mark is not supported. The block variant does not support `return`. - """ - if syntax not in ("expr", "block"): - raise SyntaxError("test_raises is an expr and block macro only") - - with dyn.let(_macro_expander=expander): - if syntax == "expr": - if args: - raise SyntaxError("test_raises[] in expression mode does not take macro arguments") - return _test_expr_raises(tree) - else: # syntax == "block": - return _test_block_raises(block_body=tree, args=args) - -def fail(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Produce a test failure, unconditionally. - - Useful to e.g. mark a line of code that should not be reached in automated - tests, reaching which is therefore a test failure. - - Usage:: - - fail["human-readable reason"] - - which has the same effect as:: - - test[False, "human-readable reason"] - - except in the case of `fail[]`, the error message generating machinery is - special-cased to omit the source code expression, because it explicitly - states that the intent of the "test" is not actually to perform a test. - - See also `error[]`, `warn[]`. - """ - if syntax != "expr": - raise SyntaxError("fail is an expr macro only") - - # Expand outside in. The ordering shouldn't matter here. - # The underlying `test` machinery needs to access the expander. - with dyn.let(_macro_expander=expander): - return _fail_expr(tree) - -def error(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Produce a test error, unconditionally. - - Useful to e.g. indicate to the user that an optional dependency that could - be used to run some integration test is not installed. - - Usage:: - - error["human-readable reason"] - - See also `warn[]`, `fail[]`. - """ - if syntax != "expr": - raise SyntaxError("error is an expr macro only") - - # Expand outside in. The ordering shouldn't matter here. - # The underlying `test` machinery needs to access the expander. 
- with dyn.let(_macro_expander=expander): - return _error_expr(tree) - -def warn(tree, *, syntax, expander, **kw): # noqa: F811 - """[syntax, expr] Produce a test warning, unconditionally. - - Useful to e.g. indicate that the Python interpreter or version the - tests are running on does not support a particular test, or to alert - about a non-essential TODO. - - A warning does not increase the failure count, so it will not cause - your CI workflow to break. - - Usage:: - - warn["human-readable reason"] - - See also `error[]`, `fail[]`. - """ - if syntax != "expr": - raise SyntaxError("warn is an expr macro only") - - # Expand outside in. The ordering shouldn't matter here. - # The underlying `test` machinery needs to access the expander. - with dyn.let(_macro_expander=expander): - return _warn_expr(tree) +_make_dynvar(_macro_expander=_NoExpander()) -# ----------------------------------------------------------------------------- +# Set up `unpythonic`'s AST markers to be deleted by the macro expander's global postprocessor. +# This way we can use AST markers for data-driven internal communication between macros. +from . import util +util.register_postprocessor_hook() diff --git a/unpythonic/syntax/astcompat.py b/unpythonic/syntax/astcompat.py deleted file mode 100644 index 25a05c5a..00000000 --- a/unpythonic/syntax/astcompat.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -"""Conditionally import AST node types only supported by recent enough Python versions (3.7+).""" - -# This is an internal module and does not have an officially defined `__all__`. -# Any names defined here are fair game to use anywhere inside `unpythonic.syntax`. - -import ast - -from ..symbol import gensym - -_NoSuchNodeType = gensym("_NoSuchNodeType") - -# -------------------------------------------------------------------------------- -# New AST node types - -# Minimum language version supported by this module is Python 3.6. - -# No new AST node types in Python 3.7. 
- -try: # Python 3.8+ - from ast import NamedExpr # a.k.a. walrus operator ":=" -except ImportError: # pragma: no cover - NamedExpr = _NoSuchNodeType - -# No new AST node types in Python 3.9. - -# TODO: any new AST node types in Python 3.10? (release expected in October 2021) - -# -------------------------------------------------------------------------------- -# Deprecated AST node types - -try: # Python 3.8+, https://docs.python.org/3/whatsnew/3.8.html#deprecated - from ast import Num, Str, Bytes, NameConstant, Ellipsis -except ImportError: # pragma: no cover - Num = Str = Bytes = NameConstant = Ellipsis = _NoSuchNodeType - -try: # Python 3.9+, https://docs.python.org/3/whatsnew/3.9.html#deprecated - from ast import Index, ExtSlice - # We ignore the internal classes Suite, Param, AugLoad, AugStore, - # which were never used in Python 3.x. -except ImportError: # pragma: no cover - Index = ExtSlice = _NoSuchNodeType - -# -------------------------------------------------------------------------------- -# Compatibility functions - -def getconstant(tree): - """Given an AST node `tree` representing a constant, return the contained raw value. - - This encapsulates the AST differences between Python 3.8+ and older versions. - - There are no `setconstant` or `makeconstant` counterparts, because you can - just create an `ast.Constant` in Python 3.6 and later. The parser doesn't - emit them until Python 3.8, but Python 3.6+ compile `ast.Constant` just fine. - """ - if type(tree) is ast.Constant: # Python 3.8+ - return tree.value - # up to Python 3.7 - elif type(tree) is ast.NameConstant: # up to Python 3.7 - return tree.value - elif type(tree) is ast.Num: - return tree.n - elif type(tree) in (ast.Str, ast.Bytes): - return tree.s - elif type(tree) is ast.Ellipsis: # `ast.Ellipsis` is the AST node type, `builtins.Ellipsis` is `...`. - return ... 
- raise TypeError(f"Not an AST node representing a constant: {type(tree)} with value {repr(tree)}") diff --git a/unpythonic/syntax/autocurry.py b/unpythonic/syntax/autocurry.py index d68bf880..0124c3b5 100644 --- a/unpythonic/syntax/autocurry.py +++ b/unpythonic/syntax/autocurry.py @@ -1,22 +1,83 @@ # -*- coding: utf-8 -*- """Automatic currying. Transforms both function definitions and calls.""" +__all__ = ["autocurry"] + from ast import Call, Lambda, FunctionDef, AsyncFunctionDef from mcpyrate.quotes import macros, q, a, h # noqa: F401 +from mcpyrate.astcompat import TypeAlias from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer from .util import (suggest_decorator_index, isx, has_curry, sort_lambda_decorators) +from ..dynassign import dyn + # CAUTION: unpythonic.syntax.lambdatools.namedlambda depends on the exact names # "curryf" and "currycall" to detect an auto-curried expression with a final lambda. from ..fun import curry as curryf, _currycall as currycall + +def autocurry(tree, *, syntax, expander, **kw): # technically a list of trees, the body of the with block + """[syntax, block] Automatic currying. + + Usage:: + + from unpythonic.syntax import macros, autocurry + + with autocurry: + ... + + All **function calls** and **function definitions** (``def``, ``lambda``) + *lexically* inside the ``with autocurry`` block are automatically curried. + + **CAUTION**: Some builtins are uninspectable or may report their arities + incorrectly; in those cases, ``curry`` may fail, occasionally in mysterious + ways. + + The function ``unpythonic.arity.arities``, which ``unpythonic.fun.curry`` + internally uses, has a workaround for the inspectability problems of all + builtins in the top-level namespace (as of Python 3.7), but e.g. methods + of builtin types are not handled. 
+ + Lexically inside a ``with autocurry`` block, the auto-curried function calls + will skip the curry if the function is uninspectable, instead of raising + ``TypeError`` as usual. + + Example:: + + from unpythonic.syntax import macros, autocurry + from unpythonic import foldr, composerc as compose, cons, nil, ll + + with autocurry: + def add3(a, b, c): + return a + b + c + assert add3(1)(2)(3) == 6 + assert add3(1, 2)(3) == 6 + assert add3(1)(2, 3) == 6 + assert add3(1, 2, 3) == 6 + + mymap = lambda f: foldr(compose(cons, f), nil) + double = lambda x: 2 * x + assert mymap(double, ll(1, 2, 3)) == ll(2, 4, 6) + + # The definition was auto-curried, so this works here too. + assert add3(1)(2)(3) == 6 + """ + if syntax != "block": + raise SyntaxError("autocurry is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("autocurry does not take an as-part") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _autocurry(block_body=tree) + + _iscurry = lambda name: name in ("curry", "currycall") -def autocurry(block_body): +def _autocurry(block_body): class AutoCurryTransformer(ASTTransformer): def transform(self, tree): # Ignore hygienically captured values, and don't recurse in them. @@ -25,16 +86,35 @@ def transform(self, tree): if is_captured_value(tree): return tree + # Python 3.12+: leave `type` statements alone (autocurrying a type declaration makes no sense) + if type(tree) is TypeAlias: + return tree + hascurry = self.state.hascurry - if type(tree) is Call and not isx(tree.func, "AutorefMarker"): - if has_curry(tree): # detect decorated lambda with manual curry - # the lambda inside the curry(...) is the next Lambda node we will descend into. 
- hascurry = True - if not isx(tree.func, _iscurry): - tree.args = [tree.func] + tree.args - tree.func = q[h[currycall]] - if hascurry: # this must be done after the edit because the edit changes the children - self.generic_withstate(tree, hascurry=True) + if type(tree) is Call: + # Don't auto-curry some calls we know not to need it. This is both a performance optimization + # and allows other macros (particularly `lazify`) to be able to see the original calls. + # (It also generates cleaner expanded output.) + # - `Values(...)` accepts any args and kwargs, so currying it does not make sense. + # - `(chain_conts(cc1, cc2))(...)` handles a return value in `with continuations`. + # This has the effect that in `with continuations`, the tail-calls to continuation + # functions won't be curried, but perhaps that's ok. This allows the Pytkell dialect's + # `with lazify, autocurry` combo to work with an inner `with continuations`. + if (isx(tree.func, "Values") or + (type(tree.func) is Call and isx(tree.func.func, "chain_conts"))): + # However, *do* auto-curry in the positional and named args of the call. + tree.args = self.visit(tree.args) + tree.keywords = self.visit(tree.keywords) + return tree + else: # general case + if has_curry(tree): # detect decorated lambda with manual curry + # the lambda inside the curry(...) is the next Lambda node we will descend into. 
+ hascurry = True + if not isx(tree.func, _iscurry): + tree.args = [tree.func] + tree.args + tree.func = q[h[currycall]] + if hascurry: # this must be done after the edit because the edit changes the children + self.generic_withstate(tree, hascurry=True) elif type(tree) in (FunctionDef, AsyncFunctionDef): if not any(isx(item, _iscurry) for item in tree.decorator_list): # no manual curry already @@ -55,5 +135,6 @@ def transform(self, tree): return self.generic_visit(tree) + block_body = dyn._macro_expander.visit_recursively(block_body) newbody = AutoCurryTransformer(hascurry=False).visit(block_body) return sort_lambda_decorators(newbody) diff --git a/unpythonic/syntax/autoref.py b/unpythonic/syntax/autoref.py index 33e55e82..0d4e9723 100644 --- a/unpythonic/syntax/autoref.py +++ b/unpythonic/syntax/autoref.py @@ -1,20 +1,24 @@ # -*- coding: utf-8 -*- """Implicitly reference attributes of an object.""" -from ast import (Name, Assign, Load, Call, Lambda, With, Constant, arg, +__all__ = ["autoref"] + +from ast import (Name, Load, Call, Lambda, arg, Attribute, Subscript, Store, Del) from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 -from mcpyrate import gensym +from mcpyrate import gensym, parametricmacro +from mcpyrate.astfixers import fix_ctx from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer -from .astcompat import getconstant, Str from .nameutil import isx -from .util import wrapwith, AutorefMarker +from .util import ExpandedAutorefMarker from .letdoutil import isdo, islet, ExpandedDoView, ExpandedLetView +from .testingtools import _test_function_names +from ..dynassign import dyn from ..lazyutil import force1, passthrough_lazy_args # with autoref[o]: @@ -38,21 +42,25 @@ # # One possible clean-ish implementation is:: # -# with AutorefMarker("o"): # no-op at runtime -# x # --> (lambda _ar271: _ar271[1] if _ar271[0] else x)(_autoref_resolve((o, "x"))) -# x.a # --> ((lambda _ar271: _ar271[1] if _ar271[0] else 
x)(_autoref_resolve((o, "x")))).a -# x[s] # --> ((lambda _ar271: _ar271[1] if _ar271[0] else x)(_autoref_resolve((o, "x"))))[s] -# o # --> o (can only occur if an asname is supplied) -# with AutorefMarker("p"): -# x # --> (lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x"))) -# x.a # --> ((lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x"))).a -# x[s] # --> ((lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x")))[s] -# # when the inner autoref expands, it doesn't know about the outer one, so we will get this: -# o # --> (lambda _ar314: _ar314[1] if _ar314[0] else o)(_autoref_resolve((p, "o"))) -# o.x # --> ((lambda _ar314: _ar314[1] if _ar314[0] else o)(_autoref_resolve((p, "o")))).x -# o[s] # --> ((lambda _ar314: _ar314[1] if _ar314[0] else o)(_autoref_resolve((p, "o"))))[s] -# # the outer autoref needs the marker to know to skip this (instead of looking up o.p): -# p # --> p +# $ASTMarker: +# varname: 'o' +# body: +# x # --> (lambda _ar271: _ar271[1] if _ar271[0] else x)(_autoref_resolve((o, "x"))) +# x.a # --> ((lambda _ar271: _ar271[1] if _ar271[0] else x)(_autoref_resolve((o, "x")))).a +# x[s] # --> ((lambda _ar271: _ar271[1] if _ar271[0] else x)(_autoref_resolve((o, "x"))))[s] +# o # --> o (can only occur if an as-part is supplied) +# $ASTMarker: +# varname: 'p' +# body: +# x # --> (lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x"))) +# x.a # --> ((lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x"))).a +# x[s] # --> ((lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x")))[s] +# # when the inner autoref expands, it doesn't know about the outer one, so we will get this: +# o # --> (lambda _ar314: _ar314[1] if _ar314[0] else o)(_autoref_resolve((p, "o"))) +# o.x # --> ((lambda _ar314: _ar314[1] if _ar314[0] else o)(_autoref_resolve((p, "o")))).x +# o[s] # --> ((lambda _ar314: _ar314[1] if _ar314[0] else 
o)(_autoref_resolve((p, "o"))))[s] +# # the outer autoref needs the marker to know to skip this (instead of looking up o.p): +# p # --> p # # The lambda is needed, because the lexical-variable lookup for ``x`` must occur at the use site, # and it can only be performed by Python itself. We could modify ``_autoref_resolve`` to take @@ -73,35 +81,106 @@ # to ``o`` and ``p`` directly), so that arbitrary expressions can be autoref'd without giving them # a name in user code. +@parametricmacro +def autoref(tree, *, args, syntax, expander, **kw): + """Implicitly reference attributes of an object. + + Example:: + + e = env(a=1, b=2) + c = 3 + with autoref[e]: + a + b + c + + The macro argument of `with autoref[...]` is an arbitrary expression that, + at run time, evaluates to the object instance to be autoreferenced. + + At the beginning of the block, the expression given as the macro argument + is implicitly assigned to a gensymmed variable, and then always used from + there, to ensure that the expression is evaluated only once. If you want to + explicitly name the variable instead of allowing `autoref` to gensym it, + use `with autoref[...] as ...`:: + + with autoref[e] as the_e: + a + b + c + + (Explicit naming can be useful for debugging.) + + The transformation is applied in ``Load`` context only. ``Store`` and ``Del`` + are not redirected. + + Useful e.g. with the ``.mat`` file loader of SciPy. + + **CAUTION**: `autoref` is essentially the `with` construct of JavaScript + (which is completely different from Python's meaning of `with`), which is + nowadays deprecated. See: + + https://www.ecma-international.org/ecma-262/6.0/#sec-with-statement + https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with + https://2ality.com/2011/06/with-statement.html + + **CAUTION**: The auto-reference `with` construct was deprecated in JavaScript + **for security reasons**. 
Since the autoref'd object **will hijack all name + lookups**, use `with autoref` only with an object you trust! + + **CAUTION**: `with autoref` also complicates static code analysis or makes it + outright infeasible, for the same reason. It is impossible to statically know + whether something that looks like a bare name in the source code is actually + a true bare name, or a reference to an attribute of the autoref'd object. + That status can also change at any time, since the lookup is dynamic, and + attributes can be added and removed dynamically. + """ + if syntax != "block": + raise SyntaxError("autoref is a block macro only") # pragma: no cover + if not args: + raise SyntaxError("autoref requires an argument, the object to be auto-referenced") # pragma: no cover + + target = kw.get("optional_vars", None) + if target and type(target) is not Name: # tuples not accepted + raise SyntaxError("with autoref[...] as ... takes at most one name in the as-part") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _autoref(block_body=tree, args=args, asname=target) + +# -------------------------------------------------------------------------------- + @passthrough_lazy_args def _autoref_resolve(args): - *objs, s = [force1(x) for x in args] + """Perform an autoref lookup in a `with autoref` block. + + `args`: list [obj0, ..., objN, attrname] + + Each `obj` is tried, left to right, and the first one that + `hasattr(obj, attrname)` wins. The return value is the tuple + `(True, getattr(obj, attrname))`. + + If no obj matches, the return value is `(False, None)`. 
+ """ + *objs, attrname = [force1(x) for x in args] for o in objs: - if hasattr(o, s): - return True, force1(getattr(o, s)) + if hasattr(o, attrname): + return True, force1(getattr(o, attrname)) return False, None -def autoref(block_body, args, asname): +def _autoref(block_body, args, asname): + # first pass, outside-in if len(args) != 1: raise SyntaxError("expected exactly one argument, the expr to implicitly reference") # pragma: no cover if not block_body: raise SyntaxError("expected at least one statement inside the 'with autoref' block") # pragma: no cover - o = asname.id if asname else gensym("_o") # Python itself guarantees asname to be a bare Name. + block_body = dyn._macro_expander.visit_recursively(block_body) - # TODO: We can't use `unpythonic.syntax.util.isexpandedmacromarker` here, because it - # TODO: doesn't currently understand markers with arguments. Extend it? - # - # with AutorefMarker("_o42"): - def isexpandedautorefblock(tree): - if not (type(tree) is With and len(tree.items) == 1): - return False - ctxmanager = tree.items[0].context_expr - return (type(ctxmanager) is Call and - isx(ctxmanager.func, "AutorefMarker") and - len(ctxmanager.args) == 1 and type(ctxmanager.args[0]) in (Constant, Str)) # Python 3.8+: ast.Constant - def getreferent(tree): - return getconstant(tree.items[0].context_expr.args[0]) + # second pass, inside-out + + # `autoref`'s analyzer needs the `ctx` attributes in `tree` to be filled in correctly. + block_body = fix_ctx(block_body, copy_seen_nodes=False) # TODO: or maybe copy seen nodes? + + o = asname.id if asname else gensym("_o") # Python itself guarantees asname to be a bare Name. # (lambda _ar314: _ar314[1] if _ar314[0] else x)(_autoref_resolve((p, o, "x"))) def isautoreference(tree): @@ -118,6 +197,8 @@ def add_to_resolver_list(tree, objnode): # x --> the autoref code above. 
def makeautoreference(tree): + # We don't need to care about `Done` markers from expanded `@namemacro`s + # because the transformer that calls this function recurses into them. assert type(tree) is Name and (type(tree.ctx) is Load or not tree.ctx) newtree = q[(lambda __ar_: __ar_[1] if __ar_[0] else a[tree])(h[_autoref_resolve]((n[o], u[tree.id])))] our_lambda_argname = gensym("_ar") @@ -149,10 +230,10 @@ def transform(self, tree): elif isdo(tree): view = ExpandedDoView(tree) self.generic_withstate(tree, referents=referents + [view.body[0].args.args[0].arg]) # lambda e14: ... - elif isexpandedautorefblock(tree): - self.generic_withstate(tree, referents=referents + [getreferent(tree)]) + elif isinstance(tree, ExpandedAutorefMarker): + self.generic_withstate(tree, referents=referents + [tree.varname]) elif isautoreference(tree): # generated by an inner already expanded autoref block - thename = getconstant(get_resolver_list(tree)[-1]) + thename = get_resolver_list(tree)[-1].value if thename in referents: # This case is tricky to trigger, so let's document it here. This code: # @@ -162,11 +243,15 @@ def transform(self, tree): # # expands to: # - # with AutorefMarker('_o5'): - # _o5 = e - # with AutorefMarker('_o4'): - # _o4 = (lambda _ar13: (_ar13[1] if _ar13[0] else e2))(_autoref_resolve((_o5, 'e2'))) - # (lambda _ar9: (_ar9[1] if _ar9[0] else e))(_autoref_resolve((_o4, _o5, 'e'))) + # $ASTMarker: + # varname: '_o5' + # body: + # _o5 = e + # $ASTMarker: + # varname: '_o4' + # body: + # _o4 = (lambda _ar13: (_ar13[1] if _ar13[0] else e2))(_autoref_resolve((_o5, 'e2'))) + # (lambda _ar9: (_ar9[1] if _ar9[0] else e))(_autoref_resolve((_o4, _o5, 'e'))) # # so there's no "e" as referent; the actual referent has a gensymmed name. 
# Inside the body of the inner autoref, looking up "e" in e2 before falling @@ -181,11 +266,15 @@ def transform(self, tree): # # expands to: # - # with AutorefMarker('outer'): - # outer = e - # with AutorefMarker('inner'): - # inner = (lambda _ar17: (_ar17[1] if _ar17[0] else e2))(_autoref_resolve((outer, 'e2'))) - # outer # <-- !!! + # $ASTMarker: + # varname: 'outer' + # body: + # outer = e + # $ASTMarker: + # varname: 'inner' + # body: + # inner = (lambda _ar17: (_ar17[1] if _ar17[0] else e2))(_autoref_resolve((outer, 'e2'))) + # outer # <-- !!! # # Now this case is triggered; we get a bare `outer` inside the inner body. # TODO: Whether this wart is a good idea is another question... @@ -196,7 +285,7 @@ def transform(self, tree): else: add_to_resolver_list(tree, q[n[o]]) # _autoref_resolve((p, "x")) --> _autoref_resolve((p, o, "x")) return tree - elif type(tree) is Call and isx(tree.func, "AutorefMarker"): # nested autorefs + elif isinstance(tree, ExpandedAutorefMarker): # nested autorefs return tree elif type(tree) is Name and (type(tree.ctx) is Load or not tree.ctx) and tree.id not in referents: tree = makeautoreference(tree) @@ -207,17 +296,18 @@ def transform(self, tree): # Skip (by name) some common references inserted by other macros. # - # We are a second-pass macro (inside out), so any first-pass macro invocations, - # as well as any second-pass macro invocations inside the `with autoref` block, - # have already expanded by the time we run our transformer. 
- always_skip = ['letter', 'dof', 'namelambda', 'curry', 'currycall', 'lazy', 'lazyrec', 'maybe_force_args', - # test framework stuff - 'unpythonic_assert', 'unpythonic_assert_signals', 'unpythonic_assert_raises', - 'callsite_filename', 'returns_normally'] - newbody = [Assign(targets=[q[n[o]]], value=args[0])] + # This part runs in the inside-out pass, so any outside-in macro invocations, + # as well as any inside-out macro invocations inside the `with autoref` + # block, have already expanded by the time we run our transformer. + always_skip = ['letter', 'dof', # let/do subsystem + 'namelambda', # lambdatools subsystem + 'curry', 'curryf', 'currycall', # autocurry subsystem + 'lazy', 'lazyrec', 'maybe_force_args', # lazify subsystem + # the test framework subsystem + 'callsite_filename', 'returns_normally'] + _test_function_names + with q as newbody: + n[o] = a[args[0]] for stmt in block_body: newbody.append(AutorefTransformer(referents=always_skip + [o]).visit(stmt)) - return wrapwith(item=q[h[AutorefMarker](u[o])], - body=newbody, - locref=block_body[0]) + return ExpandedAutorefMarker(body=newbody, varname=o) diff --git a/unpythonic/syntax/dbg.py b/unpythonic/syntax/dbg.py index e93a1e37..68abff2c 100644 --- a/unpythonic/syntax/dbg.py +++ b/unpythonic/syntax/dbg.py @@ -6,17 +6,110 @@ The printing can be customized; see ``dbgprint_block`` and ``dbgprint_expr``. 
""" -from ast import Call, Name, Tuple, keyword +__all__ = ["dbg", "dbgprint_block", "dbgprint_expr"] -from mcpyrate.quotes import macros, q, u, a, h # noqa: F401 +from ast import Call, Name, keyword -from mcpyrate import unparse +from mcpyrate.quotes import macros, q, u, a, t, h # noqa: F401 + +from mcpyrate import parametricmacro, unparse from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer from ..dynassign import dyn, make_dynvar from ..misc import callsite_filename +@parametricmacro +def dbg(tree, *, args, syntax, expander, **kw): + """[syntax, expr/block] Debug-print expressions including their source code. + + **Expression variant**: + + Example:: + + dbg[25 + 17] # --> [file.py:100] (25 + 17): 42 + + The transformation is:: + + dbg[expr] --> dyn.dbgprint_expr(k, v, filename=__file__, lineno=xxx) + + where ``k`` is the source code of the expression and ``v`` is its value, + and `dyn` is `unpythonic.dynassign.dyn` (hygienically captured, so you + don't need to import it just to use the `dbg[]` macro). + + ``xxx`` is the original line number before macro expansion, if available + in the AST node of the expression, otherwise ``None``. (Some macros might + not care about inserting line numbers, because `mcpyrate` fixes any missing + line numbers in a postprocess step; this is why it might be missing at some + locations in any specific macro-enabled program.) + + A default implementation of the debug printer is provided and automatically + assigned as the default value for `dyn.dbgprint_expr`. + + To customize the debug printing, set your custom printer function to the + dynvar ``dbgprint_expr``, using `with dyn.let(dbgprint_expr=...)`. + + The custom function, beside performing any printing/logging as a side effect, + **must** return the value ``v``, so that surrounding an expression with + ``dbg[...]`` does not alter its value. + + If you want to use the default implementation as part of your customized one + (e.g. 
if you want to decorate that with some logging code), it is available as + `unpythonic.syntax.dbgprint_expr`. + + **Block variant**: + + Lexically within the block, any call to ``print`` (alternatively, if specified, + the optional custom print function), prints both the expression source code + and the corresponding value. + + A custom print function can be supplied as an argument. To implement a + custom print function, see the default implementation ``dbgprint_block`` + for the signature. + + If you want to use the default implementation as part of your customized one, + it is available as `unpythonic.syntax.dbgprint_block`. + + Examples:: + + with dbg: + x = 2 + print(x) # --> [file.py:100] x: 2 + + with dbg: + x = 2 + y = 3 + print(x, y) # --> [file.py:100] x: 2, y: 3 + print(x, y, sep="\n") # --> [file.py:100] x: 2 + # [file.py:100] y: 3 + + prt = lambda *args, **kwargs: print(*args) + with dbg[prt]: + x = 2 + prt(x) # --> ('x',) (2,) + print(x) # --> 2 + + with dbg[prt]: + x = 2 + y = 17 + prt(x, y, 1 + 2) # --> ('x', 'y', '(1 + 2)') (2, 17, 3) + + **CAUTION**: The source code is back-converted from the AST representation; + hence its surface syntax may look slightly different to the original (e.g. + extra parentheses). See ``mcpyrate.unparse``. + """ + if syntax not in ("expr", "block"): + raise SyntaxError("dbg is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("dbg (block mode) does not take an as-part") # pragma: no cover + + # Expand inside-out. + with dyn.let(_macro_expander=expander): + if syntax == "expr": + return _dbg_expr(tree) + else: # syntax == "block": + return _dbg_block(body=tree, args=args) + def dbgprint_block(ks, vs, *, filename=None, lineno=None, sep=", ", **kwargs): """Default debug printer for the ``dbg`` macro, block variant. 
@@ -68,34 +161,6 @@ def dbgprint_block(ks, vs, *, filename=None, lineno=None, sep=", ", **kwargs): else: print(header + sep.join(f"{k}: {v}" for k, v in zip(ks, vs)), **kwargs) -def dbg_block(body, args): - if args: # custom print function hook - # TODO: add support for Attribute to support using a method as a custom print function - # (the problem is we must syntactically find matches in the AST, and AST nodes don't support comparison) - if type(args[0]) is not Name: # pragma: no cover, let's not test the macro expansion errors. - raise SyntaxError("Custom debug print function must be specified by a bare name") - p = args[0] - pname = p.id # name of the print function as it appears in the user code - else: - p = q[h[dbgprint_block]] - pname = "print" # override standard print function within this block - - class DbgBlockTransformer(ASTTransformer): - def transform(self, tree): - if is_captured_value(tree): - return tree # don't recurse! - if type(tree) is Call and type(tree.func) is Name and tree.func.id == pname: - names = [q[u[unparse(node)]] for node in tree.args] # x --> "x"; (1 + 2) --> "(1 + 2)"; ... - names = Tuple(elts=names, lineno=tree.lineno, col_offset=tree.col_offset) - values = Tuple(elts=tree.args, lineno=tree.lineno, col_offset=tree.col_offset) - tree.args = [names, values] - # can't use inspect.stack in the printer itself because we want the line number *before macro expansion*. - tree.keywords += [keyword(arg="filename", value=q[h[callsite_filename]()]), - keyword(arg="lineno", value=(q[u[tree.lineno]] if hasattr(tree, "lineno") else q[None]))] - tree.func = q[a[p]] - return self.generic_visit(tree) - return DbgBlockTransformer().visit(body) - def dbgprint_expr(k, v, *, filename, lineno): """Default debug printer for the ``dbg`` macro, expression variant. @@ -133,8 +198,46 @@ def dbgprint_expr(k, v, *, filename, lineno): print(f"[{filename}:{lineno}] {k}: {v}") return v # IMPORTANT! 
(passthrough; debug printing is a side effect) -def dbg_expr(tree): - ln = q[u[tree.lineno]] if hasattr(tree, "lineno") else q[None] +# -------------------------------------------------------------------------------- +# Syntax transformers + +def _dbg_block(body, args): + if args: # custom print function hook + # TODO: add support for Attribute to support using a method as a custom print function + # (the problem is we must syntactically find matches in the AST, and AST nodes don't support comparison) + if type(args[0]) is not Name: # pragma: no cover, let's not test the macro expansion errors. + raise SyntaxError("Custom debug print function must be specified by a bare name") # pragma: no cover + pfunc = args[0] + pname = pfunc.id # name of the print function as it appears in the user code + else: + pfunc = q[h[dbgprint_block]] + pname = "print" # override standard print function within this block + + # TODO: Do we really need to expand inside-out here? + body = dyn._macro_expander.visit_recursively(body) + + class DbgBlockTransformer(ASTTransformer): + def transform(self, tree): + if is_captured_value(tree): + return tree # don't recurse! + if type(tree) is Call and type(tree.func) is Name and tree.func.id == pname: + names = [q[u[unparse(node)]] for node in tree.args] # x --> "x"; (1 + 2) --> "(1 + 2)"; ... + names = q[t[names]] + values = q[t[tree.args]] + tree.args = [names, values] + # can't use inspect.stack in the printer itself because we want the line number *before macro expansion*. + lineno = getattr(tree, "lineno", None) # may be absent on 3.10–3.12; None on 3.13+ + tree.keywords += [keyword(arg="filename", value=q[h[callsite_filename]()]), + keyword(arg="lineno", value=q[u[lineno]])] + tree.func = pfunc + return self.generic_visit(tree) + return DbgBlockTransformer().visit(body) + +def _dbg_expr(tree): + # TODO: Do we really need to expand inside-out here? 
+ tree = dyn._macro_expander.visit_recursively(tree) + + ln = q[u[getattr(tree, "lineno", None)]] filename = q[h[callsite_filename]()] # Careful here! We must `h[]` the `dyn`, but not `dbgprint_expr` itself, # because we want to look up that attribute dynamically. diff --git a/unpythonic/syntax/forall.py b/unpythonic/syntax/forall.py index a22feb1a..f85ed0b1 100644 --- a/unpythonic/syntax/forall.py +++ b/unpythonic/syntax/forall.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- """Nondeterministic evaluation (a tuple comprehension with a multi-expr body).""" +__all__ = ["forall", "insist", "deny"] + from ast import Tuple, arg from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 @@ -9,10 +11,12 @@ from .letdoutil import isenvassign, UnexpandedEnvAssignView from ..amb import monadify -from ..amb import insist, deny # for re-export only # noqa: F401 +from ..dynassign import dyn from ..misc import namelambda -def forall(exprs): +from ..amb import insist, deny # for re-export only # noqa: F401 + +def forall(tree, *, syntax, expander, **kw): """[syntax, expr] Nondeterministic evaluation. Fully based on AST transformation, with real lexical variables. @@ -29,8 +33,20 @@ def forall(exprs): assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), (8, 15, 17), (9, 12, 15), (12, 16, 20)) """ + if syntax != "expr": + raise SyntaxError("forall is an expr macro only") # pragma: no cover + + # Inside-out macro. + with dyn.let(_macro_expander=expander): + return _forall(exprs=tree) + +def _forall(exprs): if type(exprs) is not Tuple: # pragma: no cover, let's not test macro expansion errors. - raise SyntaxError("forall body: expected a sequence of comma-separated expressions") + raise SyntaxError("forall body: expected a sequence of comma-separated expressions") # pragma: no cover + + # Expand inside-out to easily support lexical scoping. 
+ exprs = dyn._macro_expander.visit_recursively(exprs) + itemno = 0 def build(lines, tree): if not lines: diff --git a/unpythonic/syntax/ifexprs.py b/unpythonic/syntax/ifexprs.py index 4633228c..a915f7e3 100644 --- a/unpythonic/syntax/ifexprs.py +++ b/unpythonic/syntax/ifexprs.py @@ -1,43 +1,143 @@ # -*- coding: utf-8 -*- """Anaphoric if.""" +__all__ = ["aif", "it", + "cond"] + from ast import Tuple -from mcpyrate.quotes import macros, q, a # noqa: F811, F401 +from mcpyrate.quotes import macros, q, n, a, h # noqa: F811, F401 +from .letdo import macros, let # noqa: F811, F401 + +from mcpyrate import namemacro +from mcpyrate.expander import MacroExpander +from mcpyrate.utils import extract_bindings, NestingLevelTracker + +from .letdo import _implicit_do + +from ..dynassign import dyn + +# -------------------------------------------------------------------------------- + +def aif(tree, *, syntax, expander, **kw): + """[syntax, expr] Anaphoric if. + + Usage:: + + aif[test, then, otherwise] + + aif[[pre, ..., test], + [post_true, ..., then], # "then" branch + [post_false, ..., otherwise]] # "otherwise" branch + + Inside the ``then`` and ``otherwise`` branches, the magic identifier ``it`` + (which is always named literally ``it``) refers to the value of ``test``. + + This expands into a ``let`` and an expression-form ``if``. + + Each part may consist of multiple expressions by using brackets around it; + those brackets create a `do` environment (see `unpythonic.syntax.do`). + + To represent a single expression that is a literal list, use extra + brackets: ``[[1, 2, 3]]``. + """ + if syntax != "expr": + raise SyntaxError("aif is an expr macro only") # pragma: no cover + + # Detect the name(s) of `it` at the use site (this accounts for as-imports) + # TODO: We don't know which binding this particular use site uses. + # TODO: For now, we hack this by making `it` always rename itself to literal `it`. 
+ macro_bindings = extract_bindings(expander.bindings, it) + if not macro_bindings: + raise SyntaxError("The use site of `aif` must macro-import `it`, too.") # pragma: no cover + + # Expand outside-in, but the implicit do[] needs the expander. + with dyn.let(_macro_expander=expander): + return _aif(tree, macro_bindings) -from .letdo import implicit_do, let +_aif_level = NestingLevelTracker() -# TODO: `mcpyrate` has a rudimentary capability like Racket's "syntax-parameterize". -# TODO: Make `it` a name macro that errors out unless it appears inside an `aif`. -# -# We could just leave "it" undefined by default, but IDEs are happier if the -# name exists, and this also gives us a chance to provide a docstring. -class it: - """[syntax] The result of the test in an aif. +def _aif(tree, bindings_of_it): + # expand any `it` inside the `aif` (thus confirming those uses are valid) + def expand_it(tree): + return MacroExpander(bindings_of_it, dyn._macro_expander.filename).visit(tree) - Only meaningful inside the ``then`` and ``otherwise`` branches of an aif. + # careful here: `it` is only valid in the `then` and `otherwise` parts. + test, then, otherwise = tree.elts + test = _implicit_do(test) + with _aif_level.changed_by(+1): + # TODO: We don't know which binding this particular use site uses. + # TODO: For now, we hack this by making `it` always rename itself to literal `it`. + name_of_it = list(bindings_of_it.keys())[0] + expanded_it = expand_it(q[n[name_of_it]]) + + then = _implicit_do(expand_it(then)) + otherwise = _implicit_do(expand_it(otherwise)) + + let_bindings = q[(a[expanded_it], a[test])] + let_body = q[a[then] if a[expanded_it] else a[otherwise]] + # We use a hygienic macro reference to `let[]` in the output, + # so that the expander can expand it later. + return q[h[let][a[let_bindings]][a[let_body]]] + +@namemacro +def it(tree, *, syntax, **kw): + """[syntax, name] The `it` of an anaphoric if. 
+ + Inside an `aif` body, evaluates to the value of the test result. + Anywhere else, is considered a syntax error. + + **CAUTION**: Currently cannot be as-imported; must be imported + without renaming. + """ + if syntax != "name": + raise SyntaxError("`it` is a name macro only") # pragma: no cover + if _aif_level.value < 1: + raise SyntaxError("`it` may only appear in the 'then' and 'otherwise' parts of an `aif[...]`") # pragma: no cover + return q[it] # always rename to literal `it` + +# -------------------------------------------------------------------------------- + +def cond(tree, *, syntax, expander, **kw): + """[syntax, expr] Lispy cond; like "a if p else b", but has "elif". + + Usage:: + + cond[test1, then1, + test2, then2, + ... + otherwise] + + cond[[pre1, ..., test1], [post1, ..., then1], + [pre2, ..., test2], [post2, ..., then2], + ... + [postn, ..., otherwise]] + + This allows human-readable multi-branch conditionals in an expression position. + + Each part may consist of multiple expressions by using brackets around it; + those brackets create a `do` environment (see `unpythonic.syntax.do`). + + To represent a single expression that is a literal list, use extra + brackets: ``[[1, 2, 3]]``. """ - def __repr__(self): # pragma: no cover, we have a repr just in case one of these ends up somewhere at runtime. - return "" -it = it() - -def aif(tree): - test, then, otherwise = [implicit_do(x) for x in tree.elts] - bindings = [q[(it, a[test])]] - body = q[a[then] if it else a[otherwise]] - # TODO: we should use a hygienically captured macro here. - return let(bindings, body) - -def cond(tree): + if syntax != "expr": + raise SyntaxError("cond is an expr macro only") # pragma: no cover + + # Expand outside-in, but the implicit do[] needs the expander. 
+ with dyn.let(_macro_expander=expander): + return _cond(tree) + +def _cond(tree): if type(tree) is not Tuple: raise SyntaxError("Expected cond[test1, then1, test2, then2, ..., otherwise]") # pragma: no cover def build(elts): if len(elts) == 1: # final "otherwise" branch - return implicit_do(elts[0]) + return _implicit_do(elts[0]) if not elts: raise SyntaxError("Expected cond[test1, then1, test2, then2, ..., otherwise]") # pragma: no cover test, then, *more = elts - test = implicit_do(test) - then = implicit_do(then) + test = _implicit_do(test) + then = _implicit_do(then) return q[a[then] if a[test] else a[build(more)]] return build(tree.elts) diff --git a/unpythonic/syntax/lambdatools.py b/unpythonic/syntax/lambdatools.py index 93525f1b..754767e0 100644 --- a/unpythonic/syntax/lambdatools.py +++ b/unpythonic/syntax/lambdatools.py @@ -1,73 +1,283 @@ # -*- coding: utf-8 -*- """Lambdas with multiple expressions, local variables, and a name.""" -from ast import (Lambda, List, Name, Assign, Subscript, Call, FunctionDef, - AsyncFunctionDef, Attribute, keyword, Dict, Constant, arg, - copy_location) +__all__ = ["multilambda", + "namedlambda", + "fn", "_", + "quicklambda", + "envify"] + +from ast import (Lambda, Name, Assign, Subscript, Call, FunctionDef, + AsyncFunctionDef, Attribute, keyword, Dict, Constant, arg) from copy import deepcopy from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 from mcpyrate import gensym +from mcpyrate.astcompat import NamedExpr +from mcpyrate.expander import MacroExpander from mcpyrate.quotes import is_captured_value from mcpyrate.splicing import splice_expression from mcpyrate.utils import extract_bindings from mcpyrate.walkers import ASTTransformer from ..dynassign import dyn -from ..misc import namelambda -from ..fun import orf from ..env import env +from ..misc import namelambda +from ..symbol import sym -from .astcompat import getconstant, Str, NamedExpr -from .letdo import do +from .letdo import _implicit_do, _do from 
.letdoutil import islet, isenvassign, UnexpandedLetView, UnexpandedEnvAssignView, ExpandedDoView -from .util import (is_decorated_lambda, isx, make_isxpred, has_deco, +from .nameutil import getname +from .util import (is_decorated_lambda, isx, has_deco, destructure_decorated_lambda, detect_lambda) -def multilambda(block_body): +# -------------------------------------------------------------------------------- +# Macro interface + +def multilambda(tree, *, syntax, expander, **kw): + """[syntax, block] Supercharge your lambdas: multiple expressions, local variables. + + For all ``lambda`` lexically inside the ``with multilambda`` block, + ``[...]`` denotes a multiple-expression body with an implicit ``do``:: + + lambda ...: [expr0, ...] --> lambda ...: do[expr0, ...] + + Only the outermost set of brackets around the body of a ``lambda`` denotes + a multi-expression body; the rest are interpreted as lists, as usual. + + Examples:: + + with multilambda: + echo = lambda x: [print(x), x] + assert echo("hi there") == "hi there" + + count = let[x << 0][ + lambda: [x << x + 1, + x]] + assert count() == 1 + assert count() == 2 + + mk12 = lambda: [[1, 2]] + assert mk12() == [1, 2] + + For local variables, see ``do``. + """ + if syntax != "block": + raise SyntaxError("multilambda is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("multilambda does not take an as-part") # pragma: no cover + + # Expand outside in. + # multilambda should expand first before any let[], do[] et al. that happen + # to be inside the block, to avoid misinterpreting implicit lambdas + # generated by those constructs. + with dyn.let(_macro_expander=expander): # implicit do (extra bracket notation) needs this. + return _multilambda(block_body=tree) + +def namedlambda(tree, *, syntax, expander, **kw): + """[syntax, block] Name lambdas implicitly. 
+ + Lexically inside a ``with namedlambda`` block, any literal ``lambda`` + that is assigned to a name using one of the supported assignment forms + is named to have the name of the LHS of the assignment. The name is + captured at macro expansion time. + + Naming modifies the original function object. + + We support: + + - Single-item assignments to a local name, ``f = lambda ...: ...`` + + - Named expressions (a.k.a. walrus operator, Python 3.8+), + ``f := lambda ...: ...`` + + - Assignments to unpythonic environments, ``f << (lambda ...: ...)`` + + - Let bindings, ``let[[f << (lambda ...: ...)] in ...]``, using any + let syntax supported by unpythonic (here using the haskelly let-in + just as an example). + + Support for other forms of assignment might or might not be added in a + future version. + + Example:: + + with namedlambda: + f = lambda x: x**3 # assignment: name as "f" + + let[x << 42, g << None, h << None][[ + g << (lambda x: x**2), # env-assignment: name as "g" + h << f, # still "f" (no literal lambda on RHS) + (g(x), h(x))]] + + foo = let[[f7 << (lambda x: x)] in f7] # let-binding: name as "f7" + + The naming is performed using the function ``unpythonic.misc.namelambda``, + which will update ``__name__``, ``__qualname__`` and ``__code__.co_name``. + """ + if syntax != "block": + raise SyntaxError("namedlambda is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("namedlambda does not take an as-part") # pragma: no cover + + # Two-pass macro. We pass in the expander to allow the macro to decide when to recurse. + with dyn.let(_macro_expander=expander): + return _namedlambda(block_body=tree) + +def fn(tree, *, syntax, expander, **kw): + """[syntax, expr] Underscore notation (quick lambdas) for Python. + + Usage:: + + fn[body] + + The ``fn[]`` macro creates a lambda. Each underscore in ``body`` + introduces a new parameter. 
+ + Example:: + + func = fn[_ * _] + + expands to:: + + func = lambda a0, a1: a0 * a1 + + The underscore is interpreted magically by ``fn[]``; but ``_`` itself + is not a macro, and has no special meaning outside ``fn[]``. The underscore + does **not** need to be imported for ``fn[]`` to recognize it. + + But if you want to make your IDE happy, there is a symbol named ``_`` in + `unpythonic.syntax` you can import to silence any "undefined name" errors + regarding the use of ``_``. It is a regular run-time object, not a macro. + + The macro does not descend into any nested ``fn[]``. + """ + if syntax != "expr": + raise SyntaxError("f is an expr macro only") # pragma: no cover + + # What's my name in the current expander? (There may be several names.) + # https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md#hygienic-macro-recursion + bindings = extract_bindings(expander.bindings, fn) + mynames = list(bindings.keys()) + + return _fn(tree, mynames) + +_ = sym("_") # for those who want to make their IDEs happy + +def quicklambda(tree, *, syntax, expander, **kw): + """[syntax, block] Make ``fn`` quick lambdas expand first. + + To be able to transform correctly, the block macros in ``unpythonic.syntax`` + that transform lambdas (e.g. ``multilambda``, ``tco``) need to see all + ``lambda`` definitions written with Python's standard ``lambda``. + + However, the ``fn`` macro uses the syntax ``f[...]``, which (to the analyzer) + does not look like a lambda definition. This macro changes the expansion + order, forcing any ``fn[...]`` lexically inside the block to expand before + any other macros do. + + Any expression of the form ``fn[...]``, where ``fn`` is any name bound in the + current macro expander to the macro `unpythonic.syntax.fn`, is understood as + a quick lambda. (In plain English, this respects as-imports of the macro ``fn``.) 
+ + Example - a quick multilambda:: + + from unpythonic.syntax import macros, multilambda, quicklambda, fn, local + from unpythonic.syntax import _ # optional, makes IDEs happy + + with quicklambda, multilambda: + func = fn[[local[x << _], + local[y << _], + x + y]] + assert func(1, 2) == 3 + + (This is of course rather silly, as an unnamed argument can only be mentioned + once. If we're giving names to them, a regular ``lambda`` is shorter to write. + The point is, this combo is now possible.) + """ + if syntax != "block": + raise SyntaxError("quicklambda is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("quicklambda does not take an as-part") # pragma: no cover + + # This macro expands outside in. + # + # In `mcpyrate`, expander instances are cheap - so we create a second expander + # to which we register only the `f` macro, under whatever names it appears in + # the original expander. Thus it leaves all other macros alone. This is the + # official `mcpyrate` way to immediately expand only some particular macros + # inside the current macro invocation. + bindings = extract_bindings(expander.bindings, fn) + return MacroExpander(bindings, expander.filename).visit(tree) + +def envify(tree, *, syntax, expander, **kw): + """[syntax, block] Make formal parameters live in an unpythonic env. + + The purpose is to allow overwriting formals using unpythonic's + expression-assignment ``name << value``. The price is that the references + to the arguments are copied into an env whenever an envified function is + entered. 
+ + Example - PG's accumulator puzzle (http://paulgraham.com/icad.html):: + + with envify: + def foo(n): + return lambda i: n << n + i + + Or even shorter:: + + with autoreturn, envify: + def foo(n): + lambda i: n << n + i + """ + if syntax != "block": + raise SyntaxError("envify is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("envify does not take an as-part") # pragma: no cover + + # Two-pass macro. + with dyn.let(_macro_expander=expander): + return _envify(block_body=tree) + +# -------------------------------------------------------------------------------- +# Syntax transformers + +def _multilambda(block_body): class MultilambdaTransformer(ASTTransformer): def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! - if not (type(tree) is Lambda and type(tree.body) is List): - return self.generic_visit(tree) - bodys = tree.body - # bracket magic: - # - don't recurse to the implicit lambdas generated by the "do" we are inserting here - # - for each item, "do" internally inserts a lambda to delay execution, - # as well as to bind the environment - # - we must do() instead of q[h[do][...]] for pickling reasons - # - but recurse manually into each *do item*; these are explicit - # user-provided code so we should transform them - bodys = self.visit(bodys) - tree.body = do(bodys) # insert the do, with the implicit lambdas - return tree + if type(tree) is Lambda: + tree.body = _implicit_do(tree.body) + return self.generic_visit(tree) # multilambda should expand first before any let[], do[] et al. that happen # to be inside the block, to avoid misinterpreting implicit lambdas # generated by those constructs. 
return MultilambdaTransformer().visit(block_body) -def namedlambda(block_body): +def _namedlambda(block_body): def issingleassign(tree): return type(tree) is Assign and len(tree.targets) == 1 and type(tree.targets[0]) is Name # detect a manual curry - iscurry = make_isxpred("curry") def iscurrywithfinallambda(tree): - if not (type(tree) is Call and isx(tree.func, iscurry) and tree.args): + if not (type(tree) is Call and isx(tree.func, "curry") and tree.args): return False return type(tree.args[-1]) is Lambda # Detect an autocurry from an already expanded "with autocurry". - # CAUTION: These must match what unpythonic.syntax.curry.autocurry uses in its output. - iscurrycall = make_isxpred("currycall") - iscurryf = orf(make_isxpred("curryf"), make_isxpred("curry")) # auto or manual curry in a "with autocurry" + # CAUTION: These must match what unpythonic.syntax.autocurry.autocurry uses in its output. + currycall_name = "currycall" + iscurryf = lambda name: name in ("curryf", "curry") # auto or manual curry in a "with autocurry" def isautocurrywithfinallambda(tree): - if not (type(tree) is Call and isx(tree.func, iscurrycall) and tree.args and + # "currycall(..., curryf(lambda ...: ...))" + if not (type(tree) is Call and isx(tree.func, currycall_name) and tree.args and type(tree.args[-1]) is Call and isx(tree.args[-1].func, iscurryf)): return False - return type(tree.args[-1].args[-1]) is Lambda + curryf_callnode = tree.args[-1] + lastarg = curryf_callnode.args[-1] + return type(lastarg) is Lambda def iscallwithnamedargs(tree): return type(tree) is Call and tree.keywords @@ -76,14 +286,16 @@ def iscallwithnamedargs(tree): # it as `myname` (str); else return `tree` as-is. def nameit(myname, tree): match, thelambda = False, None - # for decorated lambdas, match any chain of one-argument calls. - d = is_decorated_lambda(tree, mode="any") and not has_deco(tree, "namelambda") + # For decorated lambdas, match any chain of one-argument calls. 
+ # The `has_deco` check ignores any already named lambdas. + d = is_decorated_lambda(tree, mode="any") and not has_deco(["namelambda"], tree) c = iscurrywithfinallambda(tree) - # this matches only during the second pass (after "with autocurry" has expanded) + # This matches only during the second pass (after "with autocurry" has expanded) # so it can't have namelambda already applied if isautocurrywithfinallambda(tree): # "currycall(..., curryf(lambda ...: ...))" match = True thelambda = tree.args[-1].args[-1] + # --> "currycall(..., (namelambda(myname))(curryf(lambda ...: ...)))" tree.args[-1].args[-1] = q[h[namelambda](u[myname])(a[thelambda])] elif type(tree) is Lambda or d or c: match = True @@ -102,12 +314,15 @@ def transform(self, tree): return tree # don't recurse! if islet(tree, expanded=False): # let bindings view = UnexpandedLetView(tree) + newbindings = [] for b in view.bindings: - b.elts[1], thelambda, match = nameit(b.elts[0].id, b.elts[1]) + b.elts[1], thelambda, match = nameit(getname(b.elts[0]), b.elts[1]) if match: thelambda.body = self.visit(thelambda.body) else: b.elts[1] = self.visit(b.elts[1]) + newbindings.append(b) + view.bindings = newbindings # write the new bindings (important!) view.body = self.visit(view.body) return tree # assumption: no one left-shifts by a literal lambda :) @@ -120,14 +335,14 @@ def transform(self, tree): view.value = self.visit(view.value) return tree elif issingleassign(tree): # f = lambda ...: ... - tree.value, thelambda, match = nameit(tree.targets[0].id, tree.value) + tree.value, thelambda, match = nameit(getname(tree.targets[0]), tree.value) if match: thelambda.body = self.visit(thelambda.body) else: tree.value = self.visit(tree.value) return tree - elif type(tree) is NamedExpr: # f := lambda ...: ... (Python 3.8+, added in unpythonic 0.15) - tree.value, thelambda, match = nameit(tree.target.id, tree.value) + elif type(tree) is NamedExpr: # f := lambda ...: ... 
(Python 3.8+, added in unpythonic 0.15.0) + tree.value, thelambda, match = nameit(getname(tree.target), tree.value) if match: thelambda.body = self.visit(thelambda.body) else: @@ -153,8 +368,8 @@ def transform(self, tree): if k is None: # {..., **d, ...} tree.values[j] = self.visit(v) else: - if type(k) in (Constant, Str): # Python 3.8+: ast.Constant - thename = getconstant(k) + if type(k) is Constant: + thename = k.value tree.values[j], thelambda, match = nameit(thename, v) if match: thelambda.body = self.visit(thelambda.body) @@ -169,10 +384,33 @@ def transform(self, tree): # outside in: transform in unexpanded let[] forms newbody = NamedLambdaTransformer().visit(block_body) - newbody = dyn._macro_expander.visit(newbody) + newbody = dyn._macro_expander.visit_recursively(newbody) # inside out: transform in expanded autocurry - return NamedLambdaTransformer().visit(newbody) + newbody = NamedLambdaTransformer().visit(newbody) + + # v0.15.0+: Finally, auto-name any still anonymous `lambda` with source location info. + # We must perform this in a separate pass so that expanded autocurry invocations + # are transformed correctly first. + class NamedLambdaFinalizationTransformer(ASTTransformer): + def transform(self, tree): + # Recurse into the lambda body in already named lambdas. 
+ if is_decorated_lambda(tree, mode="any") and has_deco(["namelambda"], tree): + decorator_list, thelambda = destructure_decorated_lambda(tree) + thelambda.body = self.visit(thelambda.body) + return tree + elif type(tree) is Lambda: + if hasattr(tree, "lineno"): + thename = f"" + tree, thelambda, match = nameit(thename, tree) + if match: + thelambda.body = self.visit(thelambda.body) + else: + tree = self.visit(tree) + return tree + return self.generic_visit(tree) + return NamedLambdaFinalizationTransformer().visit(newbody) + # The function `f` is adapted from the `f` macro in `macropy.quick_lambda`, # stripped into a bare syntax transformer., and then the `@Walker` inside @@ -181,13 +419,7 @@ def transform(self, tree): # # Used under the MIT license. # Copyright (c) 2013-2018, Li Haoyi, Justin Holmgren, Alberto Berti and all the other contributors. -def f(tree): - # What's my name in the current expander? (There may be several names.) - # https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md#hygienic-macro-recursion - # TODO: doesn't currently work because this `f` is the syntax transformer, not the `f[]` macro. - bindings = extract_bindings(dyn._macro_expander.bindings, f) - mynames = list(bindings.keys()) - +def _fn(tree, mynames=()): class UnderscoreTransformer(ASTTransformer): def transform(self, tree): if is_captured_value(tree): @@ -209,19 +441,17 @@ def transform(self, tree): tree.args.args = [arg(arg=x) for x in used_names] return tree -def envify(block_body): +def _envify(block_body): # first pass, outside-in userlambdas = detect_lambda(block_body) - block_body = dyn._macro_expander.visit(block_body) + # Expand inside-out to easily support lexical scoping. 
+ block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out def getargs(tree): # tree: FunctionDef, AsyncFunctionDef, Lambda a = tree.args - if hasattr(a, "posonlyargs"): # Python 3.8+: positional-only parameters - allargs = a.posonlyargs + a.args + a.kwonlyargs - else: - allargs = a.args + a.kwonlyargs + allargs = a.posonlyargs + a.args + a.kwonlyargs argnames = [x.arg for x in allargs] if a.vararg: argnames.append(a.vararg.arg) @@ -235,7 +465,6 @@ def isfunctionoruserlambda(tree): # Create a renamed reference to the env() constructor to be sure the Call # nodes added by us have a unique .func (not used by other macros or user code) - _ismakeenv = make_isxpred("_envify") _envify = env class EnvifyTransformer(ASTTransformer): @@ -261,9 +490,9 @@ def isourupdate(thecall): ename = gensym("e") theenv = q[h[_envify]()] theenv.keywords = kws - assignment = Assign(targets=[q[n[ename]]], - value=theenv) - assignment = copy_location(assignment, tree) + with q as quoted: + n[ename] = a[theenv] + assignment = quoted[0] tree.body.insert(0, assignment) elif type(tree) is Lambda and id(tree) in userlambdas: # We must in general inject a new do[] even if one is already there, @@ -272,33 +501,38 @@ def isourupdate(thecall): # the name should revert to mean the formal parameter. # # inject a do[] and reuse its env - tree.body = do(List(elts=[q[n["_here_"]], - tree.body])) + tree.body = _do(q[n["_here_"], + a[tree.body]]) view = ExpandedDoView(tree.body) # view.body: [(lambda e14: ...), ...] 
ename = view.body[0].args.args[0].arg # do[] environment name - theupdate = Attribute(value=q[n[ename]], attr="update") + theupdate = q[n[f"{ename}.update"]] thecall = q[a[theupdate]()] thecall.keywords = kws tree.body = splice_expression(thecall, tree.body, "_here_") - newbindings.update({k: Attribute(value=q[n[ename]], attr=k) for k in argnames}) # "x" --> e.x + newbindings.update({k: q[n[f"{ename}.{k}"]] for k in argnames}) # "x" --> e.x self.generic_withstate(tree, enames=(enames + [ename]), bindings=newbindings) else: # leave alone the _envify() added by us - if type(tree) is Call and (isx(tree.func, _ismakeenv) or isourupdate(tree)): + if type(tree) is Call and (isx(tree.func, "_envify") or isourupdate(tree)): # don't recurse return tree # transform env-assignments into our envs elif isenvassign(tree): view = UnexpandedEnvAssignView(tree) if view.name in bindings.keys(): - envset = Attribute(value=bindings[view.name].value, attr="set") + # Grab the envname from the actual binding of "varname", of the form `e.varname` + # (so it's the `id` of a `Name` that is the `value` of an `Attribute`). + envset = q[n[f"{bindings[view.name].value.id}.set"]] newvalue = self.visit(view.value) return q[a[envset](u[view.name], a[newvalue])] # transform references to currently active bindings + # x --> e14.x + # It doesn't matter if this hits an already expanded inner `with envify`, + # because the gensymmed environment name won't be in our bindings, and the "x" + # has become the `attr` in an `Attribute` node. elif type(tree) is Name and tree.id in bindings.keys(): - # We must be careful to preserve the Load/Store/Del context of the name. - # The default lets mcpyrate fix it later. - ctx = tree.ctx if hasattr(tree, "ctx") else None + # Preserve the Load/Store/Del context of the name. 
+ ctx = getattr(tree, "ctx", None) out = deepcopy(bindings[tree.id]) out.ctx = ctx return out diff --git a/unpythonic/syntax/lazify.py b/unpythonic/syntax/lazify.py index 50cf9be1..af2207b7 100644 --- a/unpythonic/syntax/lazify.py +++ b/unpythonic/syntax/lazify.py @@ -1,25 +1,441 @@ # -*- coding: utf-8 -*- """Automatic lazy evaluation of function arguments.""" +__all__ = ["lazy", "lazyrec", "lazify"] + from ast import (Lambda, FunctionDef, AsyncFunctionDef, Call, Name, Attribute, Starred, keyword, List, Tuple, Dict, Set, Subscript, Load) +from functools import partial -from mcpyrate.quotes import macros, q, a, h # noqa: F401 +from mcpyrate.quotes import macros, q, u, a, h # noqa: F401 -from mcpyrate.quotes import is_captured_value +from mcpyrate.astcompat import TypeAlias +from mcpyrate.astfixers import fix_ctx +from mcpyrate.quotes import capture_as_macro, is_captured_value +from mcpyrate.unparser import unparse from mcpyrate.walkers import ASTTransformer from .util import (suggest_decorator_index, sort_lambda_decorators, detect_lambda, - isx, make_isxpred, getname, is_decorator, wrapwith) + isx, getname, is_decorator) from .letdoutil import islet, isdo, ExpandedLetView +from .nameutil import is_unexpanded_expr_macro from ..lazyutil import Lazy, passthrough_lazy_args, force, force1, maybe_force_args from ..dynassign import dyn # ----------------------------------------------------------------------------- +# The `lazy` macro comes from `demo/promise.py` in `mcpyrate`. +def lazy(tree, *, syntax, **kw): + """[syntax, expr] Delay an expression (lazy evaluation). + + This macro injects a lambda to delay evaluation, and encapsulates + the result into a *promise* (an `unpythonic.lazyutil.Lazy` object). + + In Racket, this operation is known as `delay`. + """ + if syntax != "expr": + raise SyntaxError("lazy is an expr macro only") # pragma: no cover + + # Expand outside in. Ordering shouldn't matter here. 
+ return _lazy(tree) + +def lazyrec(tree, *, syntax, **kw): + """[syntax, expr] Delay items in a container literal, recursively. + + Essentially, this distributes ``lazy[]`` into the items inside a literal + ``list``, ``tuple``, ``set``, ``frozenset``, ``unpythonic.collections.box`` + or ``unpythonic.llist.cons``, and into the values of a literal ``dict`` or + ``unpythonic.collections.frozendict``. + + Because this is a macro and must work by names only, only this fixed set of + container types is supported. + + The container itself is not lazified, only the items inside it are, to keep + the lazification from interfering with unpacking. This allows things such as + ``f(*lazyrec[(1*2*3, 4*5*6)])`` to work as expected. + + See also ``lazy[]`` (the effect on each item) and ``unpythonic.syntax.force`` + (the inverse of ``lazyrec[]``). + + For an atom, ``lazyrec[]`` has the same effect as ``lazy[]``:: + + lazyrec[dostuff()] --> lazy[dostuff()] + + For a container literal, ``lazyrec[]`` descends into it:: + + lazyrec[(2*21, 1/0)] --> (lazy[2*21], lazy[1/0]) + lazyrec[{'a': 2*21, 'b': 1/0}] --> {'a': lazy[2*21], 'b': lazy[1/0]} + + Constructor call syntax for container literals is also supported:: + + lazyrec[list(2*21, 1/0)] --> [lazy[2*21], lazy[1/0]] + + Nested container literals (with any combination of known types) are + processed recursively, for example:: + + lazyrec[((2*21, 1/0), (1+2+3, 4+5+6))] --> ((lazy[2*21], lazy[1/0]), + (lazy[1+2+3], lazy[4+5+6])) + """ + if syntax != "expr": + raise SyntaxError("lazyrec is an expr macro only") # pragma: no cover + + # Expand outside in. Ordering shouldn't matter here. + return _lazyrec(tree) + +def lazify(tree, *, syntax, expander, **kw): + """[syntax, block] Call-by-need for Python. + + In a ``with lazify`` block, function arguments are evaluated only when + actually used, at most once each, and in the order in which they are + actually used. Promises are automatically forced on access. 
+ + Automatic lazification applies to arguments in function calls and to + let-bindings, since they play a similar role. **No other binding forms + are auto-lazified.** + + Automatic lazification uses the ``lazyrec[]`` macro, which recurses into + certain types of container literals, so that the lazification will not + interfere with unpacking. See its docstring for details. + + Comboing with other block macros in ``unpythonic.syntax`` is supported, + including ``curry`` and ``continuations``. + + Silly contrived example:: + + with lazify: + def my_if(p, a, b): + if p: + return a # b never evaluated in this code path... + else: + return b # a never evaluated in this code path... + + # ...hence the divisions by zero here are never performed. + assert my_if(True, 23, 1/0) == 23 + assert my_if(False, 1/0, 42) == 42 + + Note ``my_if`` is a run-of-the-mill runtime function, not a macro. Only the + ``with lazify`` is imbued with any magic. + + Like ``with continuations``, no state or context is associated with a + ``with lazify`` block, so lazy functions defined in one block may call + those defined in another. Calls between lazy and strict code are also + supported (in both directions), without requiring any extra effort. + + Evaluation of each lazified argument is guaranteed to occur at most once; + the value is cached. Order of evaluation of lazy arguments is determined + by the (dynamic) order in which the lazy code actually uses them. + + Essentially, the above code expands into:: + + from unpythonic.syntax import macros, lazy + from unpythonic.syntax import force + + def my_if(p, a, b): + if force(p): + return force(a) + else: + return force(b) + assert my_if(lazy[True], lazy[23], lazy[1/0]) == 23 + assert my_if(lazy[False], lazy[1/0], lazy[42]) == 42 + + plus some clerical details to allow lazy and strict code to be mixed. 
+ + Just passing through a lazy argument to another lazy function will + not trigger evaluation, even when it appears in a computation inlined + to the argument list:: + + with lazify: + def g(a, b): + return a + def f(a, b): + return g(2*a, 3*b) + assert f(21, 1/0) == 42 + + The division by zero is never performed, because the value of ``b`` is + not needed to compute the result (worded less magically, that promise is + never forced in the code path that produces the result). Essentially, + the above code expands into:: + + from unpythonic.syntax import macros, lazy + from unpythonic.syntax import force + + def g(a, b): + return force(a) + def f(a, b): + return g(lazy[2*force(a)], lazy[3*force(b)]) + assert f(lazy[21], lazy[1/0]) == 42 + + This relies on the magic of closures to capture f's ``a`` and ``b`` into + the promises. + + But be careful; **assignments are not auto-lazified**, so the following does + **not** work:: + + with lazify: + def g(a, b): + return a + def f(a, b): + c = 3*b # not in an arglist, b gets evaluated! + return g(2*a, c) + assert f(21, 1/0) == 42 + + To avoid that, explicitly wrap the computation into a ``lazy[]``. For why + assignment RHSs are not auto-lazified, see the section on pitfalls below. + + In calls, bare references (name, subscript, attribute) are detected and for + them, re-thunking is skipped. For example:: + + def g(a): + return a + def f(a): + return g(a) + assert f(42) == 42 + + expands into:: + + def g(a): + return force(a) + def f(a): + return g(a) # <-- no lazy[force(a)] since "a" is just a name + assert f(lazy[42]) == 42 + + When resolving references, subscripts and attributes are forced just enough + to obtain the containing object from a promise, if any; for example, the + elements of a list ``lst`` will not be evaluated just because the user code + happens to use ``lst.append(...)``; this only forces the object ``lst`` + itself. + + A ``lst`` appearing by itself evaluates the whole list. 
Similarly, ``lst[0]`` + by itself evaluates only the first element, and ``lst[:-1]`` by itself + evaluates all but the last element. The index expression in a subscript is + fully forced, because its value is needed to determine which elements of the + subscripted container are to be accessed. + + **Mixing lazy and strict code** + + Lazy code is allowed to call strict functions and vice versa, without + requiring any additional effort. + + Keep in mind what this implies: when calling a strict function, any arguments + given to it will be evaluated! + + In the other direction, when calling a lazy function from strict code, the + arguments are evaluated by the caller before the lazy code gets control. + The lazy code gets just the evaluated values. + + If you have, in strict code, an argument expression you want to pass lazily, + use syntax like ``f(lazy[...], ...)``. If you accidentally do this in lazy + code, it shouldn't break anything; ``with lazify`` detects any argument + expressions that are already promises, and just passes them through. + + **Forcing promises manually** + + This is mainly useful if you ``lazy[]`` or ``lazyrec[]`` something explicitly, + and want to compute its value outside a ``with lazify`` block. + + We provide the functions ``force1`` and ``force``. + + Using ``force1``, if ``x`` is a ``lazy[]`` promise, it will be forced, + and the resulting value is returned. If ``x`` is not a promise, + ``x`` itself is returned, à la Racket. + + The function ``force``, in addition, descends into containers (recursively). + When an atom ``x`` (i.e. anything that is not a container) is encountered, + it is processed using ``force1``. + + Mutable containers are updated in-place; for immutables, a new instance is + created. Any container with a compatible ``collections.abc`` is supported. + (See ``unpythonic.collections.mogrify`` for details.) In addition, as + special cases ``unpythonic.collections.box`` and ``unpythonic.llist.cons`` + are supported. 
+ + **Tips, tricks and pitfalls** + + You can mix and match bare data values and promises, since ``force(x)`` + evaluates to ``x`` when ``x`` is not a promise. + + So this is just fine:: + + with lazify: + def f(x): + x = 2*21 # assign a bare data value + print(x) # the implicit force(x) evaluates to x + f(17) + + If you want to manually introduce a promise, use ``lazy[]``:: + + from unpythonic.syntax import macros, lazify, lazy + + with lazify: + def f(x): + x = lazy[2*21] # assign a promise + print(x) # the implicit force(x) evaluates the promise + f(17) + + If you have a container literal and want to lazify it recursively in a + position that does not auto-lazify, use ``lazyrec[]`` (see its docstring + for details):: + + from unpythonic.syntax import macros, lazify, lazyrec + + with lazify: + def f(x): + return x[:-1] + lst = lazyrec[[1, 2, 3/0]] + assert f(lst) == [1, 2] + + For non-literal containers, use ``lazy[]`` for each item as appropriate:: + + def f(lst): + lst.append(lazy["I'm lazy"]) + lst.append(lazy["Don't call me lazy, I'm just evaluated later!"]) + + Keep in mind, though, that ``lazy[]`` will introduce a lambda, so there's + the usual pitfall:: + + from unpythonic.syntax import macros, lazify, lazy + + with lazify: + lst = [] + for x in range(3): # DANGER: only one "x", mutated imperatively + lst.append(lazy[x]) # all these closures capture the same "x" + print(lst[0]) # 2 + print(lst[1]) # 2 + print(lst[2]) # 2 + + So to capture the value instead of the name, use the usual workaround, + the wrapper lambda (here written more readably as a let, which it really is):: + + from unpythonic.syntax import macros, lazify, lazy, let + + with lazify: + lst = [] + for x in range(3): + lst.append(let[[y << x] in lazy[y]]) + print(lst[0]) # 0 + print(lst[1]) # 1 + print(lst[2]) # 2 + + Be careful not to ``lazy[]`` or ``lazyrec[]`` too much:: + + with lazify: + a = 10 + a = lazy[2*a] # 20, right? + print(a) # crash! + + Why does this example crash? 
The expanded code is:: + + with lazify: + a = 10 + a = lazy[2*force(a)] + print(force(a)) + + The ``lazy[]`` sets up a promise, which will force ``a`` *at the time when + the containing promise is forced*, but at that time the name ``a`` points + to a promise, which will force... + + The fundamental issue is that ``a = 2*a`` is an imperative update; if you + need to do that, just let Python evaluate the RHS normally (i.e. use the + value the name ``a`` points to *at the time when the RHS runs*). + + Assigning a lazy value to a new name evaluates it, because any read access + triggers evaluation:: + + with lazify: + def g(x): + y = x # the "x" on the RHS triggers the implicit force + print(y) # bare data value + f(2*21) + + Inspired by Haskell, Racket's (delay) and (force), and lazy/racket. + + **Combos** + + Introducing the *HasThon* programming language (it has 100% more Thon than + popular brands):: + + with lazify, autocurry: + def add2first(a, b, c): + return a + b + assert add2first(2)(3)(1/0) == 5 + + def f(a, b): + return a + assert let[[c << 42, + d << 1/0] in f(c)(d)] == 42 + assert letrec[[c << 42, + d << 1/0, + e << 2*c] in f(e)(d)] == 84 + + assert letrec[[c << 42, + d << 1/0, + e << 2*c] in [local[x << f(e)(d)], + x/4]] == 21 + + Works also with continuations. Rules: + + - Also continuations are transformed into lazy functions. + + - ``cc`` built by chain_conts is treated as lazy, **itself**; then it's + up to the continuations chained by it to decide whether to force their + arguments. + + - The default continuation ``identity`` is strict, so that return values + from a continuation-enabled computation will be forced. 
+ + If you need a lazy ``identity`` (so that you can obtain those delicious + promises), use:: + + from unpythonic import identity + from unpythonic.lazyutil import passthrough_lazy_args + lazy_identity = passthrough_lazy_args(identity) + + and then explicitly set the kwarg `cc=lazy_identity` when invoking the + continuation-enabled computation (e.g. in the example below, we could write + `ourpromises = doit(cc=lazy_identity)`). + + Example:: + + with lazify, continuations: + k = None + def setk(*args, cc): + nonlocal k + k = cc + return args[0] + def doit(): + lst = ['the call returned'] + *more, = call_cc[setk('A', 1/0)] + return lst + [more[0]] + assert doit() == ['the call returned', 'A'] + assert k('again') == ['the call returned', 'again'] + assert k('thrice', 1/0) == ['the call returned', 'thrice'] + + For a version with comments, see ``unpythonic/syntax/test/test_lazify.py``. + + **CAUTION**: Call-by-need is a low-level language feature that is difficult + to bolt on after the fact. Some things might not work. + + **CAUTION**: The functions in ``unpythonic.fun`` are lazify-aware (so that + e.g. curry and compose work with lazy functions), as are ``call`` and + ``callwith`` in ``unpythonic.misc``, but the rest of ``unpythonic`` is not. + + **CAUTION**: Argument passing by function call, and let-bindings are + currently the only binding constructs to which auto-lazification is applied. + """ + if syntax != "block": + raise SyntaxError("lazify is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("lazify does not take an as-part") # pragma: no cover + + # Two-pass macro.
+ with dyn.let(_macro_expander=expander): + return _lazify(body=tree) + +# ----------------------------------------------------------------------------- + # lazy: syntax transformer, lazify a single expression -def lazy(tree): - return q[h[Lazy](lambda: a[tree])] +def _lazy(tree): + return q[h[Lazy](lambda: a[tree], sourcecode=u[f"lazy[{unparse(tree, debug=True)}]"])] # lazyrec: syntax transformer, recursively lazify elements in container literals # @@ -87,9 +503,12 @@ def lazy(tree): # variant `frozendict(mapping1, mapping2, ...)`. _ctorcalls_that_take_exactly_one_positional_arg = {"tuple", "list", "set", "dict", "frozenset", "llist"} -islazy = make_isxpred("lazy") # unexpanded -isLazy = make_isxpred("Lazy") # expanded -def lazyrec(tree): +_unexpanded_lazy_name = "lazy" +_expanded_lazy_name = "Lazy" +_our_lazy = capture_as_macro(lazy) +def _lazyrec(tree): + is_unexpanded_lazy = partial(is_unexpanded_expr_macro, lazy, dyn._macro_expander) + # This helper doesn't need to recurse, so we don't need `ASTTransformer` here. def transform(tree): if type(tree) in (Tuple, List, Set): @@ -99,17 +518,14 @@ def transform(tree): elif type(tree) is Call and any(isx(tree.func, ctor) for ctor in _ctorcalls_all): p, k = _ctor_handling_modes[getname(tree.func)] lazify_ctorcall(tree, p, k) - elif type(tree) is Subscript and isx(tree.value, islazy): # unexpanded + elif is_unexpanded_lazy(tree): pass - elif type(tree) is Call and isx(tree.func, isLazy): # expanded + elif type(tree) is Call and isx(tree.func, _expanded_lazy_name): pass else: - # mcpyrate supports hygienic macro capture, so we can just splice unexpanded - # (but hygienically unquoted) `lazy` invocations here. - # TODO: Doing so renames the macro, so detection needs to be adjusted. - # TODO: It must also be bound in the current expander for hygienic macro capture to work. 
- # tree = q[h[lazy][a[tree]]] - tree = lazy(tree) + # `mcpyrate` supports hygienic macro capture, so we can just splice + # hygienic `lazy` invocations here. + tree = q[a[_our_lazy][a[tree]]] return tree def lazify_ctorcall(tree, positionals="all", keywords="all"): @@ -119,25 +535,25 @@ def lazify_ctorcall(tree, positionals="all", keywords="all"): newargs = [] for arg in tree.args: if type(arg) is Starred: # *args in Python 3.5+ - if is_literal_container(arg.value, maps_only=False): + if _is_literal_container(arg.value, maps_only=False): arg.value = rec(arg.value) # else do nothing - elif positionals == "all" or is_literal_container(arg, maps_only=False): # single positional arg + elif positionals == "all" or _is_literal_container(arg, maps_only=False): # single positional arg arg = rec(arg) newargs.append(arg) tree.args = newargs for kw in tree.keywords: if kw.arg is None: # **kwargs in Python 3.5+ - if is_literal_container(kw.value, maps_only=True): + if _is_literal_container(kw.value, maps_only=True): kw.value = rec(kw.value) # else do nothing - elif keywords == "all" or is_literal_container(kw.value, maps_only=True): # single named arg + elif keywords == "all" or _is_literal_container(kw.value, maps_only=True): # single named arg kw.value = rec(kw.value) rec = transform return rec(tree) -def is_literal_container(tree, maps_only=False): +def _is_literal_container(tree, maps_only=False): """Test whether tree is a container literal understood by lazyrec[].""" if not maps_only: if type(tree) in (List, Tuple, Set): @@ -161,10 +577,10 @@ def is_literal_container(tree, maps_only=False): # it is too easy to accidentally set up an infinite recursion. 
# # This is ok: -# force1(lst)[0] = (10 * (force1(lst()[0]) if isinstance(lst, Lazy1) else force1(lst[0]))) +# force1(lst)[0] = (10 * (force1(lst()[0]) if isinstance(lst, Lazy) else force1(lst[0]))) # # but this blows up (by infinite recursion) later when we eventually force lst[0]: -# force1(lst)[0] = Lazy1(lambda: (10 * (force1(lst()[0]) if isinstance(lst, Lazy1) else force1(lst[0])))) +# force1(lst)[0] = Lazy(lambda: (10 * (force1(lst()[0]) if isinstance(lst, Lazy) else force1(lst[0])))) # # We **could** solve this by forcing and capturing the current value before assigning, # instead of allowing the RHS to refer to a lazy list element. But on the other hand, @@ -177,11 +593,20 @@ def is_literal_container(tree, maps_only=False): # - don't lazify "for", the loop counter changes value imperatively (and usually rather rapidly) # full list: see unpythonic.syntax.scopeanalyzer.get_names_in_store_context (and the link therein) -def lazify(body): +def _lazify(body): # first pass, outside-in userlambdas = detect_lambda(body) - body = dyn._macro_expander.visit(body) + # Expand any inner macro invocations. Particularly, this expands away any `lazyrec[]` and `lazy[]` + # so they become easier to work with. We also know that after this, any `Subscript` is really a + # subscripting operation and not a macro invocation. + # + # We must explicitly use recursive mode to ensure we get rid of all macro invocations, because + # we may be running inside a `with step_expansion`, which uses the expand-once-only mode. + body = dyn._macro_expander.visit_recursively(body) + + # `lazify`'s analyzer needs the `ctx` attributes in `tree` to be filled in correctly. + body = fix_ctx(body, copy_seen_nodes=False) # TODO: or maybe copy seen nodes? 
# second pass, inside-out class LazifyTransformer(ASTTransformer): @@ -224,6 +649,10 @@ def f(tree): # else forcing_mode == "off" return tree + # Python 3.12+: leave `type` statements alone (lazifying a type declaration makes no sense) + elif type(tree) is TypeAlias: + return tree + elif type(tree) in (FunctionDef, AsyncFunctionDef, Lambda): if type(tree) is Lambda and id(tree) not in userlambdas: return self.generic_visit(tree) # ignore macro-introduced lambdas (but recurse inside them) @@ -253,6 +682,10 @@ def f(tree): return tree elif type(tree) is Call: + # We don't need to expand in the output of `_lazyrec`, + # because we don't recurse further into the args of the call, + # so the `lazify` transformer never sees the confusing `Subscript` + # instances that are actually macro invocations for `lazy[]`. def transform_arg(tree): # add any needed force() invocations inside the tree, # but leave the top level of simple references untouched. @@ -260,7 +693,7 @@ def transform_arg(tree): self.withstate(tree, forcing_mode=("off" if isref else "full")) tree = self.visit(tree) if not isref: # (re-)thunkify expr; a reference can be passed as-is. - tree = lazyrec(tree) + tree = _lazyrec(tree) return tree def transform_starred(tree, dstarred=False): @@ -269,8 +702,8 @@ def transform_starred(tree, dstarred=False): tree = self.visit(tree) # lazify items if we have a literal container # we must avoid lazifying any other exprs, since a Lazy cannot be unpacked. 
- if is_literal_container(tree, maps_only=dstarred): - tree = lazyrec(tree) + if _is_literal_container(tree, maps_only=dstarred): + tree = _lazyrec(tree) return tree # let bindings have a role similar to function arguments, so auto-lazify there @@ -289,33 +722,54 @@ def transform_starred(tree, dstarred=False): thelambda.body = self.visit(thelambda.body) return tree - # namelambda() is used by let[] and do[] - # Lazy() is a strict function, takes a lambda, constructs a Lazy object - # _autoref_resolve doesn't need any special handling + # Don't lazify in calls to some specific functions we know to be strict. + # Some of these are performance optimizations; others must be left as-is + # for other macros to be able to see the original calls. (It also generates + # cleaner expanded output.) + # - `namelambda` (emitted by `let[]`, `do[]`, and `test[]`) + # - All known container constructor calls (listed in `_ctorcalls_all`). + # - `Lazy` takes a lambda, constructs a `Lazy` object; if we're calling `Lazy`, + # the expression is already lazy. + # - `_autoref_resolve` does the name lookup in `with autoref` blocks. + # + # Don't lazify in calls to return-value utilities, because return values + # are never implicitly lazy in `unpythonic`. + # - `Values` constructs a multiple-return-values and/or named return values. + # - `(chain_conts(cc1, cc2))(args)` handles a return value in `with continuations`. 
elif (isdo(tree) or is_decorator(tree.func, "namelambda") or - any(isx(tree.func, s) for s in _ctorcalls_all) or isx(tree.func, isLazy) or - any(isx(tree.func, s) for s in ("_autoref_resolve", "AutorefMarker"))): - # here we know the operator (.func) to be one of specific names; - # don't transform it to avoid confusing lazyrec[] (important if this - # is an inner call in the arglist of an outer, lazy call, since it - # must see any container constructor calls that appear in the args) + any(isx(tree.func, s) for s in _ctorcalls_all) or + isx(tree.func, _expanded_lazy_name) or + isx(tree.func, "_autoref_resolve") or + isx(tree.func, "Values") or + (type(tree.func) is Call and isx(tree.func.func, "chain_conts"))): + # Here we know the operator (.func) to be one of specific names; + # don't transform it to avoid confusing `lazyrec[]`. # - # TODO: correct forcing mode for `rec`? We shouldn't need to forcibly use "full", + # This is especially important, if this is an inner call in the + # arglist of an outer, lazy call, since it must see any container + # constructor calls that appear in the args. + # + # But *do* transform in the positional and named args of the call; + # doing so generates the code to force any promises that are passed + # to the function being called. + # + # TODO: correct forcing mode for recursion? We shouldn't need to forcibly use "full", # since maybe_force_args() already fully forces any remaining promises # in the args when calling a strict function. + # NOTE v0.15.0: In practice, using whatever is the currently active mode seems to be fine. tree.args = self.visit(tree.args) tree.keywords = self.visit(tree.keywords) return tree - else: - ln, co = tree.lineno, tree.col_offset + else: # general case thefunc = self.visit(tree.func) + # Lazify the arguments of the call. 
adata = [] for x in tree.args: if type(x) is Starred: # *args in Python 3.5+ v = transform_starred(x.value) - v = Starred(value=q[a[v]], lineno=ln, col_offset=co) + v = Starred(value=q[a[v]]) else: v = transform_arg(x) adata.append(v) @@ -331,17 +785,21 @@ def transform_starred(tree, dstarred=False): # Construct the call mycall = Call(func=q[h[maybe_force_args]], args=[q[a[thefunc]]] + [q[a[x]] for x in adata], - keywords=[keyword(arg=k, value=q[a[x]]) for k, x in kwdata], - lineno=ln, col_offset=co) + keywords=[keyword(arg=k, value=q[a[x]]) for k, x in kwdata]) tree = mycall return tree + # NOTE: We must expand all inner macro invocations before we hit this, or we'll produce nonsense. + # Hence it is easiest to have `lazify` expand inside-out. elif type(tree) is Subscript: # force only accessed part of obj[...] + # force the slice expression; it is needed to extract the relevant items. self.withstate(tree.slice, forcing_mode="full") tree.slice = self.visit(tree.slice) # resolve reference to the actual container without forcing its items. self.withstate(tree.value, forcing_mode="flat") tree.value = self.visit(tree.value) + # using the currently active forcing mode, force the value returned + # by the subscript expression. tree = f(tree) return tree @@ -363,10 +821,13 @@ def transform_starred(tree, dstarred=False): # in reality there is always an f() around the whole expr.) self.withstate(tree.value, forcing_mode="flat") tree.value = self.visit(tree.value) + # using the currently active forcing mode, force the value returned + # by the attribute expression. tree = f(tree) return tree elif type(tree) is Name and type(tree.ctx) is Load: + # using the currently active forcing mode, force the value. tree = f(tree) # must not recurse when a Name changes into a Call. 
return tree @@ -418,8 +879,10 @@ def transform_starred(tree, dstarred=False): # The second strict callee may get promises instead of values, because the # strict trampoline does not have the maybe_force_args (that usually forces the args # when lazy code calls into strict code). - return wrapwith(item=q[h[dyn.let](_build_lazy_trampoline=True)], - body=newbody, - locref=body[0]) + with q as quoted: + with h[dyn.let](_build_lazy_trampoline=True): + with a: + newbody + return quoted # ----------------------------------------------------------------------------- diff --git a/unpythonic/syntax/letdo.py b/unpythonic/syntax/letdo.py index 59f8e9a2..42ea4dc5 100644 --- a/unpythonic/syntax/letdo.py +++ b/unpythonic/syntax/letdo.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- """Local bindings (let), imperative code in expression position (do).""" +__all__ = ["where", + "let", "letseq", "letrec", + "dlet", "dletseq", "dletrec", + "blet", "bletseq", "bletrec", + "local", "delete", "do", "do0"] + # Let constructs are implemented as sugar around unpythonic.lispylet. 
# # We take this approach because letrec needs assignment (must create @@ -15,46 +21,355 @@ from functools import partial -from ast import (Name, Attribute, +from ast import (Name, Tuple, List, FunctionDef, Return, AsyncFunctionDef, arguments, arg, - Load) -import sys + Store, Del) -from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 +from mcpyrate.quotes import macros, q, u, n, a, t, h # noqa: F401 -from mcpyrate import gensym -from mcpyrate.markers import ASTMarker -from mcpyrate.quotes import is_captured_value -from mcpyrate.utils import NestingLevelTracker -from mcpyrate.walkers import ASTTransformer +from mcpyrate import gensym, namemacro, parametricmacro +from mcpyrate.quotes import capture_as_macro, is_captured_value +from mcpyrate.walkers import ASTTransformer, ASTVisitor from ..dynassign import dyn from ..lispylet import _let as letf, _dlet as dletf, _blet as bletf -from ..seq import do as dof from ..misc import namelambda +from ..seq import do as dof +from .letdoutil import (isdo, isenvassign, UnexpandedEnvAssignView, + UnexpandedLetView, canonize_bindings) +from .nameutil import getname, is_unexpanded_expr_macro from .scopeanalyzer import scoped_transform -from .letdoutil import isenvassign, UnexpandedEnvAssignView -def let(bindings, body): - return _letimpl(bindings, body, "let") +# -------------------------------------------------------------------------------- +# Macro interface internal helper + +# NOTE: At the macro interface, the invocations `let()[...]` (empty args) +# and `let[...]` (no args) were indistinguishable in MacroPy. This was a +# problem, because it might be that the user wrote the body but simply +# forgot to put anything in the parentheses. (There's `do[]` if you need +# a `let` without making any bindings.) +# +# In `mcpyrate`, `let()[...]` is a syntax error. The preferred syntax, +# when using macro arguments, is `let[...][...]`. 
When this is not +# possible (in decorator position up to Python 3.8), then `let(...)[...]` +# is acceptable. But empty brackets/parentheses are not accepted. Thus, +# we will have an empty `args` list only when there are no brackets/parentheses +# for the macro arguments part. +# +# So when `args` is empty, this function assumes haskelly let syntax +# `let[[...] in ...]` or `let[..., where[...]]`. In these cases, +# both the bindings and the body reside inside the brackets (i.e., +# in the AST contained in the `tree` argument). +# +# Since the brackets/parentheses must be deleted when no macro arguments +# are given, this is now the correct assumption to make. +# +# But note that if needed elsewhere, `mcpyrate` has the `invocation` kwarg +# in the macro interface that gives a copy of the whole macro invocation +# node (so we could see the exact original syntax). +# +# letsyntax_mode: used by let_syntax to allow template definitions. +def _destructure_and_apply_let(tree, args, macro_expander, let_transformer, letsyntax_mode=False): + with dyn.let(_macro_expander=macro_expander): # implicit do (extra bracket notation) needs this. + if args: + bs = canonize_bindings(args, letsyntax_mode=letsyntax_mode) + return let_transformer(bindings=bs, body=tree) + # haskelly syntax, let[[...] in ...], let[..., where[...]] + view = UnexpandedLetView(tree) # note "tree" here is only the part inside the brackets + return let_transformer(bindings=view.bindings, body=view.body) + +# -------------------------------------------------------------------------------- +# Macro interface - expr macros + +@namemacro +def where(tree, *, syntax, **kw): + """[syntax, special] `where` operator for let. + + Usage:: + + let[body, where[k0 := v0, ...]] + + Only meaningful for declaring the bindings in a let-where, for all + expression-form let constructs: `let`, `letseq`, `letrec`, `let_syntax`, + `abbrev`. 
+ """ + if syntax != "name": + raise SyntaxError("where (unpythonic.syntax.letdo.where) is a name macro only") # pragma: no cover + raise SyntaxError("where (unpythonic.syntax.letdo.where) is only meaningful in a let[body, where[k0 := v0, ...]]") # pragma: no cover + +@parametricmacro +def let(tree, *, args, syntax, expander, **kw): + """[syntax, expr] Introduce expression-local variables. + + This is sugar on top of ``unpythonic.lispylet.let``. + + Usage:: + + let[k0 := v0, ...][body] + let[k0 := v0, ...][[body0, ...]] + + where ``body`` is an expression. The names bound by ``let`` are local; + they are available in ``body``, and do not exist outside ``body``. + + Alternative haskelly syntax is also available:: + + let[[k0 := v0, ...] in body] + let[[k0 := v0, ...] in [body0, ...]] + let[body, where[k0 := v0, ...]] + let[[body0, ...], where[k0 := v0, ...]] + + For a body with multiple expressions, use an extra set of brackets, + as shown above. This inserts a ``do``. Only the outermost extra brackets + are interpreted specially; all others in the bodies are interpreted + as usual, as lists. + + Note that in the haskelly syntax, the extra brackets for a multi-expression + body should enclose only the ``body`` part. + + Each ``name`` in the same ``let`` must be unique. + + Starting at v0.15.3, rebinding of let-bound variables inside `body` + is supported using the walrus assignment syntax, ``x := 42``. + The new syntax is preferred, but the old one is still available + for backward compatibility. + + From v0.15.0 to v0.15.2, rebinding of let-bound variables inside `body` + is supported with `unpythonic` env-assignment syntax, ``x << 42``. + This is an expression, performing the assignment, and returning the new value. + + In a multiple-expression body, also an internal definition context exists + for local variables that are not part of the ``let``; see ``do`` for details. + + Technical points: + + - In reality, the let-bound variables live in an ``unpythonic.env``. 
+ This macro performs the magic to make them look (and pretty much behave) + like lexical variables. + + - Compared to ``unpythonic.lispylet.let``, the macro version needs no quotes + around variable names in bindings. + + - The body is automatically wrapped in a ``lambda e: ...``. + + - For all ``x`` in bindings, the macro transforms lookups ``x --> e.x``. + + - Lexical scoping is respected (so ``let`` constructs can be nested) + by actually using a unique name (gensym) instead of just ``e``. + + - In the case of a multiple-expression body, the ``do`` transformation + is applied first to ``[body0, ...]``, and the result becomes ``body``. + """ + if syntax != "expr": + raise SyntaxError("let is an expr macro only") # pragma: no cover + + # The `let[]` family of macros expands inside out. + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _let) + +@parametricmacro +def letseq(tree, *, args, syntax, expander, **kw): + """[syntax, expr] Let with sequential binding (like Scheme/Racket let*). + + Like ``let``, but bindings take effect sequentially. Later bindings + shadow earlier ones if the same name is used multiple times. + + Expands to nested ``let`` expressions. + """ + if syntax != "expr": + raise SyntaxError("letseq is an expr macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _letseq) + +@parametricmacro +def letrec(tree, *, args, syntax, expander, **kw): + """[syntax, expr] Let with mutually recursive binding. + + Like ``let``, but bindings can see other bindings in the same ``letrec``. + + Each ``name`` in the same ``letrec`` must be unique. + + The definitions are processed sequentially, left to right. A definition + may refer to any previous definition. If ``value`` is callable (lambda), + it may refer to any definition, including later ones. + + This is useful for locally defining mutually recursive functions. 
+ """ + if syntax != "expr": + raise SyntaxError("letrec is an expr macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _letrec) + +# ----------------------------------------------------------------------------- +# Macro interface - decorator versions, for "let over def". + +@parametricmacro +def dlet(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] Decorator version of let, for 'let over def'. + + Example:: + + @dlet[x := 0] + def count(): + (x := x + 1) + return x + assert count() == 1 + assert count() == 2 + + **CAUTION**: function arguments, local variables, and names declared as + ``global`` or ``nonlocal`` in a given lexical scope shadow names from the + ``let`` environment *for the entirety of that lexical scope*. (This is + modeled after Python's standard scoping rules.) + + **CAUTION**: assignment to the let environment is ``name := value``; + the regular syntax ``name = value`` creates a local variable in the + lexical scope of the ``def``. + """ + if syntax != "decorator": + raise SyntaxError("dlet is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _dlet) + +@parametricmacro +def dletseq(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] Decorator version of letseq, for 'letseq over def'. + + Expands to nested function definitions, each with one ``dlet`` decorator. + + Example:: + + @dletseq[x := 1, + x := x + 1, + x := x + 2] + def g(a): + return a + x + assert g(10) == 14 + """ + if syntax != "decorator": + raise SyntaxError("dletseq is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _dletseq) + +@parametricmacro +def dletrec(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] Decorator version of letrec, for 'letrec over def'. 
+ + Example:: + + @dletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] + def f(x): + return evenp(x) + assert f(42) is True + assert f(23) is False + + Same cautions apply as to ``dlet``. + """ + if syntax != "decorator": + raise SyntaxError("dletrec is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _dletrec) + +@parametricmacro +def blet(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] def --> let block. + + Example:: + + @blet[x := 21] + def result(): + return 2 * x + assert result == 42 + """ + if syntax != "decorator": + raise SyntaxError("blet is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _blet) + +@parametricmacro +def bletseq(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] def --> letseq block. + + Example:: + + @bletseq[x := 1, + x := x + 1, + x := x + 2] + def result(): + return x + assert result == 4 + """ + if syntax != "decorator": + raise SyntaxError("bletseq is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _bletseq) + +@parametricmacro +def bletrec(tree, *, args, syntax, expander, **kw): + """[syntax, decorator] def --> letrec block. 
+ + Example:: -def letseq(bindings, body): + @bletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] + def result(): + return evenp(42) + assert result is True + + Because names inside a ``def`` have mutually recursive scope, + an almost equivalent pure Python solution (no macros) is:: + + from unpythonic import call + + @call + def result(): + evenp = lambda x: (x == 0) or oddp(x - 1) + oddp = lambda x: (x != 0) and evenp(x - 1) + return evenp(42) + assert result is True + """ + if syntax != "decorator": + raise SyntaxError("bletrec is a decorator macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _destructure_and_apply_let(tree, args, expander, _bletrec) + +# -------------------------------------------------------------------------------- +# Syntax transformers + +def _let(bindings, body): + return _let_expr_impl(bindings, body, "let") + +_our_let = capture_as_macro(let) +_our_letseq = capture_as_macro(letseq) +def _letseq(bindings, body): if not bindings: return body first, *rest = bindings - # TODO: Could just return hygienic macro invocations, but that needs to be done - # TODO: where the macro interfaces are visible. See `unpythonic.syntax.simplelet` - # TODO: for how to do it. - return let([first], letseq(rest, body)) - -def letrec(bindings, body): - return _letimpl(bindings, body, "letrec") - -def _letimpl(bindings, body, mode): + # We use hygienic macro references in the output, + # so that the expander can expand them later. + if rest: + nested_letseq = q[a[_our_letseq][t[rest]][a[body]]] + return q[a[_our_let][a[first]][a[nested_letseq]]] + else: + # We must do this optimization (no letseq with empty bindings) + # because empty bindings confuse `_destructure_and_apply_let`. 
+ return q[a[_our_let][a[first]][a[body]]] + +def _letrec(bindings, body): + return _let_expr_impl(bindings, body, "letrec") + +def _let_expr_impl(bindings, body, mode): """bindings: sequence of ast.Tuple: (k1, v1), (k2, v2), ..., (kn, vn)""" assert mode in ("let", "letrec") @@ -62,39 +377,25 @@ def _letimpl(bindings, body, mode): # invocations in both bindings and body. # # But apply the implicit `do` (extra bracket syntax) first. - body = implicit_do(body) - body = dyn._macro_expander.visit(body) + # (It is important we expand at least that immediately after, to resolve its local variables, + # because those may have the same lexical names as some of the let-bindings.) + body = _implicit_do(body) + body = dyn._macro_expander.visit_recursively(body) if not bindings: # Optimize out a `let` with no bindings. The macro layer cannot trigger # this case, because our syntaxes always require at least one binding. # So this check is here just to protect against use with no bindings directly # from other syntax transformers, which in theory could attempt anything. - # - # TODO: update this comment for mcpyrate - # The reason the macro layer never calls us with no bindings is technical. - # In the macro interface, with no bindings, the macro's `args` are `()` - # whether it was invoked as `let()[...]` or just `let[...]`. Thus, - # there is no way to distinguish, in the macro layer, between these - # two. We can't use `UnexpandedLetView` to do the dirty work of AST - # analysis, because MacroPy does too much automatically: in the macro - # layer, `tree` is only the part inside the brackets. So we really - # can't see whether the part outside the brackets was a Call with no - # arguments, or just a Name - both cases get treated exactly the same, - # as a macro invocation with empty `args`. - # - # The latter form, `let[...]`, is used by the haskelly syntax - # `let[(...) 
in ...]`, `let[..., where(...)]` - and in these cases, - # both the bindings and the body reside inside the brackets. return body # pragma: no cover - bindings = dyn._macro_expander.visit(bindings) + bindings = dyn._macro_expander.visit_recursively(bindings) names, values = zip(*[b.elts for b in bindings]) # --> (k1, ..., kn), (v1, ..., vn) - names = [k.id for k in names] # any duplicates will be caught by env at run-time + names = [getname(k, accept_attr=False) for k in names] # any duplicates will be caught by env at run-time e = gensym("e") - envset = Attribute(value=q[n[e]], attr="set", ctx=Load()) + envset = q[n[f"{e}.set"]] - transform = partial(letlike_transform, envname=e, lhsnames=names, rhsnames=names, setter=envset) + transform = partial(_letlike_transform, envname=e, lhsnames=names, rhsnames=names, setter=envset) if mode == "letrec": values = [transform(rhs) for rhs in values] # RHSs of bindings values = [q[h[namelambda](u[f"letrec_binding{j}_{lhs}"])(a[rhs])] @@ -110,33 +411,34 @@ def _letimpl(bindings, body, mode): # - the exact AST structure, for the views letter = letf bindings = [q[(u[k], a[v])] for k, v in zip(names, values)] - newtree = q[h[letter](a[Tuple(elts=bindings)], a[body], mode=u[mode])] + newtree = q[h[letter](t[bindings], a[body], mode=u[mode])] return newtree -def letlike_transform(tree, envname, lhsnames, rhsnames, setter, dowrap=True): +def _letlike_transform(tree, envname, lhsnames, rhsnames, setter, dowrap=True): """Common transformations for let-like operations. Namely:: + x := val --> e.set('x', val) x << val --> e.set('x', val) x --> e.x (when x appears in load context) # ... -> lambda e: ... 
(applied if dowrap=True) - lhsnames: names to recognize on the LHS of x << val as belonging to this env + lhsnames: names to recognize on the LHS of env-assignment (`x := val` or `x << val`) as belonging to this env rhsnames: names to recognize anywhere in load context as belonging to this env - These are separate mainly for ``do[]``, so that we can have new bindings + The LHS/RHS names are separate mainly for ``do[]``, so that we can have new bindings take effect only in following exprs. setter: function, (k, v) --> v, side effect to set e.k to v """ - tree = transform_envassignment(tree, lhsnames, setter) - tree = transform_name(tree, rhsnames, envname) + tree = _transform_envassignment(tree, lhsnames, setter) + tree = _transform_name(tree, rhsnames, envname) if dowrap: - tree = envwrap(tree, envname) + tree = _envwrap(tree, envname) return tree -def transform_envassignment(tree, lhsnames, envset): - """x << val --> e.set('x', val) (for names bound in this environment)""" +def _transform_envassignment(tree, lhsnames, envset): + """`x := val` or `x << val` --> `e.set('x', val)` (for names bound in this environment)""" # names_in_scope: according to Python's standard binding rules, see scopeanalyzer.py. # Variables defined in let envs are thus not listed in `names_in_scope`. def transform(tree, names_in_scope): @@ -148,13 +450,15 @@ def transform(tree, names_in_scope): return tree return scoped_transform(tree, callback=transform) -def transform_name(tree, rhsnames, envname): - """x --> e.x (in load context; for names bound in this environment)""" +def _transform_name(tree, rhsnames, envname): + """`x` --> `e.x` (in load context; for names bound in this environment)""" # names_in_scope: according to Python's standard binding rules, see scopeanalyzer.py. # Variables defined in let envs are thus not listed in `names_in_scope`. 
def transform(tree, names_in_scope): # This transformation is deceptively simple, hence requires some comment: # + # - The goal is to transform read accesses to let variables, `x` --> `e.x`. + # # - Attributes (and Subscripts) work, because we are called again for # the `value` part of the `Attribute` (or `Subscript`) node, which # then gets transformed if it's a `Name` matching our rules. @@ -170,62 +474,62 @@ def transform(tree, names_in_scope): # in those parts of code where it is used, so an outer let will # leave it alone. if type(tree) is Name and tree.id in rhsnames and tree.id not in names_in_scope: - hasctx = hasattr(tree, "ctx") # macro-created nodes might not have a ctx. - if hasctx and type(tree.ctx) is not Load: # let variables are rebound using `<<`, not `=`. + if type(getattr(tree, "ctx", None)) in (Store, Del): # Skip assignments and deletes. return tree attr_node = q[n[f"{envname}.{tree.id}"]] - if hasctx: - attr_node.ctx = tree.ctx # let mcpyrate fix it if needed + attr_node.ctx = getattr(tree, "ctx", None) return attr_node return tree return scoped_transform(tree, callback=transform) -def envwrap(tree, envname): +def _envwrap(tree, envname): """... -> lambda e: ...""" lam = q[lambda _: a[tree]] lam.args.args[0] = arg(arg=envname) # lambda e44: ... return lam # ----------------------------------------------------------------------------- -# Decorator versions, for "let over def". +# Syntax transformers for decorator versions, for "let over def". 
-def dlet(bindings, body): - return _dletimpl(bindings, body, "let", "decorate") +def _dlet(bindings, body): + return _let_decorator_impl(bindings, body, "let", "decorate") -def dletseq(bindings, body): - return _dletseqimpl(bindings, body, "decorate") +def _dletseq(bindings, body): + return _dletseq_impl(bindings, body, "decorate") -def dletrec(bindings, body): - return _dletimpl(bindings, body, "letrec", "decorate") +def _dletrec(bindings, body): + return _let_decorator_impl(bindings, body, "letrec", "decorate") -def blet(bindings, body): - return _dletimpl(bindings, body, "let", "call") +def _blet(bindings, body): + return _let_decorator_impl(bindings, body, "let", "call") -def bletseq(bindings, body): - return _dletseqimpl(bindings, body, "call") +def _bletseq(bindings, body): + return _dletseq_impl(bindings, body, "call") -def bletrec(bindings, body): - return _dletimpl(bindings, body, "letrec", "call") +def _bletrec(bindings, body): + return _let_decorator_impl(bindings, body, "letrec", "call") -# Very similar to _letimpl, but perhaps more readable to keep these separate. -def _dletimpl(bindings, body, mode, kind): +# Very similar to _let_expr_impl, but perhaps more readable to keep these separate. +def _let_decorator_impl(bindings, body, mode, kind): assert mode in ("let", "letrec") assert kind in ("decorate", "call") if type(body) not in (FunctionDef, AsyncFunctionDef): raise SyntaxError("Expected a function definition to decorate") # pragma: no cover + body = dyn._macro_expander.visit_recursively(body) if not bindings: # Similarly as above, this cannot trigger from the macro layer no # matter what that layer does. This is here to optimize away a `dlet` # with no bindings, when used directly from other syntax transformers. 
return body # pragma: no cover + bindings = dyn._macro_expander.visit_recursively(bindings) names, values = zip(*[b.elts for b in bindings]) # --> (k1, ..., kn), (v1, ..., vn) - names = [k.id for k in names] # any duplicates will be caught by env at run-time + names = [getname(k, accept_attr=False) for k in names] # any duplicates will be caught by env at run-time e = gensym("e") - envset = Attribute(value=q[n[e]], attr="set", ctx=Load()) + envset = q[n[f"{e}.set"]] - transform1 = partial(letlike_transform, envname=e, lhsnames=names, rhsnames=names, setter=envset) + transform1 = partial(_letlike_transform, envname=e, lhsnames=names, rhsnames=names, setter=envset) transform2 = partial(transform1, dowrap=False) if mode == "letrec": values = [transform1(rhs) for rhs in values] @@ -249,23 +553,23 @@ def _dletimpl(bindings, body, mode, kind): body.args.kw_defaults = body.args.kw_defaults + [None] return body -def _dletseqimpl(bindings, body, kind): +def _dletseq_impl(bindings, body, kind): # What we want: # - # @dletseq[(x, 1), - # (x, x+1), - # (x, x+2)] + # @dletseq[x := 1, + # x := x + 1, + # x := x + 2] # def g(*args, **kwargs): # return x # assert g() == 4 # # --> # - # @dlet[(x, 1)] + # @dlet[x := 1] # def g(*args, **kwargs, e1): # original args from tree go to the outermost def - # @dlet[(x, x+1)] # on RHS, important for e1.x to be in scope + # @dlet[x := x + 1] # on RHS, important for e1.x to be in scope # def g2(*, e2): - # @dlet[(x, x+2)] + # @dlet[x := x + 2] # def g3(*, e3): # expansion proceeds from inside out # return e3.x # original args travel here by the closure property # return g3() @@ -283,16 +587,14 @@ def _dletseqimpl(bindings, body, kind): userargs = body.args # original arguments to the def fname = body.name - noargs = arguments(args=[], kwonlyargs=[], vararg=None, kwarg=None, + noargs = arguments(args=[], posonlyargs=[], kwonlyargs=[], vararg=None, kwarg=None, defaults=[], kw_defaults=[]) - if sys.version_info >= (3, 8, 0): # Python 3.8+: 
positional-only arguments - noargs.posonlyargs = [] iname = gensym(f"{fname}_inner") body.args = noargs body.name = iname *rest, last = bindings - dletter = dlet if kind == "decorate" else blet + dletter = _dlet if kind == "decorate" else _blet innerdef = dletter([last], body) # optimization: in the final step, no need to generate a wrapper function @@ -314,141 +616,327 @@ def _dletseqimpl(bindings, body, kind): body=[innerdef, ret], decorator_list=[], returns=None) # no return type annotation - return _dletseqimpl(rest, outer, kind) + return _dletseq_impl(rest, outer, kind) # ----------------------------------------------------------------------------- # Imperative code in expression position. Uses the "let" machinery. +# +# Macro interface + +def local(tree, *, syntax, **kw): + """[syntax] Declare a local name in a "do". + + Usage:: + + local[name := value] + + Only meaningful in a ``do[...]``, ``do0[...]``, or an implicit ``do`` + (extra bracket syntax). + + The declaration takes effect starting from next item in the ``do``, i.e. + the item that comes after the ``local[]``. It will not shadow nonlocal + variables of the same name in any earlier items of the same ``do``, and + in the item making the definition, the old bindings are still in effect + on the RHS. + + This means that if you want, you can declare a local ``x`` that takes its + initial value from a nonlocal ``x``, by ``local[x := x]``. Here the ``x`` + on the RHS is the nonlocal one (since the declaration has not yet taken + effect), and the ``x`` on the LHS is the name given to the new local variable + that only exists inside the ``do``. Any references to ``x`` in any further + items in the same ``do`` will point to the local ``x``. 
+ """ + if syntax != "expr": + raise SyntaxError("local is an expr macro only") # pragma: no cover + raise SyntaxError("local[] is only valid at the top level of a do[] or do0[]") # pragma: no cover + +def delete(tree, *, syntax, **kw): + """[syntax] Delete a previously declared local name in a "do". + + Usage:: + + delete[name] + + Only meaningful in a ``do[...]``, ``do0[...]``, or an implicit ``do`` + (extra bracket syntax). + + The deletion takes effect starting from the next item; hence, the + deleted local variable will no longer shadow nonlocal variables of + the same name in any later items of the same `do`. + + Note ``do[]`` supports local variable deletion, but the ``let[]`` + constructs don't, by design. + """ + if syntax != "expr": + raise SyntaxError("delete is an expr macro only") # pragma: no cover + raise SyntaxError("delete[] is only valid at the top level of a do[] or do0[]") # pragma: no cover + +def do(tree, *, syntax, expander, **kw): + """[syntax, expr] Stuff imperative code into an expression position. + + Return value is the value of the last expression inside the ``do``. + See also ``do0``. + + Usage:: + + do[body0, ...] + + Example:: + + do[local[x := 42], + print(x), + x := 23, + x] + + This is sugar on top of ``unpythonic.seq.do``, but with some extra features. + + - To declare and initialize a local name, use ``local[name := value]``. + + The operator ``local`` is syntax, not really a function, and it + only exists inside a ``do``. There is also an operator ``delete`` + to delete a previously declared local name in the ``do``. + + Both ``local`` and ``delete``, if used, should be imported as macros. + + - By design, there is no way to create an uninitialized variable; + a value must be given at declaration time. Just use ``None`` + as an explicit "no value" if needed. 
-_do_level = NestingLevelTracker() # for checking validity of local[] and delete[] - -# Use `mcpyrate` ASTMarkers, so that the expander can do the dirty work of -# detecting macro invocations. Our `do[]` macro then only needs to detect -# instances of the appropriate markers. -class UnpythonicLetDoMarker(ASTMarker): - """AST marker related to unpythonic's let/do subsystem.""" -class UnpythonicDoLocalMarker(UnpythonicLetDoMarker): - """AST marker for local variable definitions in a `do` context.""" -class UnpythonicDoDeleteMarker(UnpythonicLetDoMarker): - """AST marker for local variable deletion in a `do` context.""" - -# TODO: fail-fast: promote `local[]`/`delete[]` usage errors to compile-time errors -# TODO: (doesn't currently work e.g. for `let` with an implicit do (extra bracket notation)) -def local(tree): # syntax transformer - if _do_level.value < 1: - raise SyntaxError("local[] is only valid within a do[] or do0[]") # pragma: no cover - return UnpythonicDoLocalMarker(tree) - -def delete(tree): # syntax transformer - if _do_level.value < 1: - raise SyntaxError("delete[] is only valid within a do[] or do0[]") # pragma: no cover - return UnpythonicDoDeleteMarker(tree) - -def do(tree): + - Names declared within the same ``do`` must be unique. Re-declaring + the same name is an expansion-time error. + + - To assign to an already declared local name, use ``name := value``. + + **local name declarations** + + A ``local`` declaration comes into effect in the expression following + the one where it appears. Thus:: + + result = [] + let((lst, []))[do[result.append(lst), # the let "lst" + local[lst := lst + [1]], # LHS: do "lst", RHS: let "lst" + result.append(lst)]] # the do "lst" + assert result == [[], [1]] + + **Syntactic ambiguity** + + These two cases cannot be syntactically distinguished: + + - Just one body expression, which is a literal tuple or list, + + - Multiple body expressions, represented as a literal tuple or list. 
+ + ``do`` always uses the latter interpretation. + + Whenever there are multiple expressions in the body, the ambiguity does not + arise, because then the distinction between the sequence of expressions itself + and its items is clear. + + Examples:: + + do[1, 2, 3] # --> tuple, 3 + do[(1, 2, 3)] # --> tuple, 3 (since in Python, the comma creates tuples; + # parentheses are only used for disambiguation) + do[[1, 2, 3]] # --> list, 3 + do[[[1, 2, 3]]] # --> list containing a list, [1, 2, 3] + do[([1, 2, 3],)] # --> tuple containing a list, [1, 2, 3] + do[[1, 2, 3],] # --> tuple containing a list, [1, 2, 3] + do[[(1, 2, 3)]] # --> list containing a tuple, (1, 2, 3) + do[((1, 2, 3),)] # --> tuple containing a tuple, (1, 2, 3) + do[(1, 2, 3),] # --> tuple containing a tuple, (1, 2, 3) + + It is possible to use ``unpythonic.misc.pack`` to create a tuple from + given elements: ``do[pack(1, 2, 3)]`` is interpreted as a single-item body + that creates a tuple (by calling a function). + + Note the outermost brackets belong to the ``do``; they don't yet create a list. + + In the *use brackets to denote a multi-expr body* syntax (e.g. ``multilambda``, + ``let`` constructs), the extra brackets already create a list, so in those + uses, the ambiguity does not arise. The transformation inserts not only the + word ``do``, but also the outermost brackets. For example:: + + let[x := 1, + y := 2][[ + [x, y]]] + + transforms to:: + + let[x := 1, + y := 2][do[[ # "do[" is inserted between the two opening brackets + [x, y]]]] # and its closing "]" is inserted here + + which already gets rid of the ambiguity. 
+ + **Notes** + + Macros are expanded in an inside-out order, so a nested ``let`` shadows + names, if the same names appear in the ``do``:: + + do[local[x := 17], + let[x := 23][ + print(x)], # 23, the "x" of the "let" + print(x)] # 17, the "x" of the "do" + + The reason we require local names to be declared is to allow write access + to lexically outer environments from inside a ``do``:: + + let[x := 17][ + do[x := 23, # no "local[...]"; update the "x" of the "let" + local[y := 42], # "y" is local to the "do" + print(x, y)]] + + With the extra bracket syntax, the latter example can be written as:: + + let[x := 17][[ + x := 23, + local[y := 42], + print(x, y)]] + + It's subtly different in that the first version has the do-items in a tuple, + whereas this one has them in a list, but the behavior is exactly the same. + + Python does it the other way around, requiring a ``nonlocal`` statement + to re-bind a name owned by an outer scope. + + The ``let`` constructs solve this problem by having the local bindings + declared in a separate block, which plays the role of ``local``. + """ + if syntax != "expr": + raise SyntaxError("do is an expr macro only") # pragma: no cover + with dyn.let(_macro_expander=expander): + return _do(tree) + +def do0(tree, *, syntax, expander, **kw): + """[syntax, expr] Like do, but return the value of the first expression.""" + if syntax != "expr": + raise SyntaxError("do0 is an expr macro only") # pragma: no cover + with dyn.let(_macro_expander=expander): + return _do0(tree) + +# -------------------------------------------------------------------------------- +# Syntax transformers + +def _do(tree): if type(tree) not in (Tuple, List): raise SyntaxError("do body: expected a sequence of comma-separated expressions") # pragma: no cover, let's not test the macro expansion errors. - # Handle nested `local[]`/`delete[]`. This will also expand any other nested macro invocations. 
- # TODO: If we want to make `do` an outside-in macro, instantiate another expander here and register - # TODO: only the `local` and `delete` transformers to it - grabbing them from the current expander's - # TODO: bindings to respect as-imports. (Expander instances are cheap in `mcpyrate`.) - # TODO: Grep the `unpythonic` codebase (and `mcpyrate` demos) for `MacroExpander` to see how. - with _do_level.changed_by(+1): - tree = dyn._macro_expander.visit(tree) - e = gensym("e") envset = q[n[f"{e}._set"]] # use internal _set to allow new definitions - envset.ctx = Load() envdel = q[n[f"{e}.pop"]] - envdel.ctx = Load() - def find_localdefs(tree): + islocaldef = partial(is_unexpanded_expr_macro, local, dyn._macro_expander) + isdelete = partial(is_unexpanded_expr_macro, delete, dyn._macro_expander) + + def transform_localdefs(tree): class LocaldefCollector(ASTTransformer): def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! - if isinstance(tree, UnpythonicDoLocalMarker): - expr = tree.body + expr = islocaldef(tree) + if expr: if not isenvassign(expr): - raise SyntaxError("local[...] takes exactly one expression of the form 'name << value'") # pragma: no cover + raise SyntaxError("local[...] takes exactly one expression of the form 'name := value' or 'name << value'") # pragma: no cover view = UnexpandedEnvAssignView(expr) self.collect(view.name) - # e.g. `x << 21`; preserve the original expr to make the assignment occur. - return self.visit(expr) # handle nested local[] (e.g. from `do0[local[y << 5],]`) - return self.generic_visit(tree) + view.value = self.visit(view.value) # nested local[] (e.g. from `do0[local[y := 5],]`) + return expr # `local[x := 21]` --> `x := 21`; compiling *that* makes the env-assignment occur. + return tree # don't recurse! 
c = LocaldefCollector() tree = c.visit(tree) return tree, c.collected - def find_deletes(tree): + def transform_deletes(tree): class DeleteCollector(ASTTransformer): def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! - if isinstance(tree, UnpythonicDoDeleteMarker): - expr = tree.body + expr = isdelete(tree) + if expr: if type(expr) is not Name: raise SyntaxError("delete[...] takes exactly one name") # pragma: no cover self.collect(expr.id) - return q[a[envdel](u[expr.id])] # -> e.pop(...) - return self.generic_visit(tree) + return q[a[envdel](u[expr.id])] # `delete[x]` --> `e.pop('x')` + return tree # don't recurse! c = DeleteCollector() tree = c.visit(tree) return tree, c.collected + def check_strays(ismatch, tree): + class StrayHelperMacroChecker(ASTVisitor): # TODO: refactor this? + def examine(self, tree): + if is_captured_value(tree): + return # don't recurse! + elif isdo(tree, expanded=False): + return # don't recurse! + elif ismatch(tree): + # Expand the stray helper macro invocation, to trigger its `SyntaxError` + # with a useful message, and *make the expander generate a use site traceback*. + # + # (If we just `raise` here directly, the expander won't see the use site + # of the `local[]` or `delete[]`, but just that of the `do[]`.) + dyn._macro_expander.visit(tree) + self.generic_visit(tree) + StrayHelperMacroChecker().visit(tree) + check_stray_localdefs = partial(check_strays, islocaldef) + check_stray_deletes = partial(check_strays, isdelete) + names = [] lines = [] for j, expr in enumerate(tree.elts, start=1): # Despite the recursion, this will not trigger false positives for nested do[] expressions, - # because do[] is a second-pass macro, so they expand from inside out. - expr, newnames = find_localdefs(expr) - expr, deletednames = find_deletes(expr) + # because the transformers only operate at the top level of this do[]. 
+ expr, newnames = transform_localdefs(expr) + expr, deletednames = transform_deletes(expr) if newnames and deletednames: raise SyntaxError("a do-item may have only local[] or delete[], not both") # pragma: no cover if newnames: if any(x in names for x in newnames): raise SyntaxError("local names must be unique in the same do") # pragma: no cover + + # Before transforming any further, check that there are no local[] or delete[] further in, where + # they don't belong. This allows the error message to show the *untransformed* source code for + # the erroneous invocation. These checkers respect the boundaries of any nested do[]. + check_stray_localdefs(expr) + check_stray_deletes(expr) + # The envassignment transform (LHS) needs the updated bindings, whereas # the name transform (RHS) should use the previous bindings, so that any # changes to bindings take effect starting from the **next** do-item. updated_names = [x for x in names + newnames if x not in deletednames] - expr = letlike_transform(expr, e, lhsnames=updated_names, rhsnames=names, setter=envset) + expr = _letlike_transform(expr, e, lhsnames=updated_names, rhsnames=names, setter=envset) expr = q[h[namelambda](u[f"do_line{j}"])(a[expr])] names = updated_names lines.append(expr) # CAUTION: letdoutil.py depends on the literal name "dof" to detect expanded do forms. # Also, the views depend on the exact AST structure. - thecall = q[h[dof]()] - thecall.args = lines + # AST-unquoting a `list` of ASTs in the arguments position of a quasiquoted call + # unpacks it into positional arguments. + thecall = q[h[dof](a[lines])] return thecall -def do0(tree): +_our_local = capture_as_macro(local) +_our_do = capture_as_macro(do) +def _do0(tree): if type(tree) not in (Tuple, List): raise SyntaxError("do0 body: expected a sequence of comma-separated expressions") # pragma: no cover elts = tree.elts - newelts = [] - # TODO: Would be cleaner to use `local[]` as a hygienically captured macro. 
- # Now we call the syntax transformer directly, and splice in the returned AST. - with _do_level.changed_by(+1): # it's alright, `local[]`, we're inside a `do0[]`. - firstexpr = elts[0] - firstexpr = dyn._macro_expander.visit(firstexpr) - thelocalexpr = q[_do0_result << a[firstexpr]] # noqa: F821, the local[] defines it inside the do[]. - newelts.append(q[a[local(thelocalexpr)]]) - newelts.extend(elts[1:]) - newelts.append(q[_do0_result]) # noqa: F821 -# newtree = q[t[newelts]] # TODO: doesn't work, missing lineno TODO: test with mcpyrate - newtree = Tuple(elts=newelts, lineno=tree.lineno, col_offset=tree.col_offset) - # TODO: Would be cleaner to use `do[]` as a hygienically captured macro. - return do(newtree) # do0[] is also just a do[] - -def implicit_do(tree): + # Use `local[]` and `do[]` as hygienically captured macros. + # + newelts = [q[a[_our_local][_do0_result := a[elts[0]]]], # noqa: F821, local[] defines it inside the do[]. + *elts[1:], + q[_do0_result]] # noqa: F821 + return q[a[_our_do][t[newelts]]] # do0[] is also just a do[] + +def _implicit_do(tree): """Allow a sequence of expressions in expression position. - Apply ``do[]`` if ``tree`` is a ``List``, otherwise return ``tree`` as-is. + Insert a ``do[]`` if ``tree`` is a ``List``, otherwise return ``tree`` as-is. Hence, in user code, to represent a sequence of expressions, use brackets:: [expr0, ...] - To represent a single literal list where ``implicit_do`` is in use, use an + To represent a single literal list where ``_implicit_do`` is in use, use an extra set of brackets:: [[1, 2, 3]] @@ -456,4 +944,4 @@ def implicit_do(tree): The outer brackets enable multiple-expression mode, and the inner brackets are then interpreted as a list. 
""" - return do(tree) if type(tree) is List else tree + return q[a[_our_do][t[tree.elts]]] if type(tree) is List else tree diff --git a/unpythonic/syntax/letdoutil.py b/unpythonic/syntax/letdoutil.py index 6581e092..2af69541 100644 --- a/unpythonic/syntax/letdoutil.py +++ b/unpythonic/syntax/letdoutil.py @@ -1,61 +1,136 @@ # -*- coding: utf-8 -*- -"""Detect let and do forms, and destructure them writably. +"""Detect let and do forms, and destructure them writably.""" -Separate from letdo.py for dependency reasons. -Separate from util.py due to the length. -""" +__all__ = ["canonize_bindings", # used by the macro interface layer + "isenvassign", "islet", "isdo", + "UnexpandedEnvAssignView", "UnexpandedLetView", "UnexpandedDoView", + "ExpandedLetView", "ExpandedDoView"] -from ast import (Call, Name, Subscript, Index, Compare, In, +from ast import (Call, Name, Subscript, Compare, In, Tuple, List, Constant, BinOp, LShift, Lambda) -import sys -from .astcompat import getconstant, Str -from .nameutil import isx, make_isxpred +from mcpyrate import unparse +from mcpyrate.astcompat import NamedExpr +from mcpyrate.core import Done + +from .nameutil import isx, getname + +letf_name = "letter" # must match what ``unpythonic.syntax.letdo._let_expr_impl`` uses in its output. +dof_name = "dof" # name must match what ``unpythonic.syntax.letdo.do`` uses in its output. +currycall_name = "currycall" # output of ``unpythonic.syntax.autocurry`` + +def _get_subscript_slice(tree): + assert type(tree) is Subscript + return tree.slice +def _set_subscript_slice(tree, newslice): # newslice: AST + assert type(tree) is Subscript + tree.slice = newslice +def _canonize_macroargs_node(macroargs): + # We do this like `mcpyrate.expander.destructure_candidate` does, + # except that we also destructure a list. + if type(macroargs) in (List, Tuple): # [a0, a1, ...] 
+ return macroargs.elts + return [macroargs] # anything that doesn't have at least one comma at the top level + +# For analysis of let-bindings and env-assignments. +def _isname(tree): + """Return whether `tree` is a lexical name. + + The actual `ast.Name` may be wrapped in a `mcpyrate.core.Done`, which is produced + by expanded `@namemacro`s; we accept a `Done` containing an `ast.Name`, too. + + We don't accept hygienic captures, since those correspond to values, not names. + """ + return type(tree) is Name or (isinstance(tree, Done) and _isname(tree.body)) +def _isbindingtarget(tree, letsyntax_mode): + """Return whether `tree` is a valid target for a let-binding or env-assignment. -def where(*bindings): - """[syntax] Only meaningful in a let[body, where((k0, v0), ...)].""" - raise RuntimeError("where() is only meaningful in a let[body, where((k0, v0), ...)]") # pragma: no cover + letsyntax_mode: used by let_syntax to allow template definitions. + This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` + to appear in the variable-name position. + """ + return (_isname(tree) or + (letsyntax_mode and ((type(tree) is Call and _isname(tree.func)) or + (type(tree) is Subscript and _isname(tree.value))))) -_isletf = make_isxpred("letter") # name must match what ``unpythonic.syntax.letdo._letimpl`` uses in its output. -_isdof = make_isxpred("dof") # name must match what ``unpythonic.syntax.letdo.do`` uses in its output. -_iscurrycall = make_isxpred("currycall") # output of ``unpythonic.syntax.curry`` +def canonize_bindings(elts, letsyntax_mode=False): # public as of v0.14.3+ + """Convert any `let` bindings format supported by `unpythonic` into a canonical format. -# TODO: switch from call to subscript in name position for let_syntax templates. -def canonize_bindings(elts, allow_call_in_name_position=False): # public as of v0.14.3+ - """Wrap a single binding without container into a length-1 `list`. + Yell if the input format is invalid. 
- Pass through multiple bindings as-is. + The canonical format is a `list` of `ast.Tuple`:: - Yell if the input format is invalid. + [Tuple(elts=[k0, v0]), ...] - elts: `list` of bindings, either:: - [(k0, v0), ...] # multiple bindings contained in a tuple - [(k, v),] # single binding contained in a tuple also ok - [k, v] # special single binding format, missing tuple container + elts: `list` of bindings, one of:: + [k0 := v0, ...] # v0.15.3+: new env-assignment syntax, preferred + [k := v] # v0.15.3+ + [k0 << v0, ...] # v0.15.0+: previous env-assignment syntax + [k << v] # v0.15.0+ + [[k0, v0], ...] # v0.15.0+: accept also brackets (for consistency) + [[k, v]] # v0.15.0+ + [(k0, v0), ...] # multiple bindings contained in a tuple + [(k, v),] # single binding contained in a tuple also ok + [k, v] # special single binding format, missing tuple container where the ks and vs are AST nodes. - allow_call_in_name_position: used by let_syntax to allow template definitions; - in the call, the "function" is the template name, and the positional "parameters" - are the template parameters (which may then appear in the template body). + letsyntax_mode: used by let_syntax to allow template definitions. + This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` + to appear in the variable-name position. """ - def iskey(x): - return ((type(x) is Name) or - (allow_call_in_name_position and type(x) is Call and type(x.func) is Name)) - if len(elts) == 2 and iskey(elts[0]): + def iskvpairbinding(lst): + return len(lst) == 2 and _isbindingtarget(lst[0], letsyntax_mode) + + if len(elts) == 1: + if isenvassign(elts[0], letsyntax_mode) is LShift: # [k << v] + return [Tuple(elts=[elts[0].left, elts[0].right])] + if isenvassign(elts[0], letsyntax_mode) is NamedExpr: # [k := v] + return [Tuple(elts=[elts[0].target, elts[0].value])] + if len(elts) == 2 and iskvpairbinding(elts): # [k, v] return [Tuple(elts=elts)] # TODO: `mcpyrate`: just `q[t[elts]]`? 
- if all((type(b) is Tuple and len(b.elts) == 2 and iskey(b.elts[0])) for b in elts): + if all((type(b) is Tuple and iskvpairbinding(b.elts)) for b in elts): # [(k0, v0), ...] return elts - raise SyntaxError("expected bindings to be ((k0, v0), ...) or a single (k, v)") # pragma: no cover - -def isenvassign(tree): - """Detect whether tree is an unpythonic ``env`` assignment, ``name << value``. - - The only way this differs from a general left-shift is that the LHS must be - an ``ast.Name``. + if all((type(b) is List and iskvpairbinding(b.elts)) for b in elts): # [[k0, v0], ...] + return [Tuple(elts=b.elts) for b in elts] + if all(isenvassign(b, letsyntax_mode) for b in elts): # [k0 << v0, ...] or [k0 := v0, ...] + out = [] + for b in elts: + if isenvassign(b, letsyntax_mode) is LShift: + out.append(Tuple(elts=[b.left, b.right])) + else: # NamedExpr + out.append(Tuple(elts=[b.target, b.value])) + return out + raise SyntaxError("expected bindings to be `k0 := v0, ...`, `k0 << v0, ...`, `[k0, v0], ...`, or `(k0, v0), ...`, or a single `k := v`, `k << v`, or `k, v`") # pragma: no cover + +def isenvassign(tree, letsyntax_mode=False): + """Detect whether tree is an unpythonic ``env`` assignment. + + Starting at v0.15.3: new env-assignment syntax ``name := value`` is recommended. + + From v0.15.0 to v0.15.2, env-assignment used the syntax ``name << value``. + This is still available for backward compatibility. + + Return value is one of the constants: + `NamedExpr`: `tree` is an env-assignment, with modern syntax. + `LShift`: `tree` is an env-assignment, with classic syntax, + `False`: `tree` is not an env-assignment, + + The only way this differs from a left-shift or the usual kind of walrus assignment + is that the LHS must be an ``ast.Name``. + + letsyntax_mode: used by let_syntax to allow template definitions. + This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` + to appear in the variable-name position. 
""" - return type(tree) is BinOp and type(tree.op) is LShift and type(tree.left) is Name - + if type(tree) is BinOp and type(tree.op) is LShift and _isbindingtarget(tree.left, letsyntax_mode): + return LShift + if type(tree) is NamedExpr and _isbindingtarget(tree.target, letsyntax_mode): # added in 0.15.3 + return NamedExpr + return False + +# TODO: This would benefit from macro destructuring in the expander. +# TODO: See https://github.com/Technologicat/mcpyrate/issues/3 def islet(tree, expanded=True): """Test whether tree is a ``let[]``, ``letseq[]``, ``letrec[]``, ``let_syntax[]``, or ``abbrev[]``. @@ -98,47 +173,54 @@ def islet(tree, expanded=True): if type(tree) is not Call: return False kind = "expanded" - if isx(tree.func, _iscurrycall) and isx(tree.args[0], _isletf): + if isx(tree.func, currycall_name) and isx(tree.args[0], letf_name): kind = "curried" - elif not isx(tree.func, _isletf): + elif not isx(tree.func, letf_name): return False mode = [kw.value for kw in tree.keywords if kw.arg == "mode"] - assert len(mode) == 1 and type(mode[0]) in (Constant, Str) - mode = getconstant(mode[0]) + assert len(mode) == 1 and type(mode[0]) is Constant + mode = mode[0].value kwnames = [kw.arg for kw in tree.keywords] if "_envname" in kwnames: - return (f"{kind}_decorator", mode) # this call was generated by _dletimpl + return (f"{kind}_decorator", mode) # this call was generated by _let_decorator_impl else: - return (f"{kind}_expr", mode) # this call was generated by _letimpl - # dlet[(k0, v0), ...] (usually in a decorator list) + return (f"{kind}_expr", mode) # this call was generated by _let_expr_impl + # dlet[k0 := v0, ...] 
(usually in a decorator list) deconames = ("dlet", "dletseq", "dletrec", "blet", "bletseq", "bletrec") if type(tree) is Subscript and type(tree.value) is Name: # could be a Subscript decorator (Python 3.9+) s = tree.value.id if any(s == x for x in deconames): return ("decorator", s) - if type(tree) is Call and type(tree.func) is Name: # up to Python 3.8: parenthesis syntax for decorator macros + if type(tree) is Call and type(tree.func) is Name: # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) s = tree.func.id if any(s == x for x in deconames): return ("decorator", s) # otherwise we should have an expr macro invocation if not type(tree) is Subscript: return False + # Note we don't care about the bindings format here. + # let[k0 := v0, ...][body] + # let(k0 := v0, ...)[body] + # ^^^^^^^^^^^^^^^^^^ macro = tree.value - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - expr = tree.slice - else: - expr = tree.slice.value exprnames = ("let", "letseq", "letrec", "let_syntax", "abbrev") - # let[(k0, v0), ...][body] if type(macro) is Subscript and type(macro.value) is Name: s = macro.value.id if any(s == x for x in exprnames): return ("lispy_expr", s) + elif type(macro) is Call and type(macro.func) is Name: # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) + s = macro.func.id + if any(s == x for x in exprnames): + return ("lispy_expr", s) # The haskelly syntaxes are only available as a let expression (no decorator form). elif type(macro) is Name: s = macro.id if any(s == x for x in exprnames): + # let[k0 := v0, ...][body] + # let(k0 := v0, ...)[body] + # ^^^^ + expr = _get_subscript_slice(tree) h = _ishaskellylet(expr) if h: return (h, s) @@ -151,37 +233,44 @@ def _ishaskellylet(tree): In other words, detect the part inside the brackets in:: - let[((k0, v0), ...) in body] - let[body, where((k0, v0), ...)] + let[[k0 := v0, ...] 
in body] + let[body, where[k0 := v0, ...]] To detect the full expression including the ``let[]``, use ``islet`` instead. """ - # let[((k0, v0), ...) in body] + # let[[k0 := v0, ...] in body] + # let[(k0 := v0, ...) in body] def maybeiscontentofletin(tree): return (type(tree) is Compare and len(tree.ops) == 1 and type(tree.ops[0]) is In and - type(tree.left) is Tuple) - # let[body, where((k0, v0), ...)] + type(tree.left) in (List, Tuple)) + # let[body, where[k0 := v0, ...]] + # let[body, where(k0 := v0, ...)] def maybeiscontentofletwhere(tree): - return type(tree) is Tuple and len(tree.elts) == 2 and type(tree.elts[1]) is Call + return type(tree) is Tuple and len(tree.elts) == 2 and type(tree.elts[1]) in (Call, Subscript) if maybeiscontentofletin(tree): bindings = tree.left - if all((type(b) is Tuple and len(b.elts) == 2 and type(b.elts[0]) is Name) - for b in bindings.elts): - return "in_expr" - # Single binding special case: let's not require a trailing comma. - # In this case, the wrapper tuple containing the bindings is missing. - # (For consistency of surface syntax with the other variants that don't - # require it, because they look like function calls in the AST.) - if len(bindings.elts) == 2 and type(bindings.elts[0]) is Name: + try: + # This could be a `let_syntax` or `abbrev` using the haskelly let-in syntax. + # We don't want to care about that, so we always use `letsyntax_mode=True`. + _ = canonize_bindings(_canonize_macroargs_node(bindings), letsyntax_mode=True) return "in_expr" + except SyntaxError: + pass elif maybeiscontentofletwhere(tree): - thecall = tree.elts[1] - if type(thecall.func) is Name and thecall.func.id == "where": - return "where_expr" + # TODO: account for as-imports here? 
(use isx()) + thewhere = tree.elts[1] + if type(thewhere) is Call: + if type(thewhere.func) is Name and thewhere.func.id == "where": + return "where_expr" + elif type(thewhere) is Subscript: + if type(thewhere.value) is Name and thewhere.value.id == "where": + return "where_expr" return False # invalid syntax for haskelly let +# TODO: This would benefit from macro destructuring in the expander. +# TODO: See https://github.com/Technologicat/mcpyrate/issues/3 def isdo(tree, expanded=True): """Detect whether tree is a ``do[]`` or ``do0[]``. @@ -203,66 +292,87 @@ def isdo(tree, expanded=True): if type(tree) is not Call: return False kind = "expanded" - if isx(tree.func, _iscurrycall) and isx(tree.args[0], _isdof): + if isx(tree.func, currycall_name) and isx(tree.args[0], dof_name): kind = "curried" - elif not isx(tree.func, _isdof): + elif not isx(tree.func, dof_name): return False return kind + # TODO: account for as-imports here? (use isx()) if not (type(tree) is Subscript and type(tree.value) is Name and any(tree.value.id == x for x in ("do", "do0"))): return False # TODO: detect also do[] with a single expression inside? (now requires a comma) - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - if not type(tree.slice) is Tuple: - return False - else: - if not type(tree.slice) is Index and type(tree.slice.value) is Tuple: - return False + if not type(_get_subscript_slice(tree)) is Tuple: + return False return tree.value.id # ----------------------------------------------------------------------------- class UnexpandedEnvAssignView: - """Destructure an env-assignment, writably. + """Destructure an unexpanded env-assignment, writably. If ``tree`` cannot be interpreted as an unpythonic ``env`` assignment - of the form ``name << value``, then ``TypeError`` is raised. + of the form ``name := value`` or ``name << value``, then ``TypeError`` is raised. For easy in-place modification of both ``name`` and ``value``. 
Use before the env-assignment is expanded away (so, before the ``let[]`` or ``do[]`` containing it is expanded away). + This handles `mcpyrate.core.Done` `ASTMarker`s in the name position transparently, + to accommodate for expanded `mcpyrate.namemacro`s. + + In other words, if the AST for the LHS is `Name(id=...)`, reading/writing the `name` + property will access `lhs.id`. If the AST for the LHS is `Done(body=Name(id=...))`, + reading/writing the `name` property will access `lhs.body.id`. This means you don't + need to care about whether there is a `Done` or not. + **Attributes**: ``name``: the name of the variable, as a str. ``value``: the thing being assigned, as an AST. - Writing to either attribute updates the original. + Writing to either attribute updates the original, preserving the syntax (`:=` or `<<`). """ def __init__(self, tree): if not isenvassign(tree): - raise TypeError(f"expected a tree representing an unexpanded env-assignment, got {tree}") + raise TypeError(f"expected a tree representing an unexpanded env-assignment, got {unparse(tree)}") self._tree = tree def _getname(self): - return self._tree.left.id + if isenvassign(self._tree) is LShift: + return getname(self._tree.left, accept_attr=False) + else: # NamedExpr + return getname(self._tree.target, accept_attr=False) def _setname(self, newname): if not isinstance(newname, str): raise TypeError(f"expected str for new name, got {type(newname)} with value {repr(newname)}") - self._tree.left.id = newname + if isenvassign(self._tree) is LShift: + targetnode = self._tree.left + else: # NamedExpr + targetnode = self._tree.target + # The `Done` may be produced by expanded `@namemacro`s. + if isinstance(targetnode, Done): + targetnode.body.id = newname + else: + targetnode.id = newname name = property(fget=_getname, fset=_setname, doc="The name of the assigned var, as an str. 
Writable.") def _getvalue(self): - return self._tree.right + if isenvassign(self._tree) is LShift: + return self._tree.right + else: # NamedExpr + return self._tree.value def _setvalue(self, newvalue): - self._tree.right = newvalue + if isenvassign(self._tree) is LShift: + self._tree.right = newvalue + else: # NamedExpr + self._tree.value = newvalue value = property(fget=_getvalue, fset=_setvalue, doc="The value of the assigned var, as an AST. Writable.") -# TODO: kwargs support for let(x=42)[...] if implemented later class UnexpandedLetView: """Destructure a let form, writably. @@ -274,21 +384,35 @@ class UnexpandedLetView: **Supported formats**:: - dlet[(k0, v0), ...] # decorator - let[(k0, v0), ...][body] # lispy expression - let[((k0, v0), ...) in body] # haskelly expression - let[body, where((k0, v0), ...)] # haskelly expression, inverted + dlet[k0 := v0, ...] # decorator + let[k0 := v0, ...][body] # lispy expression + let[[k0 := v0, ...] in body] # haskelly expression + let[body, where[k0 := v0, ...]] # haskelly expression, inverted In addition, we also support *just the bracketed part* of the haskelly formats. This is to make it easier for the macro interface to destructure these forms (for sending into the ``let`` syntax transformer). So these forms are supported, too:: - ((k0, v0), ...) in body - (body, where((k0, v0), ...)) + [k0 := v0, ...] in body + (body, where[k0 := v0, ...]) + + Finally, in any of these, the bindings subform can actually be in any of + the formats: + + [k0 := v0, ...] # preferred, v0.15.3+ + [k0 << v0, ...] # preferred, v0.15.0 to v0.15.2 + (k0 << v0, ...) + [[k0, v0], ...] + [(k0, v0), ...] + ([k0, v0], ...) + ((k0, v0), ...) 
+ k, v + k := v # preferred for a single binding, v0.15.3+ + k << v # preferred for a single binding, v0.15.0 to v0.15.2 This is a data abstraction that hides the detailed structure of the AST, - since there are three alternate syntaxes that can be used for a ``let`` + since there are many alternate syntaxes that can be used for a ``let`` expression. For the decorator forms, ``tree`` should be the decorator call. In this case @@ -301,6 +425,20 @@ class UnexpandedLetView: ``(k, v)``, where ``k`` is an ``ast.Name``. Writing to ``bindings`` updates the original. + The bindings are always presented in this format, regardless of the actual + syntax used in the `let` form. Updates must also be done in this format. + + **CAUTION**: The bindings are only written to the AST when you assign to + the ``bindings`` attribute; in-place updates might not have any effect, + depending on the actual syntax in the original AST (i.e. whether what you + got was actually a reformatted copy). You'll likely want something like this:: + + newbindings = [] + for b in view.bindings: + b.elts[1] = ... # modify it + newbindings.append(b) + view.bindings = newbindings # write the updated bindings to the AST + ``body`` (when available) is an AST representing a single expression. If it is an ``ast.List``, it means an implicit ``do[]`` (handled by the ``let`` expander), allowing a multiple-expression body. @@ -327,7 +465,7 @@ def __init__(self, tree): # from the given tree, to send them to the let transformer). h = _ishaskellylet(tree) if not h: - raise TypeError(f"expected a tree representing an unexpanded let, got {tree}") + raise TypeError(f"expected a tree representing an unexpanded let, got {unparse(tree)}") data = (h, None) # cannot detect mode, because no access to the surrounding Subscript AST node self._has_subscript_container = False self._tree = tree @@ -337,59 +475,66 @@ def __init__(self, tree): # Resolve the "content" node in the haskelly format. 
def _theexpr_ref(self): - if self._has_subscript_container: - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - return self._tree.slice - else: - return self._tree.slice.value - else: - return self._tree + if self._has_subscript_container: # `let[(...) in ...]`, `let[..., where(...)]` + return _get_subscript_slice(self._tree) + return self._tree # `(...) in ...`, `..., where(...)` def _getbindings(self): t = self._type - if t == "decorator": # bare Subscript, dlet[...], blet[...] - if type(self._tree) is Call: # up to Python 3.8: parenthesis syntax for decorator macros - return canonize_bindings(self._tree.args) - # Subscript as decorator (Python 3.9+) - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - theargs = self._tree.slice - else: - theargs = self._tree.slice.value - return canonize_bindings(theargs.elts) - elif t == "lispy_expr": # Subscript inside a Subscript, (let[...])[...] - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - theargs = self._tree.value.slice - else: - theargs = self._tree.value.slice.value - return canonize_bindings(theargs.elts) - else: # haskelly let, let[(...) in ...], let[..., where(...)] - theexpr = self._theexpr_ref() + if t in ("decorator", "lispy_expr"): + if t == "decorator": + # dlet[...], blet[...] + # dlet(...), blet(...) + thetree = self._tree + else: # "lispy_expr" + # (let[...])[...] + # (let(...))[...] + # ^^^^^^^^^^ + thetree = self._tree.value + + if type(thetree) is Call: # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) + return canonize_bindings(thetree.args) + # Subscript + theargs = _get_subscript_slice(thetree) + return canonize_bindings(_canonize_macroargs_node(theargs)) + else: # haskelly let, `let[[...] in ...]`, `let[..., where[...]]` + theexpr = self._theexpr_ref() # `[...] 
in ...`, `..., where[...]` if t == "in_expr": - return canonize_bindings(theexpr.left.elts) + return canonize_bindings(_canonize_macroargs_node(theexpr.left)) elif t == "where_expr": - return canonize_bindings(theexpr.elts[1].args) + thewhere = theexpr.elts[1] + if type(thewhere) is Call: + return canonize_bindings(thewhere.args) + else: # Subscript + return canonize_bindings(_canonize_macroargs_node(_get_subscript_slice(thewhere))) + assert False def _setbindings(self, newbindings): t = self._type - if t == "decorator": - if type(self._tree) is Call: # up to Python 3.8: parenthesis syntax for decorator macros - self._tree.args = newbindings + if t in ("decorator", "lispy_expr"): + if t == "decorator": + # dlet[...], blet[...] + # dlet(...), blet(...) + thetree = self._tree + else: # "lispy_expr" + # (let[...])[...] + # (let(...))[...] + # ^^^^^^^^^^ + thetree = self._tree.value + + if type(thetree) is Call: # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) + thetree.args = newbindings return - # Subscript as decorator (Python 3.9+) - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - self._tree.slice.elts = newbindings - else: - self._tree.slice.value.elts = newbindings - elif t == "lispy_expr": - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - self._tree.value.slice.elts = newbindings - else: - self._tree.value.slice.value.elts = newbindings + _set_subscript_slice(thetree, Tuple(elts=newbindings)) else: theexpr = self._theexpr_ref() if t == "in_expr": - theexpr.left.elts = newbindings + theexpr.left = Tuple(elts=newbindings) elif t == "where_expr": - theexpr.elts[1].args = newbindings + thewhere = theexpr.elts[1] + if type(thewhere) is Call: + thewhere.args = newbindings + else: # Subscript + _set_subscript_slice(thewhere, Tuple(elts=newbindings)) bindings = property(fget=_getbindings, fset=_setbindings, doc="The bindings subform of the let. 
Writable.") def _getbody(self): @@ -397,10 +542,7 @@ def _getbody(self): if t == "decorator": raise TypeError("the body of a decorator let form is the body of decorated function, not a subform of the let.") elif t == "lispy_expr": - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - return self._tree.slice - else: - return self._tree.slice.value + return _get_subscript_slice(self._tree) else: theexpr = self._theexpr_ref() if t == "in_expr": @@ -412,10 +554,7 @@ def _setbody(self, newbody): if t == "decorator": raise TypeError("the body of a decorator let form is the body of decorated function, not a subform of the let.") elif t == "lispy_expr": - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - self._tree.slice = newbody - else: - self._tree.slice.value = newbody + _set_subscript_slice(self._tree, newbody) else: theexpr = self._theexpr_ref() if t == "in_expr": @@ -439,8 +578,9 @@ class UnexpandedDoView: do0[body0, ...] [...] - The list format is for convenience, for viewing an implicit ``do[]`` in the - body of a ``let`` form. + The list format is for convenience, for viewing an implicit ``do[]`` + (extra bracket syntax) in the body of a ``let`` form before the ``do`` + is actually injected. **Attributes**: @@ -451,24 +591,18 @@ def __init__(self, tree): self._implicit = False if not isdo(tree, expanded=False): if type(tree) is not List: # for implicit do[] - raise TypeError(f"expected a tree representing an unexpanded do, got {tree}") + raise TypeError(f"expected a tree representing an unexpanded do, got {unparse(tree)}") self._implicit = True self._tree = tree def _getbody(self): if not self._implicit: - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. 
- return self._tree.slice.elts - else: - return self._tree.slice.value.elts + return _get_subscript_slice(self._tree).elts else: return self._tree.elts def _setbody(self, newbody): if not self._implicit: - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - self._tree.slice.elts = newbody - else: - self._tree.slice.value.elts = newbody + _set_subscript_slice(self._tree, Tuple(elts=newbody)) else: self._tree.elts = newbody body = property(fget=_getbody, fset=_setbody, doc="The body of the do. Writable.") @@ -481,8 +615,8 @@ class ExpandedLetView: We support both "with autocurry" and bare formats. This is for simple in-place modifications; changing the number of bindings - is currently not supported. Prefer doing any extensive modifications in the - first pass, before the ``let[]`` expands. + is currently not supported. Prefer doing any extensive modifications + **before** the ``let[]`` expands. The bindings are contained in an `ast.Tuple`. Each binding is also an `ast.Tuple`. 
@@ -521,7 +655,7 @@ class ExpandedLetView: def __init__(self, tree): data = islet(tree, expanded=True) if not data: - raise TypeError(f"expected a tree representing an expanded let, got {tree}") + raise TypeError(f"expected a tree representing an expanded let, got {unparse(tree)}") self._tree = tree self._type, self.mode = data if self._type not in ("expanded_decorator", "expanded_expr", "curried_decorator", "curried_expr"): @@ -589,8 +723,8 @@ def _setbindings(self, newbindings): raise NotImplementedError("changing the number of items currently not supported by this view (do that before the let[] expands)") # pragma: no cover for newb in newbindings.elts: newk, newv = newb.elts - if type(newk) not in (Constant, Str): # Python 3.8+: ast.Constant - raise TypeError("ExpandedLetView: let: each key must be an ast.Constant or an ast.Str") # pragma: no cover + if type(newk) is not Constant: + raise TypeError("ExpandedLetView: let: each key must be an ast.Constant") # pragma: no cover # Abstract away the namelambda(...). We support both "with autocurry" and bare formats: # currycall(letter, bindings, currycall(currycall(namelambda, "let_body"), curryf(lambda e: ...))) # letter(bindings, namelambda("let_body")(lambda e: ...)) @@ -602,7 +736,7 @@ def _setbindings(self, newbindings): for oldb, newb in zip(thebindings.elts, newbindings.elts): oldk, thev = oldb.elts newk, newv = newb.elts - newk_string = getconstant(newk) # Python 3.8+: ast.Constant + newk_string = newk.value if type(newv) is not Lambda: raise TypeError("ExpandedLetView: letrec: each value must be of the form `lambda e: ...`") # pragma: no cover if curried: @@ -619,9 +753,9 @@ def _setbindings(self, newbindings): # update name in the namelambda(...) thev.func.args[0] = Constant(value=f"letrec_binding_{newk_string}") # Python 3.8+: ast.Constant # Macro-generated nodes may be missing source location information, - # in which case we let MacroPy fix it later. + # in which case we let `mcpyrate` fix it later. 
# This is mainly an issue for the unit tests of this module, which macro-generate the "old" data. - if hasattr(oldb, "lineno") and hasattr(oldb, "col_offset"): + if getattr(oldb, "lineno", None) is not None and getattr(oldb, "col_offset", None) is not None: newelts.append(Tuple(elts=[newk, thev], lineno=oldb.lineno, col_offset=oldb.col_offset)) else: newelts.append(Tuple(elts=[newk, thev])) @@ -663,8 +797,8 @@ class ExpandedDoView: We support both "with autocurry" and bare formats. This is for simple in-place modifications; changing the number of do-items - is currently not supported. Prefer doing any extensive modifications in the - first pass, before the ``do[]`` expands. + is currently not supported. Prefer doing any extensive modifications + **before** the ``do[]`` expands. ``body`` is a ``list``, where each item is of the form ``lambda e: ...``. @@ -689,7 +823,7 @@ class ExpandedDoView: def __init__(self, tree): t = isdo(tree, expanded=True) if not t: - raise TypeError(f"expected a tree representing an expanded do, got {tree}") + raise TypeError(f"expected a tree representing an expanded do, got {unparse(tree)}") self.curried = t.startswith("curried") self._tree = tree self.envname = self._deduce_envname() # stash at init time to prevent corruption by user mutations. diff --git a/unpythonic/syntax/letsyntax.py b/unpythonic/syntax/letsyntax.py index 9d74dd6d..3ea73c3f 100644 --- a/unpythonic/syntax/letsyntax.py +++ b/unpythonic/syntax/letsyntax.py @@ -4,39 +4,220 @@ # at macro expansion time. If you're looking for regular run-time let et al. macros, # see letdo.py. -from ast import (Name, Call, Starred, If, Constant, Expr, With, - FunctionDef, AsyncFunctionDef, ClassDef, Attribute) +# TODO: Coverage of code using `with block` and `with expr` is not reported correctly. 
+# +# TODO: As this is a toy macro system within the real macro system, that is to be expected; +# TODO: `mcpyrate` goes to some degree of trouble to produce correct coverage reporting for +# TODO: the real macro system, and we haven't duplicated that effort here. +# +# TODO: With `mcpyrate`, we don't really need `let_syntax` and `abbrev` anymore, so we could +# TODO: actually remove them; but their tests exercise some code paths that would otherwise +# TODO: remain untested. As of v0.15.0, we're keeping them for now. + +__all__ = ["let_syntax", "abbrev", "expr", "block"] + +from mcpyrate.quotes import macros, q, a # noqa: F401 + +from ast import Name, Call, Subscript, Tuple, Starred, Expr, With from copy import deepcopy +from functools import partial +from mcpyrate import parametricmacro from mcpyrate.quotes import is_captured_value -from mcpyrate.walkers import ASTTransformer +from mcpyrate.utils import rename +from mcpyrate.walkers import ASTTransformer, ASTVisitor -from .letdo import implicit_do +from .letdo import _implicit_do, _destructure_and_apply_let +from .nameutil import is_unexpanded_block_macro from .util import eliminate_ifones -def let_syntax_expr(bindings, body): # bindings: sequence of ast.Tuple: (k1, v1), (k2, v2), ..., (kn, vn) - body = implicit_do(body) # support the extra bracket syntax - if not bindings: - # Optimize out a `let_syntax` with no bindings. The macro layer cannot trigger - # this case, because our syntaxes always require at least one binding. - # So this check is here just to protect against use with no bindings directly - # from other syntax transformers, which in theory could attempt anything. - # - # TODO: update this comment for mcpyrate - # The reason the macro layer never calls us with no bindings is technical. - # In the macro interface, with no bindings, the macro's `args` are `()` - # whether it was invoked as `let_syntax()[...]` or just `let_syntax[...]`. 
- # Thus, there is no way to distinguish, in the macro layer, between these - # two. We can't use `UnexpandedLetView` to do the dirty work of AST - # analysis, because the macro expander does too much automatically: in the macro - # layer, `tree` is only the part inside the brackets. So we really - # can't see whether the part outside the brackets was a Call with no - # arguments, or just a Name - both cases get treated exactly the same, - # as a macro invocation with empty `args`. - # - # The latter form, `let_syntax[...]`, is used by the haskelly syntax - # `let_syntax[(...) in ...]`, `let_syntax[..., where(...)]` - and in - # these cases, both the bindings and the body reside inside the brackets. +from ..dynassign import dyn + +# -------------------------------------------------------------------------------- +# Macro interface + +@parametricmacro +def let_syntax(tree, *, args, syntax, expander, **kw): + """[syntax, expr/block] Introduce local **syntactic** bindings. + + **Expression variant**:: + + let_syntax[lhs << rhs, ...][body] + let_syntax[lhs << rhs, ...][[body0, ...]] + + Alternative haskelly syntax:: + + let_syntax[[lhs << rhs, ...] in body] + let_syntax[[lhs << rhs, ...] in [body0, ...]] + + let_syntax[body, where[lhs << rhs, ...]] + let_syntax[[body0, ...], where[lhs << rhs, ...]] + + **Block variant**:: + + with let_syntax: + with block as xs: # capture a block of statements - bare name + ... + with block[a, ...] as xs: # capture a block of statements - template + ... + with expr as x: # capture a single expression - bare name + ... + with expr[a, ...] as x: # capture a single expression - template + ... + body0 + ... + + A single expression can be a ``do[]`` if multiple expressions are needed. + + The bindings are applied **at macro expansion time**, substituting + the expression on the RHS for each instance of the corresponding LHS. + Each substitution gets a fresh copy. + + This is useful to e.g. 
locally abbreviate long function names at macro + expansion time (with zero run-time overhead), or to splice in several + (possibly parametric) instances of a common pattern. + + In the expression variant, ``lhs`` may be: + + - A bare name (e.g. ``x``), or + + - A simple template of the form ``f(x, ...)``. The names inside the + parentheses declare the formal parameters of the template (that can + then be used in the body). + + In the block variant: + + - The **as-part** specifies the name of the LHS. + + - If a template, the formal parameters are declared on the ``block`` + or ``expr``, not on the as-part (due to syntactic limitations). + + **Templates** + + To make parametric substitutions, use templates. + + Templates support only positional arguments, with no default values. + + Even in block templates, parameters are always expressions (because they + use the subscript syntax at the use site). + + In the body of the ``let_syntax``, a template is used like an expr macro. + Just like in an actual macro invocation, when the template is substituted, + any instances of its formal parameters on its RHS get replaced by the + argument values from the invocation site. + + Note each instance of the same formal parameter gets a fresh copy of the + corresponding argument value. + + **Substitution order** + + This is a two-step process. In the first step, we apply template substitutions. + In the second step, we apply bare name substitutions to the result of the + first step. (So RHSs of templates may use any of the bare-name definitions.) + + Within each step, the substitutions are applied **in the order specified**. + So if the bindings are ``((x, y), (y, z))``, then ``x`` transforms to ``z``. + But if the bindings are ``((y, z), (x, y))``, then ``x`` transforms to ``y``, + and only an explicit ``y`` at the use site transforms to ``z``. 
+ + **Notes** + + Inspired by Racket's ``let-syntax`` and ``with-syntax``, see: + https://docs.racket-lang.org/reference/let.html + https://docs.racket-lang.org/reference/stx-patterns.html + + **CAUTION**: This is essentially a toy macro system inside the real + macro system, implemented with the real macro system. + + The usual caveats of macro systems apply. Especially, we support absolutely + no form of hygiene. Be very, very careful to avoid name conflicts. + + ``let_syntax`` is meant only for simple local substitutions where the + elimination of repetition can shorten the code and improve readability. + + If you need to do something complex, prefer writing a real macro directly + in `mcpyrate`. + """ + if syntax not in ("expr", "block"): + raise SyntaxError("let_syntax is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("let_syntax (block mode) does not take an as-part") # pragma: no cover + + if syntax == "expr": + _let_syntax_expr_inside_out = partial(_let_syntax_expr, expand_inside=True) + return _destructure_and_apply_let(tree, args, expander, _let_syntax_expr_inside_out, letsyntax_mode=True) + else: # syntax == "block": + with dyn.let(_macro_expander=expander): + return _let_syntax_block(block_body=tree, expand_inside=True) + +@parametricmacro +def abbrev(tree, *, args, syntax, expander, **kw): + """[syntax, expr/block] Exactly like ``let_syntax``, but expands outside in. + + Because this variant expands before any macros in the body, it can locally + rename other macros, e.g.:: + + abbrev[m << macrowithverylongname][ + m[tree1] if m[tree2] else m[tree3]] + + **CAUTION**: Because ``abbrev`` expands outside-in, and does not respect + boundaries of any nested ``abbrev`` invocations, it will not lexically scope + the substitutions. Instead, the outermost ``abbrev`` expands first, and then + any inner ones expand with whatever substitutions they have remaining. 
+ + If the same name is used on the LHS in two or more nested ``abbrev``, + any inner ones will likely raise an error (unless the outer substitution + just replaces a name with another), because also the names on the LHS + in the inner ``abbrev`` will undergo substitution when the outer + ``abbrev`` expands. + """ + if syntax not in ("expr", "block"): + raise SyntaxError("abbrev is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("abbrev (block mode) does not take an as-part") # pragma: no cover + + # DON'T expand inner macro invocations first - outside-in ordering is the default, so we simply do nothing. + + if syntax == "expr": + _let_syntax_expr_outside_in = partial(_let_syntax_expr, expand_inside=False) + return _destructure_and_apply_let(tree, args, expander, _let_syntax_expr_outside_in, + letsyntax_mode=True) + else: + with dyn.let(_macro_expander=expander): + return _let_syntax_block(block_body=tree, expand_inside=False) + +@parametricmacro +def expr(tree, *, syntax, **kw): + """[syntax, block] ``with expr:`` inside a ``with let_syntax:``.""" + if syntax != "block": + raise SyntaxError("`expr` is a block macro only") # pragma: no cover + raise SyntaxError("`expr` is only valid at the top level of a block-mode `let_syntax` or `abbrev`") # pragma: no cover, not intended to hit the expander + +@parametricmacro +def block(tree, *, syntax, **kw): + """[syntax, block] ``with block:`` inside a ``with let_syntax:``.""" + if syntax != "block": + raise SyntaxError("`block` is a block macro only") # pragma: no cover + raise SyntaxError("`block` is only valid at the top level of a block-mode `let_syntax` or `abbrev`") # pragma: no cover, not intended to hit the expander + +# -------------------------------------------------------------------------------- +# Syntax transformers + +# let_syntax[lhs << rhs, ...][body] +# let_syntax[lhs << rhs, ...][[body0, ...]] +# let_syntax[[lhs << rhs, ...] 
in body] +# let_syntax[[lhs << rhs, ...] in [body0, ...]] +# let_syntax[body, where[lhs << rhs, ...]] +# let_syntax[[body0, ...], where[lhs << rhs, ...]] +# +# This transformer takes destructured input, with the bindings subform +# and the body already extracted, and supplied separately. +# +# bindings: sequence of ast.Tuple: (k1, v1), (k2, v2), ..., (kn, vn) +# expand_inside: if True, expand inside-out. If False, expand outside-in. +def _let_syntax_expr(bindings, body, *, expand_inside): + body = _implicit_do(body) # support the extra bracket syntax + if not bindings: # Optimize out a `let_syntax` with no bindings. return body # pragma: no cover names_seen = set() @@ -52,32 +233,55 @@ def register_bindings(): target = templates if args else barenames target.append((name, args, value, "expr")) + if expand_inside: + bindings = dyn._macro_expander.visit_recursively(bindings) + body = dyn._macro_expander.visit_recursively(body) register_bindings() body = _substitute_templates(templates, body) body = _substitute_barenames(barenames, body) return body -# ----------------------------------------------------------------------------- - # block version: # # with let_syntax: # with block as xs: # ... -# with block(a, ...) as xs: +# with block[a, ...] as xs: # ... # with expr as x: # ... -# with expr(a, ...) as x: +# with expr[a, ...] as x: # ... # body0 # ... # -def let_syntax_block(block_body): +# expand_inside: if True, expand inside-out. If False, expand outside-in. 
+def _let_syntax_block(block_body, *, expand_inside): + is_let_syntax = partial(is_unexpanded_block_macro, let_syntax, dyn._macro_expander) + is_abbrev = partial(is_unexpanded_block_macro, abbrev, dyn._macro_expander) + is_expr_declaration = partial(is_unexpanded_block_macro, expr, dyn._macro_expander) + is_block_declaration = partial(is_unexpanded_block_macro, block, dyn._macro_expander) + is_helper_macro = lambda tree: is_expr_declaration(tree) or is_block_declaration(tree) + def check_strays(ismatch, tree): + class StrayHelperMacroChecker(ASTVisitor): # TODO: refactor this? + def examine(self, tree): + if is_captured_value(tree): + return # don't recurse! + elif is_let_syntax(tree) or is_abbrev(tree): + return # don't recurse! + elif ismatch(tree): + # Expand the stray helper macro invocation, to trigger its `SyntaxError` + # with a useful message, and *make the expander generate a use site traceback*. + # + # (If we just `raise` here directly, the expander won't see the use site + # of the `with expr` or `with block`, but just that of the `do[]`.) 
+ dyn._macro_expander.visit(tree) + self.generic_visit(tree) + StrayHelperMacroChecker().visit(tree) + check_stray_blocks_and_exprs = partial(check_strays, is_helper_macro) + names_seen = set() - templates = [] - barenames = [] - def register_binding(withstmt, mode, kind): + def destructure_binding(withstmt, mode, kind): assert mode in ("block", "expr") assert kind in ("barename", "template") ctxmanager = withstmt.items[0].context_expr @@ -97,10 +301,10 @@ def register_binding(withstmt, mode, kind): args = [] if mode == "block": - value = If(test=Constant(value=1), - body=withstmt.body, - orelse=[], - lineno=stmt.lineno, col_offset=stmt.col_offset) + with q as value: + if 1: + with a: + withstmt.body else: # mode == "expr": if len(withstmt.body) != 1: raise SyntaxError("'with expr:' expected a one-item body (use a do[] if need more)") # pragma: no cover @@ -109,8 +313,8 @@ def register_binding(withstmt, mode, kind): raise SyntaxError("'with expr:' expected an expression body, got a statement") # pragma: no cover value = theexpr.value # discard Expr wrapper in definition names_seen.add(name) - target = templates if args else barenames - target.append((name, args, value, mode)) + + return name, args, value, mode def isbinding(tree): for mode in ("block", "expr"): @@ -119,48 +323,69 @@ def isbinding(tree): ctxmanager = tree.items[0].context_expr if type(ctxmanager) is Name and ctxmanager.id == mode: return mode, "barename" + # expr[...], block[...] + if type(ctxmanager) is Subscript and type(ctxmanager.value) is Name and ctxmanager.value.id == mode: + return mode, "template" + # expr(...), block(...) 
+ # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) if type(ctxmanager) is Call and type(ctxmanager.func) is Name and ctxmanager.func.id == mode: return mode, "template" return False + templates = [] + barenames = [] new_block_body = [] for stmt in block_body: + # `let_syntax` mode (expand_inside): respect lexical scoping of nested `let_syntax`/`abbrev` + expanded = False + if expand_inside and (is_let_syntax(stmt) or is_abbrev(stmt)): + stmt = dyn._macro_expander.visit_recursively(stmt) + expanded = True + stmt = _substitute_templates(templates, stmt) stmt = _substitute_barenames(barenames, stmt) binding_data = isbinding(stmt) if binding_data: - register_binding(stmt, *binding_data) + name, args, value, mode = destructure_binding(stmt, *binding_data) + + check_stray_blocks_and_exprs(value) # before expanding it! + if expand_inside and not expanded: + value = dyn._macro_expander.visit_recursively(value) + + target = templates if args else barenames + target.append((name, args, value, mode)) else: + check_stray_blocks_and_exprs(stmt) # before expanding it! 
+ if expand_inside and not expanded: + stmt = dyn._macro_expander.visit_recursively(stmt) + new_block_body.append(stmt) new_block_body = eliminate_ifones(new_block_body) if not new_block_body: raise SyntaxError("let_syntax: expected at least one statement beside definitions") # pragma: no cover return new_block_body -# TODO: convert to mcpyrate magic variable -class block: - """[syntax] Magic identifier for ``with block:`` inside a ``with let_syntax:``.""" - def __repr__(self): # in case one of these ends up somewhere at runtime - return "" # pragma: no cover - def __call__(self, tree, **kw): # make `block` look like a macro - pass -block = block() - -# TODO: convert to mcpyrate magic variable -class expr: - """[syntax] Magic identifier for ``with expr:`` inside a ``with let_syntax:``.""" - def __repr__(self): # in case one of these ends up somewhere at runtime - return "" # pragma: no cover - def __call__(self, tree, **kw): # make `expr` look like a macro - pass -expr = expr() - # ----------------------------------------------------------------------------- +def _get_subscript_args(tree): + theslice = tree.slice + if type(theslice) is Tuple: + args = theslice.elts + else: + args = [theslice] + return args + +# x --> "x", [] +# f[a, b, c] --> "f", ["a", "b", "c"] +# f(a, b, c) --> "f", ["a", "b", "c"] def _analyze_lhs(tree): if type(tree) is Name: # bare name name = tree.id args = [] + elif type(tree) is Subscript and type(tree.value) is Name: # template f[x, ...] + name = tree.value.id + args = [a.id for a in _get_subscript_args(tree)] + # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) elif type(tree) is Call and type(tree.func) is Name: # template f(x, ...) 
name = tree.func.id if any(type(a) is Starred for a in tree.args): # *args (Python 3.5+) @@ -191,53 +416,48 @@ def subst(): return tree elif isthisname(tree): if mode == "block": - raise SyntaxError("cannot substitute a block into expression position") # pragma: no cover + raise SyntaxError(f"cannot substitute block '{name}' into expression position") # pragma: no cover tree = subst() return self.generic_visit(tree) return self.generic_visit(tree) return Splicer().visit(tree) - # if the new value is also bare name, perform the substitution (now as a string) - # also in the name part of def and similar, to support human intuition of "renaming" - # TODO: use `mcpyrate.utils.rename`, it was designed for things like this? + # If the new value is also bare name, perform the substitution (now as a string) + # also in the name part of def and similar, to support human intuition of "renaming". if type(value) is Name: - newname = value.id - def splice_barestring(tree): - class BarestringSplicer(ASTTransformer): - def transform(self, tree): - if is_captured_value(tree): - return tree # don't recurse! 
- if type(tree) in (FunctionDef, AsyncFunctionDef, ClassDef): - if tree.name == name: - tree.name = newname - elif type(tree) is Attribute: - if tree.attr == name: - tree.attr = newname - return self.generic_visit(tree) - return BarestringSplicer().visit(tree) - postproc = splice_barestring + postproc = partial(rename, name, value.id) else: postproc = lambda x: x return postproc(splice(tree)) def _substitute_barenames(barenames, tree): - for name, _, value, mode in barenames: + for name, _noformalparams, value, mode in barenames: tree = _substitute_barename(name, value, tree, mode) return tree def _substitute_templates(templates, tree): for name, formalparams, value, mode in templates: def isthisfunc(tree): - return type(tree) is Call and type(tree.func) is Name and tree.func.id == name + if type(tree) is Subscript and type(tree.value) is Name and tree.value.id == name: + return True + # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) + if type(tree) is Call and type(tree.func) is Name and tree.func.id == name: + return True + return False def subst(tree): - theargs = tree.args + if type(tree) is Subscript: + theargs = _get_subscript_args(tree) + elif type(tree) is Call: + theargs = tree.args + else: + assert False if len(theargs) != len(formalparams): raise SyntaxError(f"let_syntax template '{name}' expected {len(formalparams)} arguments, got {len(theargs)}") # pragma: no cover # make a fresh deep copy of the RHS to avoid destroying the template. - tree = deepcopy(value) # expand the f itself in f(x, ...) + tree = deepcopy(value) # expand the f itself in f[x, ...] or f(x, ...) for k, v in zip(formalparams, theargs): # expand the x, ... in the expanded form of f - # can't put statements in a Call, so always treat args as expressions. + # can't put statements in a Subscript or in a Call, so always treat args as expressions. 
tree = _substitute_barename(k, v, tree, "expr") return tree def splice(tree): @@ -252,7 +472,7 @@ def transform(self, tree): return tree elif isthisfunc(tree): if mode == "block": - raise SyntaxError("cannot substitute a block into expression position") # pragma: no cover + raise SyntaxError(f"cannot substitute block '{name}' into expression position") # pragma: no cover tree = subst(tree) return self.generic_visit(tree) return self.generic_visit(tree) diff --git a/unpythonic/syntax/nameutil.py b/unpythonic/syntax/nameutil.py index abce0099..1df61e98 100644 --- a/unpythonic/syntax/nameutil.py +++ b/unpythonic/syntax/nameutil.py @@ -1,16 +1,58 @@ # -*- coding: utf-8 -*- -"""Utilities for working with identifiers in macros.""" +"""Utilities for working with identifiers in macros. -from ast import Name, Attribute +Main purpose is to be able to query both direct and hygienically captured names +with a unified API. +""" -from mcpyrate.quotes import is_captured_value +__all__ = ["isx", "getname", + "is_unexpanded_expr_macro", "is_unexpanded_block_macro"] + +from ast import Name, Attribute, Subscript, Call, With + +from mcpyrate.core import Done +from mcpyrate.quotes import is_captured_macro, is_captured_value, lookup_macro + +# Here hygienic captures only come from `unpythonic.syntax` (unless there are +# also user-defined macros), and we use from-imports and bare names for anything +# `q[h[]]`'d; but any references that appear explicitly in the user code may use +# either bare `somename` or `unpythonic.somename`. +# +# TODO: How about `unpythonic.somemodule.somename`? Currently not detected. +# +# Note that in `mcpyrate`, a hygienic capture can contain the value of an +# arbitrary expression, which does not need to be bound to a name. In that +# case the "name" will be the unparsed source code of the expression. See +# the implementation of `mcpyrate.quotes.h`. That's harmless here since +# an expression won't produce an exact match on the name. 
+# +# Here we're mainly interested in the case where we have captured the value +# a name had at the use site of `h[]`, and even then, we just look at the name, +# not the actual value. +# +# TODO: Let's look at the value, not just the name. Requires changes to use sites, +# TODO: because currently `isx` doesn't know about the value the caller wants to +# TODO: check against. +# +# TODO: For our use cases, that value is usually a syntax transformer function +# TODO: defined somewhere in `unpythonic.syntax`, so we can use things like +# TODO: `q[h[letter]]` or `q[h[dof]]` in the let/do constructs to ensure that +# TODO: the workhorses resolve correctly at the use site, and still be able +# TODO: to detect the expanded forms of those constructs in the AST. +# +# TODO: The run-time value can be obtained at this end by +# TODO: `value = mcpyrate.quotes.lookup_value(key)`, +# TODO: provided that `key and (key[1] is not None)`. +# TODO: If the second element of the key is `None`, it means that +# TODO: program execution hasn't yet reached the point where the +# TODO: actual value capture triggers for that particular use of `h[]`. def isx(tree, x, accept_attr=True): """Test whether tree is a reference to the name ``x`` (str). Alternatively, ``x`` may be a predicate that accepts a ``str`` and returns whether it matches, to support more complex matching - (e.g. ``lambda s: s.startswith("foo")``). + (e.g. ``lambda name: name.startswith("foo")``). Both bare names and attributes can be recognized, to support both from-imports and regular imports of ``somemodule.x``. 
@@ -19,64 +61,17 @@ def isx(tree, x, accept_attr=True): - bare name ``x`` + - the name ``x`` inside a `mcpyrate.core.Done`, which may be produced + by expanded `@namemacro`s + - the name ``x`` inside a `mcpyrate` hygienic capture, which may be inserted during macro expansion - ``x`` as an attribute (if ``accept_attr=True``) """ - # Here hygienic captures only come from `unpythonic.syntax` (unless there are - # also user-defined macros), and we use from-imports and bare names for anything - # `q[h[]]`'d; but any references that appear explicitly in the user code may use - # either bare `somename` or `unpythonic.somename`. - # - # TODO: How about `unpythonic.somemodule.somename`? Currently not detected. - # - # Note that in `mcpyrate`, a hygienic capture can contain the value of an - # arbitrary expression, which does not need to be bound to a name. In that - # case the "name" will be the unparsed source code of the expression. See - # the implementation of `mcpyrate.quotes.h`. That's harmless here since - # an expression won't produce an exact match on the name. - # - # Here we're mainly interested in the case where we have captured the value - # a name had at the use site of `h[]`, and even then, we just look at the name, - # not the actual value. - # - # TODO: Let's look at the value, not just the name. Requires changes to use sites, - # TODO: because currently `isx` doesn't know about the value the caller wants to - # TODO: check against. - # - # TODO: For our use cases, that value is usually a syntax transformer function - # TODO: defined somewhere in `unpythonic.syntax`, so we can use things like - # TODO: `q[h[letter]]` or `q[h[dof]]` in the let/do constructs to ensure that - # TODO: the workhorses resolve correctly at the use site, and still be able - # TODO: to detect the expanded forms of those constructs in the AST. 
- # - # TODO: The run-time value can be obtained at this end by - # TODO: `value = mcpyrate.quotes.lookup_value(key)`, - # TODO: provided that `key and (key[1] is not None)`. - # TODO: If the second element of the key is `None`, it means that - # TODO: program execution hasn't yet reached the point where the - # TODO: actual value capture triggers for that particular use of `h[]`. - key = is_captured_value(tree) # AST -> (name, frozen_value) or False - if key: - name, frozen_value = key - ismatch = x if callable(x) else lambda name: name == x - return ((type(tree) is Name and ismatch(tree.id)) or - (key and ismatch(name)) or - (accept_attr and type(tree) is Attribute and ismatch(tree.attr))) - -# TODO: obsolete function, remove -def make_isxpred(x): - """Make a predicate for isx. - - Here ``x`` is an ``str``; the resulting function will match also - hygienically captured identifiers. - """ - # `mcpyrate` only renames captured macros; the names of captured - # run-time values live in the keys in the `lookup_value` calls - # (where the original name is preserved, with no renaming needed). - return lambda name: name == x + thename = getname(tree, accept_attr=accept_attr) + return thename is not None and ismatch(thename) def getname(tree, accept_attr=True): """The cousin of ``isx``. @@ -85,12 +80,89 @@ def getname(tree, accept_attr=True): If no match on ``tree``, return ``None``. """ + if isinstance(tree, Done): + return getname(tree.body, accept_attr=accept_attr) if type(tree) is Name: return tree.id key = is_captured_value(tree) # AST -> (name, frozen_value) or False - if key: + if key: # TODO: Python 3.8+: use walrus assignment here name, frozen_value = key return name if accept_attr and type(tree) is Attribute: return tree.attr return None + +# TODO: This utility really wants to live in `mcpyrate`, as part of a macro destructuring subsystem. 
+# TODO: It needs to be made more general: +# - detect also macro invocations that have macro arguments +# - destructure macro arguments, if any +def is_unexpanded_expr_macro(macrofunction, expander, tree): + """Check whether `tree` is an expr macro invocation bound to `macrofunction` in `expander`. + + This accounts for hygienic macro captures and as-imports. + + If there is a match, return the subscript slice, i.e. the tree that would be passed + to the macro function by the expander if the macro was expanded normally. + + **CAUTION**: This function doesn't currently support detecting macros that + take macro arguments. + """ + if not type(tree) is Subscript: + return False + maybemacro = tree.value + + # hygienic captures and as-imports + key = is_captured_macro(maybemacro) + if key: # TODO: Python 3.8+: use walrus assignment here + name_node = lookup_macro(key) + elif type(maybemacro) is Name: + name_node = maybemacro + else: + return False + + # extract the expr + macro = expander.isbound(name_node.id) + if macro is macrofunction: + return tree.slice + return False + + +# TODO: This utility really wants to live in `mcpyrate`, as part of a macro destructuring subsystem. +# TODO: It needs to be made more general: +# - detect if there are several macros in the same `with` +# - destructure macro arguments, if any +# - destructure as-part, if any +def is_unexpanded_block_macro(macrofunction, expander, tree): + """Check whether `tree` is a block macro invocation bound to `macrofunction` in `expander`. + + This accounts for hygienic macro captures and as-imports. + + **CAUTION**: This function doesn't currently support several macros in the same `with`.
+ """ + if not type(tree) is With: + return False + ctxmanager = tree.items[0].context_expr + # optvars = tree.items[0].optional_vars # as-part + # body = tree.body + maybemacro = ctxmanager + + # discard args if any + if type(maybemacro) is Subscript: + maybemacro = maybemacro.value + # Parenthesis syntax for macro arguments (deprecated; kept for backward compatibility) + elif type(maybemacro) is Call: + maybemacro = maybemacro.func + + # hygienic captures and as-imports + key = is_captured_macro(maybemacro) + if key: # TODO: Python 3.8+: use walrus assignment here + name_node = lookup_macro(key) + elif type(maybemacro) is Name: + name_node = maybemacro + else: + return False + + macro = expander.isbound(name_node.id) + return macro is macrofunction + +# TODO: We might also need a utility to detect decorator macros. diff --git a/unpythonic/syntax/nb.py b/unpythonic/syntax/nb.py index cbd59c3d..39ab6c13 100644 --- a/unpythonic/syntax/nb.py +++ b/unpythonic/syntax/nb.py @@ -4,30 +4,69 @@ Auto-print top-level expressions, auto-assign last result as _. """ +__all__ = ["nb"] + # This is the kind of thing thinking with macros does to your program. ;) from ast import Expr from mcpyrate.quotes import macros, q, u, a, h # noqa: F401 +from mcpyrate import parametricmacro + from .testingtools import istestmacro -def nb(body, args): +@parametricmacro +def nb(tree, *, args, syntax, **kw): + """[syntax, block] Ultralight math notebook. + + Auto-print top-level expressions, auto-assign last result as _. + + A custom print function can be supplied as an argument. + + Example:: + + with nb: + 2 + 3 + 42 * _ + + from sympy import * + with nb[pprint]: + x, y = symbols("x, y") + x * y + 3 * _ + """ + if syntax != "block": + raise SyntaxError("nb is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("nb does not take an as-part") # pragma: no cover + + # Expand outside in. 
This macro is so simple and orthogonal the + # ordering doesn't matter. This is cleaner. + return _nb(body=tree, args=args) + +def _nb(body, args): p = args[0] if args else q[h[print]] # custom print function hook - with q as newbody: # pragma: no cover, quoted only. + with q as newbody: _ = None - theprint = a[p] + theprint = lambda value: h[_print_and_passthrough](a[p], value) for stmt in body: - # We ignore statements (because no return value), and, - # test[] and related expressions from our test framework. - # Those don't return a value either, and play a role - # similar to the `assert` statement. + # We ignore statements (because no return value), and, test[] and related + # expressions from our test framework. Those have no meaningful return value + # either, and play a role similar to the `assert` statement. if type(stmt) is not Expr or istestmacro(stmt.value): newbody.append(stmt) continue - with q as newstmts: # pragma: no cover, quoted only. + with q as newstmts: _ = a[stmt.value] if _ is not None: theprint(_) newbody.extend(newstmts) return newbody + +# Work together with `autoreturn`. If the implicit print appears in tail position, +# the passthrough will return the value that was printed, so that when `autoreturn` +# transforms the code into `return theprint(_)`, it still works fine. +def _print_and_passthrough(printer, value): + printer(value) + return value diff --git a/unpythonic/syntax/prefix.py b/unpythonic/syntax/prefix.py index d5c828c0..b5dcfc6a 100644 --- a/unpythonic/syntax/prefix.py +++ b/unpythonic/syntax/prefix.py @@ -4,22 +4,153 @@ Experimental, not for use in production code. 
""" -from ast import Name, Call, Starred, Tuple, Load, Subscript -import sys +__all__ = ["prefix", "q", "u", "kw"] + +from ast import Call, Starred, Tuple, Load, Subscript from mcpyrate.quotes import macros, q, u, a, t # noqa: F811, F401 +from mcpyrate import namemacro from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer from .letdoutil import islet, isdo, UnexpandedLetView, UnexpandedDoView +from .nameutil import getname from ..it import flatmap, rev, uniqify -def prefix(block_body): - isquote = lambda tree: type(tree) is Name and tree.id == "q" - isunquote = lambda tree: type(tree) is Name and tree.id == "u" - iskwargs = lambda tree: type(tree) is Call and type(tree.func) is Name and tree.func.id == "kw" +# -------------------------------------------------------------------------------- + +def prefix(tree, *, syntax, **kw): # noqa: F811 + """[syntax, block] Write Python like Lisp: the first item is the operator. + + Example:: + + with prefix: + (print, "hello world") + t1 = (q, 1, 2, (3, 4), 5) + x = 42 + t2 = (q, 17, 23, x) + (print, t1, t2) + + Lexically inside a ``with prefix``: + + - A bare ``q`` at the head of a tuple is the quote operator. It increases + the quote level by one. + + It actually just tells the macro that this tuple (and everything in it, + recursively) is not a function call. + + Variables can be used as usual, there is no need to unquote them. + + - A bare ``u`` at the head of a tuple is the unquote operator, which + decreases the quote level by one. In other words, in:: + + with prefix: + t = (q, 1, 2, (u, print, 3), (print, 4), 5) + (print, t) + + the third item will call ``print(3)`` and evaluate to its return value + (in this case ``None``, since it's ``print``), whereas the fourth item + is a tuple with the two items ``(, 4)``. + + - Quote/unquote operators are parsed from the start of the tuple until + no more remain. 
Then any remaining items are either returned quoted + (if quote level > 0), or evaluated as a function call and replaced + by the return value. + + - How to pass named args:: + + from unpythonic import call + + with prefix: + (f, kw(myarg=3)) # ``kw(...)`` (syntax, not really a function!) + call(f, myarg=3) # in a call(), kwargs are ok + f(myarg=3) # or just use Python's usual function call syntax + + One ``kw`` operator may include any number of named args (and **only** + named args). The tuple may have any number of ``kw`` operators. + + All named args are collected from ``kw`` operators in the tuple + when writing the final function call. If the same kwarg has been + specified by multiple ``kw`` operators, the rightmost definition wins. + + **Note**: Python itself prohibits having repeated named args in the **same** + ``kw`` operator, because it uses the function call syntax. If you try to pass + the same named arg multiple times, as of 0.15, you should get a + `SyntaxError: keyword argument repeated` with a traceback. + + A ``kw(...)`` operator in a quoted tuple (i.e. a tuple that does not + represent a function call) is an error. + + Current limitations: + + - The `q`, `u` and `kw` macros cannot be renamed by as-importing; + `with prefix` expects them to have their original names. + + - passing ``*args`` and ``**kwargs`` not supported. + + Workarounds: ``call(...)``; Python's usual function call syntax. + + - For ``*args``, to keep it lispy, maybe you want ``unpythonic.fun.apply``; + this allows syntax such as ``(apply, f, 1, 2, lst)``. + + **CAUTION**: This macro is experimental, not intended for production use. + """ + if syntax != "block": + raise SyntaxError("prefix is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("prefix does not take an as-part") # pragma: no cover + + # Expand outside in.
Any nested macros should get clean standard Python, + # not having to worry about tuples possibly denoting function calls. + return _prefix(block_body=tree) + +# Note the exported "q" and "u" are ours (namely the stubs for the "q" and "u" +# operators compiled away by `prefix`), but the "q[]" we use as a macro in +# this module is the quasiquote operator from `mcpyrate.quotes`. +# +# This `def` doesn't overwrite the `mcpyrate` quasiquote macro `q`, because the `def` runs at run time. +# The expander does not try to expand this `q` as a macro, because `def q(...)` +# is not a valid macro invocation even when the name `q` has been imported as a macro. +@namemacro +def q(tree, *, syntax, **kw): # noqa: F811 + """[syntax, name] Quote operator. Only meaningful in a tuple inside a prefix block.""" + if syntax != "name": + raise SyntaxError("q (unpythonic.syntax.prefix.q) is a name macro only") # pragma: no cover + raise SyntaxError("q (unpythonic.syntax.prefix.q) is only valid in a tuple inside a `with prefix` block") # pragma: no cover, not meant to hit the expander + +@namemacro +def u(tree, *, syntax, **kw): # noqa: F811 + """[syntax, name] Unquote operator. Only meaningful in a tuple inside a prefix block.""" + if syntax != "name": + raise SyntaxError("u (unpythonic.syntax.prefix.u) is a name macro only") # pragma: no cover + raise SyntaxError("u (unpythonic.syntax.prefix.u) is only valid in a tuple inside a `with prefix` block") # pragma: no cover, not meant to hit the expander + +# TODO: This isn't a perfect solution, because there is no "call" macro kind. +# TODO: We currently trigger the error on any appearance of the name `kw` outside a valid context. +@namemacro +def kw(tree, *, syntax, **kw): # noqa: F811 + """[syntax, special] Pass-named-args operator for `with prefix`. + + Usage:: + + (f, a0, ..., kw(k0=v0, ...)) + + Only meaningful in a tuple inside a prefix block. 
+ """ + if syntax != "name": + raise SyntaxError("kw (unpythonic.syntax.prefix.kw) is a name macro only") # pragma: no cover + raise SyntaxError("kw (unpythonic.syntax.prefix.kw) is only valid in a tuple inside a `with prefix` block") # pragma: no cover, not meant to hit the expander + +# -------------------------------------------------------------------------------- + +def _prefix(block_body): + # TODO: Should change these to query the expander to allow renaming by as-imports. + # TODO: How to do that can be found in the implementation of `quicklambda`. + isquote = lambda tree: getname(tree, accept_attr=False) == "q" + isunquote = lambda tree: getname(tree, accept_attr=False) == "u" + iskwargs = lambda tree: type(tree) is Call and getname(tree.func, accept_attr=False) == "kw" class PrefixTransformer(ASTTransformer): def transform(self, tree): @@ -35,11 +166,14 @@ def transform(self, tree): # let and do have not expanded yet when prefix runs (better that way!). if islet(tree, expanded=False): view = UnexpandedLetView(tree) + newbindings = [] for binding in view.bindings: if type(binding) is not Tuple: raise SyntaxError("prefix: expected a tuple in let binding position") # pragma: no cover _, value = binding.elts # leave name alone, recurse into value binding.elts[1] = self.visit(value) + newbindings.append(binding) + view.bindings = newbindings # write the new bindings (important!) if view.body: view.body = self.visit(view.body) return tree @@ -59,11 +193,7 @@ def transform(self, tree): # Expr # Subscript if type(tree) is Subscript: - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - body = tree.slice - else: - body = tree.slice.value - + body = tree.slice if type(body) is Tuple: # Skip the transformation of the expr tuple itself, but transform its elements. 
# This skips the transformation of the macro argument tuple, too, because @@ -112,28 +242,6 @@ def transform(self, tree): self.withstate(thecall, quotelevel=quotelevel) return self.visit(thecall) - # This is a first-pass macro. Any nested macros should get clean standard Python, + # This is an outside-in macro. Any nested macros should get clean standard Python, # not having to worry about tuples possibly denoting function calls. return PrefixTransformer(quotelevel=0).visit(block_body) - -# Note the exported "q" and "u" are ours, but the "q" and "u" we use in this -# module are macros. The "q" and "u" we define here are regular run-time objects, -# namely the stubs for the "q" and "u" markers used within a `prefix` block. -class q: # noqa: F811 - """[syntax] Quote operator. Only meaningful in a tuple in a prefix block.""" - def __repr__(self): # in case one of these ends up somewhere at runtime # pragma: no cover - return "" -q = q() - -class u: # noqa: F811 - """[syntax] Unquote operator. Only meaningful in a tuple in a prefix block.""" - def __repr__(self): # in case one of these ends up somewhere at runtime # pragma: no cover - return "" -u = u() - -# TODO: Think of promoting this error to compile macro expansion time. -# TODO: Difficult to do, because we shouldn't probably hijack the name "kw" (so no name macro), -# TODO: and it can't be invoked like an expr macro, because the whole point is to pass arguments by name. -def kw(**kwargs): - """[syntax] Pass-named-args operator. Only meaningful in a tuple in a prefix block.""" - raise RuntimeError("kw(...) only meaningful inside a tuple in a prefix block") # pragma: no cover diff --git a/unpythonic/syntax/scopeanalyzer.py b/unpythonic/syntax/scopeanalyzer.py index ef26b254..6aaa7463 100644 --- a/unpythonic/syntax/scopeanalyzer.py +++ b/unpythonic/syntax/scopeanalyzer.py @@ -24,7 +24,7 @@ **CAUTION**: -What we do currently (before v0.15.0) doesn't fully make sense.
+What we do currently (before v0.16.0) doesn't fully make sense. Scope - in the sense of controlling lexical name resolution - is a static (purely lexical) concept, but whether a particular name (once lexically @@ -37,7 +37,7 @@ exceptional trivial cases such as `if 1`, this depends on the condition part of the `if` at run time, and thus can't be statically determined. -In order to make more sense, in v0.15.0, we will migrate to a fully static analysis. +In order to make more sense, in v0.16.0, we will migrate to a fully static analysis. This will make the analyzer consistent with how Python itself handles scoping, at the cost of slightly (but backward-incompatibly) changing the semantics of some corner cases in the usage of `let` and `do`. @@ -47,7 +47,7 @@ It is disabled when `scoped_transform` calls `get_lexical_variables`, to preserve old behavior until the next opportunity for a public interface change. -In v0.15.0, we will make `scoped_transform` use the fully lexical mode. +In v0.16.0, we will make `scoped_transform` use the fully lexical mode. **NOTE**: @@ -67,10 +67,21 @@ OOPSLA '13. 
http://dx.doi.org/10.1145/2509136.2509536 """ +__all__ = ["isnewscope", + "scoped_transform", + "get_lexical_variables", + "get_names_in_store_context", + "get_names_in_del_context", + "extract_args", + "collect_globals", + "collect_nonlocals"] + from ast import (Name, Tuple, Lambda, FunctionDef, AsyncFunctionDef, ClassDef, Import, ImportFrom, Try, ListComp, SetComp, GeneratorExp, DictComp, Store, Del, Global, Nonlocal) +from mcpyrate.astcompat import TryStar, MatchStar, MatchMapping, MatchAs +from mcpyrate.core import Done from mcpyrate.walkers import ASTTransformer, ASTVisitor from ..it import uniqify @@ -205,35 +216,15 @@ def get_lexical_variables(tree, collect_locals=True): raise TypeError(f"Expected a tree representing a lexical scope, got {type(tree)}") if type(tree) in (Lambda, FunctionDef, AsyncFunctionDef): - a = tree.args - allargs = a.args + a.kwonlyargs - if hasattr(a, "posonlyargs"): # Python 3.8+: positional-only arguments - allargs += a.posonlyargs - argnames = [x.arg for x in allargs] - if a.vararg: - argnames.append(a.vararg.arg) - if a.kwarg: - argnames.append(a.kwarg.arg) - + argnames = extract_args(tree) fname = [] localvars = [] nonlocals = [] if type(tree) in (FunctionDef, AsyncFunctionDef): fname = [tree.name] - if collect_locals: localvars = list(uniqify(get_names_in_store_context(tree.body))) - - class NonlocalsCollector(ASTVisitor): - def examine(self, tree): - if type(tree) in (Global, Nonlocal): - for x in tree.names: - self.collect(x) - if not isnewscope(tree): - self.generic_visit(tree) - nc = NonlocalsCollector() - nc.visit(tree.body) - nonlocals = nc.collected + nonlocals = collect_nonlocals(tree.body) + collect_globals(tree.body) return list(uniqify(fname + argnames + localvars)), list(uniqify(nonlocals)) @@ -275,6 +266,9 @@ def examine(self, tree): for g in tree.generators: if type(g.target) is Name: targetnames.append(g.target.id) + # The `Done` may be produced by expanded `@namemacro`s. 
+ elif isinstance(g.target, Done) and type(g.target.body) is Name: + targetnames.append(g.target.body.id) elif type(g.target) is Tuple: class NamesCollector(ASTVisitor): def examine(self, tree): @@ -296,8 +290,8 @@ def get_names_in_store_context(tree): This includes: - - Any ``Name`` in store context (such as on the LHS of an `Assign` - or `NamedExpr` node) + - Any ``Name`` in store context (such as on the LHS of an `Assign`, + `NamedExpr` (Python 3.8+), `TypeAlias` (Python 3.12+)) - The name of ``FunctionDef``, ``AsyncFunctionDef`` or``ClassDef`` @@ -307,8 +301,12 @@ def get_names_in_store_context(tree): - The exception name of any ``except`` handlers + - The exception name of any ``except*`` handlers (Python 3.11+) + - The names in the as-part of ``With`` + - The names bound in `match`/`case` patterns (Python 3.10+) + Duplicates may be returned; use ``set(...)`` or ``list(uniqify(...))`` on the output to remove them. @@ -339,7 +337,7 @@ def examine(self, tree): elif type(tree) in (Import, ImportFrom): for x in tree.names: self.collect(x.asname if x.asname is not None else x.name) - elif type(tree) is Try: + elif type(tree) in (Try, TryStar): # Python 3.11+: `try`/`except*` # https://docs.python.org/3/reference/compound_stmts.html#the-try-statement # # TODO: The `err` in `except SomeException as err` is only bound within the `except` block, @@ -351,13 +349,30 @@ def examine(self, tree): # TODO: `try`, even inside the `except` blocks, will be bound in the whole parent scope. for h in tree.handlers: self.collect(h.name) + # Python 3.10+: `match`/`case` captures are `MatchAs(name='x')` and + # `MatchStar(name='rest')` with bare strings (not `Name` nodes). The `name` + # is `None` for `_` (wildcard, doesn't capture). `Name` nodes in patterns are + # class references (e.g. `Point` in `case Point(x, y):`), not captures. + # + # `generic_visit` handles most match patterns automatically, since `MatchAs` + # and `MatchStar` nodes appear as children. 
The one exception is + # `MatchMapping.rest`, which is a bare string attribute (not an AST child). + elif type(tree) in (MatchAs, MatchStar): + if tree.name is not None: + self.collect(tree.name) + elif type(tree) is MatchMapping: + if tree.rest is not None: # `**rest` capture + self.collect(tree.rest) + + # Python 3.12+: `TypeAlias` uses a name in `Store` context on its LHS so it needs no special handling here. + # Same note as for for loops. # elif type(tree) in (With, AsyncWith): # for item in tree.items: # if item.optional_vars is not None: # self._collect_name_or_list(item.optional_vars) # macro-created nodes might not have a ctx, but our macros don't create lexical assignments. - if type(tree) is Name and hasattr(tree, "ctx") and type(tree.ctx) is Store: + if type(tree) is Name and type(getattr(tree, "ctx", None)) is Store: self.collect(tree.id) if not isnewscope(tree): self.generic_visit(tree) @@ -376,13 +391,61 @@ class DelNamesCollector(ASTVisitor): def examine(self, tree): # We want to detect things like "del x": # Delete(targets=[Name(id='x', ctx=Del()),]) - # We don't currently care about "del myobj.x" or "del mydict['x']" (these examples in Python 3.6): + # We don't currently care about "del myobj.x" or "del mydict['x']" (these old examples in Python 3.6): # Delete(targets=[Attribute(value=Name(id='myobj', ctx=Load()), attr='x', ctx=Del()),]) # Delete(targets=[Subscript(value=Name(id='mydict', ctx=Load()), slice=Index(value=Str(s='x')), ctx=Del()),]) - if type(tree) is Name and hasattr(tree, "ctx") and type(tree.ctx) is Del: + if type(tree) is Name and type(getattr(tree, "ctx", None)) is Del: self.collect(tree.id) if not isnewscope(tree): self.generic_visit(tree) nc = DelNamesCollector() nc.visit(tree) return nc.collected + +def extract_args(tree): + """Extract the parameter names from a `Lambda`, `FunctionDef`, or `AsyncFunctionDef` node. + + Return a `list` of bare `str`. 
+ """ + if type(tree) not in (Lambda, FunctionDef, AsyncFunctionDef): + raise ValueError(f"Expected a function definition AST node, got {tree}") + a = tree.args + allargs = a.posonlyargs + a.args + a.kwonlyargs + argnames = [x.arg for x in allargs] + if a.vararg: + argnames.append(a.vararg.arg) + if a.kwarg: + argnames.append(a.kwarg.arg) + return argnames + +def collect_globals(tree): + """Collect the names of all names declared `global` in `tree`, stopping at scope boundaries. + + Return a `list` of bare `str`. + """ + class GlobalsCollector(ASTVisitor): + def examine(self, tree): + if type(tree) is Global: + for name in tree.names: + self.collect(name) + if not isnewscope(tree): + self.generic_visit(tree) + collector = GlobalsCollector() + collector.visit(tree) + return collector.collected + +def collect_nonlocals(tree): + """Collect the names of all names declared `nonlocal` in `tree`, stopping at scope boundaries. + + Return a `list` of bare `str`. + """ + class NonlocalsCollector(ASTVisitor): + def examine(self, tree): + if type(tree) is Nonlocal: + for name in tree.names: + self.collect(name) + if not isnewscope(tree): + self.generic_visit(tree) + collector = NonlocalsCollector() + collector.visit(tree) + return collector.collected diff --git a/unpythonic/syntax/simplelet.py b/unpythonic/syntax/simplelet.py index d5833a28..f97f258c 100644 --- a/unpythonic/syntax/simplelet.py +++ b/unpythonic/syntax/simplelet.py @@ -15,6 +15,8 @@ # Unlike the other submodules, this module contains the macro interface; # these macros are not part of the top-level ``unpythonic.syntax`` interface. 
+__all__ = ["let", "letseq"] + from mcpyrate.quotes import macros, q, a, t # noqa: F811, F401 from ast import arg @@ -74,10 +76,10 @@ def letseq(tree, *, args, syntax, expander, **kw): if not args: return tree first, *rest = args - body = q[a[our_letseq][t[rest]][a[tree]]] - return q[a[our_let][a[first]][a[body]]] + body = q[a[_our_letseq][t[rest]][a[tree]]] + return q[a[_our_let][a[first]][a[body]]] # for hygienic macro recursion -our_let = capture_as_macro(let) -our_letseq = capture_as_macro(letseq) +_our_let = capture_as_macro(let) +_our_letseq = capture_as_macro(letseq) diff --git a/unpythonic/syntax/tailtools.py b/unpythonic/syntax/tailtools.py index 4c1286de..7a1d6742 100644 --- a/unpythonic/syntax/tailtools.py +++ b/unpythonic/syntax/tailtools.py @@ -3,83 +3,732 @@ The common factor is tail-position analysis.""" +__all__ = ["autoreturn", + "tco", + "continuations", "call_cc", "get_cc", "iscontinuation"] + from functools import partial -from ast import (Lambda, FunctionDef, AsyncFunctionDef, +from ast import (Lambda, FunctionDef, AsyncFunctionDef, ClassDef, arguments, arg, keyword, List, Tuple, Call, Name, Starred, Constant, BoolOp, And, Or, - With, AsyncWith, If, IfExp, Try, Assign, Return, Expr, + With, AsyncWith, If, IfExp, Try, Match, Assign, Return, Expr, + Await, copy_location) -import sys from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 from mcpyrate import gensym -from mcpyrate.markers import ASTMarker -from mcpyrate.quotes import is_captured_value +from mcpyrate.astcompat import TryStar +from mcpyrate.quotes import capture_as_macro, is_captured_value from mcpyrate.utils import NestingLevelTracker from mcpyrate.walkers import ASTTransformer, ASTVisitor -from .astcompat import getconstant, NameConstant -from .util import (isx, make_isxpred, isec, +from .ifexprs import aif, it +from .letdoutil import isdo, islet, ExpandedLetView, ExpandedDoView +from .util import (isx, isec, detect_callec, detect_lambda, has_tco, sort_lambda_decorators, - 
suggest_decorator_index, ContinuationsMarker, wrapwith, isexpandedmacromarker) -from .letdoutil import isdo, islet, ExpandedLetView, ExpandedDoView -from .ifexprs import aif + suggest_decorator_index, + UnpythonicASTMarker, ExpandedContinuationsMarker) from ..dynassign import dyn +from ..fun import identity +from ..funutil import Values from ..it import uniqify -from ..fun import identity, orf from ..tco import trampolined, jump -from ..lazyutil import passthrough_lazy_args -# ----------------------------------------------------------------------------- -# Implicit return statement. This performs a tail-position analysis of function bodies. +# In `continuations`, we use `aif` and `it` as hygienically captured macros. +# Note the difference between `aif[..., it, ...]` and `q[a[_our_aif][..., a[_our_it], ...]]`. +# +# If `it` is bound in the current expander, even *mentioning* it outside an `aif` is a syntax error, by design. +# +# When constructing a quasiquoted tree that invokes `aif[]`, we can splice in a hygienic reference to `it` +# as `a[_our_it]` without even having the macro bound in the expander that expands *this* module. +_our_aif = capture_as_macro(aif) +_our_it = capture_as_macro(it) + +# -------------------------------------------------------------------------------- +# Macro interface + +def autoreturn(tree, *, syntax, **kw): + """[syntax, block] Implicit "return" in tail position, like in Lisps. + + Each ``def`` function definition lexically within the ``with autoreturn`` + block is examined, and if the last item within the body is an expression + ``expr``, it is transformed into ``return expr``. + + If the last item is an if/elif/else block, the transformation is applied + to the last item in each of its branches. + + If the last item is a ``with`` or ``async with`` block, the transformation + is applied to the last item in its body. + + If the last item is a try/except/else/finally block, the rules are as follows. 
+ If an ``else`` clause is present, the transformation is applied to the last + item in it; otherwise, to the last item in the ``try`` clause. Additionally, + in both cases, the transformation is applied to the last item in each of the + ``except`` clauses. The ``finally`` clause is not transformed; the intention + is it is usually a finalizer (e.g. to release resources) that runs after the + interesting value is already being returned by ``try``, ``else`` or ``except``. + + Example:: + + with autoreturn: + def f(): + "I'll just return this" + assert f() == "I'll just return this" + + def g(x): + if x == 1: + "one" + elif x == 2: + "two" + else: + "something else" + assert g(1) == "one" + assert g(2) == "two" + assert g(42) == "something else" + + **CAUTION**: If the final ``else`` is omitted, as often in Python, then + only the ``else`` item is in tail position with respect to the function + definition - likely not what you want. + + So with ``autoreturn``, the final ``else`` should be written out explicitly, + to make the ``else`` branch part of the same if/elif/else block. + + **CAUTION**: ``for``, ``async for``, ``while`` are currently not analyzed; + effectively, these are defined as always returning ``None``. If the last item + in your function body is a loop, use an explicit return. + + **CAUTION**: With ``autoreturn`` enabled, functions no longer return ``None`` + by default; the whole point of this macro is to change the default return + value. + + The default return value is ``None`` only if the tail position contains + a statement (because in a sense, a statement always returns ``None``). + """ + if syntax != "block": + raise SyntaxError("autoreturn is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("autoreturn does not take an as-part") # pragma: no cover + + # Expand outside in. 
Any nested macros should get clean standard Python, + # not having to worry about implicit "return" statements. + return _autoreturn(block_body=tree) + +def tco(tree, *, syntax, expander, **kw): + """[syntax, block] Implicit tail-call optimization (TCO). + + Examples:: + + with tco: + evenp = lambda x: (x == 0) or oddp(x - 1) + oddp = lambda x: (x != 0) and evenp(x - 1) + assert evenp(10000) is True + + with tco: + def evenp(x): + if x == 0: + return True + return oddp(x - 1) + def oddp(x): + if x != 0: + return evenp(x - 1) + return False + assert evenp(10000) is True + + This is based on a strategy similar to MacroPy's tco macro, but using + the TCO machinery from ``unpythonic.tco``. + + This recursively handles also builtins ``a if p else b``, ``and``, ``or``; + and from ``unpythonic.syntax``, ``do[]``, ``let[]``, ``letseq[]``, ``letrec[]``, + when used in computing a return value. (``aif[]`` and ``cond[]`` also work.) + + Note only calls **in tail position** will be TCO'd. Any other calls + are left as-is. Tail positions are: + + - The whole return value, if it is just a single call. + + - Both ``a`` and ``b`` branches of ``a if p else b`` (but not ``p``). + + - The last item in an ``and``/``or``. If these are nested, only the + last item in the whole expression involving ``and``/``or``. E.g. in:: + + (a and b) or c + a and (b or c) + + in either case, only ``c`` is in tail position, regardless of the + values of ``a``, ``b``. + + - The last item in a ``do[]``. + + - In a ``do0[]``, this is the implicit item that just returns the + stored return value. + + - The argument of a call to an escape continuation. The ``ec(...)`` call + itself does not need to be in tail position; escaping early is the + whole point of an ec. + + All function definitions (``def`` and ``lambda``) lexically inside the block + undergo TCO transformation. 
The functions are automatically ``@trampolined``, + and any tail calls in their return values are converted to ``jump(...)`` + for the TCO machinery. + + Note in a ``def`` you still need the ``return``; it marks a return value. + But see ``autoreturn``:: + + with autoreturn, tco: + def evenp(x): + if x == 0: + True + else: + oddp(x - 1) + def oddp(x): + if x != 0: + evenp(x - 1) + else: + False + assert evenp(10000) is True + + **CAUTION**: regarding escape continuations, only basic uses of ecs created + via ``call_ec`` are currently detected as being in tail position. Any other + custom escape mechanisms are not supported. (This is mainly of interest for + lambdas, which have no ``return``, and for "multi-return" from a nested + function.) + + *Basic use* is defined as either of these two cases:: + + # use as decorator + @call_ec + def result(ec): + ... + + # use directly on a literal lambda (effectively, as a decorator) + result = call_ec(lambda ec: ...) + + When macro expansion of the ``with tco`` block starts, names of escape + continuations created **anywhere lexically within** the ``with tco`` block + are captured. Lexically within the block, any call to a function having + any of the captured names, or as a fallback, one of the literal names + ``ec``, ``brk``, ``throw`` is interpreted as invoking an escape + continuation. + """ + if syntax != "block": + raise SyntaxError("tco is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("tco does not take an as-part") # pragma: no cover + + # Two-pass macro. + with dyn.let(_macro_expander=expander): + return _tco(block_body=tree) + +def continuations(tree, *, syntax, expander, **kw): + """[syntax, block] call/cc for Python. + + This allows saving the control state and then jumping back later + (in principle, any time later). 
Some possible use cases: + + - Tree traversal (possibly a cartesian product of multiple trees, with the + current position in each tracked automatically). + + - McCarthy's amb operator. + + - Generators. (Python already has those, so only for teaching.) + + This is a very loose pythonification of Paul Graham's continuation-passing + macros, which implement continuations by chaining closures and passing the + continuation semi-implicitly. For details, see chapter 20 in On Lisp: + + http://paulgraham.com/onlisp.html + + Continuations are most readily implemented when the program is written in + continuation-passing style (CPS), but that is unreadable for humans. + The purpose of this macro is to partly automate the CPS transformation, so + that at the use site, we can write CPS code in a much more readable fashion. + + A ``with continuations`` block implies TCO; the same rules apply as in a + ``with tco`` block. Furthermore, ``with continuations`` introduces the + following additional rules: + + - Functions which make use of continuations, or call other functions that do, + must be defined within a ``with continuations`` block, using the usual + ``def`` or ``lambda`` forms. + + - All function definitions in a ``with continuations`` block, including + any nested definitions, have an implicit formal parameter ``cc``, + **even if not explicitly declared** in the formal parameter list. + + If declared explicitly, ``cc`` must be in a position that can accept a + default value. + + This means ``cc`` must be declared either as by-name-only:: + + with continuations: + def myfunc(a, b, *, cc): + ... + + f = lambda *, cc: ... + + or as the last parameter that has no default:: + + with continuations: + def myfunc(a, b, cc): + ... + + f = lambda cc: ... + + Then the continuation machinery will automatically set the default value + of ``cc`` to the default continuation (``identity``), which just returns + its arguments. 
+ + The most common use case for explicitly declaring ``cc`` is that the + function is the target of a ``call_cc[]``; then it helps readability + to make the ``cc`` parameter explicit. + + - A ``with continuations`` block will automatically transform all + function definitions and ``return`` statements lexically contained + within the block to use the continuation machinery. + + - ``return somevalue`` actually means a tail-call to ``cc`` with the + given ``somevalue``. + + Multiple values can be returned as a ``Values``. Multiple-valueness + is tested at run time. + + Any ``Values`` return value is automatically unpacked to the args + and kwargs of ``cc``. + + - An explicit ``return somefunc(arg0, ..., k0=v0, ...)`` actually means + a tail-call to ``somefunc``, with its ``cc`` automatically set to our + ``cc``. Hence this inserts a call to ``somefunc`` before proceeding + with our current continuation. (This is most often what we want when + making a tail-call from a continuation-enabled function.) + + Here ``somefunc`` **must** be a continuation-enabled function; + otherwise the TCO chain will break and the result is immediately + returned to the top-level caller. + + (If the call succeeds at all; the ``cc`` argument is implicitly + filled in and passed by name. Regular functions usually do not + accept a named parameter ``cc``, let alone know what to do with it.) + + - Just like in ``with tco``, a lambda body is analyzed as one big + return-value expression. This uses the exact same analyzer; for example, + ``do[]`` (including any implicit ``do[]``) and the ``let[]`` expression + family are supported. + + - Calls from functions defined in one ``with continuations`` block to those + defined in another are ok; there is no state or context associated with + the block. + + - Much of the language works as usual. + + Any non-tail calls can be made normally. Regular functions can be called + normally in any non-tail position. 
+ + Continuation-enabled functions behave as regular functions when + called normally; only tail calls implicitly set ``cc``. A normal call + uses ``identity`` as the default ``cc``. + + - For technical reasons, the ``return`` statement is not allowed at the + top level of the ``with continuations:`` block. (Because a continuation + is essentially a function, ``return`` would behave differently based on + whether it is placed lexically before or after a ``call_cc[]``.) + + If you absolutely need to terminate the function surrounding the + ``with continuations:`` block from inside the block, use an exception + to escape; see ``call_ec``, ``catch``, ``throw``. + + **Capturing the continuation**: + + Inside a ``with continuations:`` block, the ``call_cc[]`` statement + captures a continuation. (It is actually a macro, for technical reasons.) + + Capturing a continuation introduces a scope boundary. The continuation + captured by `call_cc` (i.e. the rest of the function body after the + `call_cc` statement) is a new scope, and the assignment part of the + `call_cc` statement takes effect in that new scope. Under the hood, + the assignment from the `call_cc` is implemented as function parameters; + the continuation is a function. + + For various possible program topologies that continuations may introduce, see + the clarifying pictures under ``doc/`` in the source distribution. + + Syntax:: + + x = call_cc[func(...)] + *xs = call_cc[func(...)] + x0, ... = call_cc[func(...)] + x0, ..., *xs = call_cc[func(...)] + call_cc[func(...)] + + Conditional variant:: + + x = call_cc[f(...) if p else g(...)] + *xs = call_cc[f(...) if p else g(...)] + x0, ... = call_cc[f(...) if p else g(...)] + x0, ..., *xs = call_cc[f(...) if p else g(...)] + call_cc[f(...) if p else g(...)] + + Assignment targets: + + - To destructure positional multiple-values (from a `Values` return value), + use a tuple assignment target (comma-separated names, as usual). 
+ + Destructuring *named* return values from a `call_cc` is currently not supported. + + - The last assignment target may be starred. It is transformed into + the vararg (a.k.a. ``*args``) of the continuation function. + (It will capture a whole tuple, or any excess items, as usual.) + + - To ignore the return value (useful if ``func`` was called only to + perform its side-effects), just omit the assignment part. + + Conditional variant: + + - ``p`` is any expression. If truthy, ``f(...)`` is called, and if falsey, + ``g(...)`` is called. + + - Each of ``f(...)``, ``g(...)`` may be ``None``. A ``None`` skips the + function call, proceeding directly to the continuation. Upon skipping, + all assignment targets (if any are present) are set to ``None``. + The starred assignment target (if present) gets the empty tuple. + + - The main use case of the conditional variant is for things like:: + + with continuations: + k = None + def setk(cc): + global k + k = cc + def dostuff(x): + call_cc[setk() if x > 10 else None] # capture only if x > 10 + ... + + To keep things relatively straightforward, a ``call_cc[]`` is only + allowed to appear **at the top level** of: + + - the ``with continuations:`` block itself + - a ``def`` or ``async def`` + + Nested defs are ok; here *top level* only means the top level of the + *currently innermost* ``def``. + + If you need to place ``call_cc[]`` inside a loop, use ``@looped`` et al. + from ``unpythonic.fploop``; this has the loop body represented as the + top level of a ``def``. + + Multiple ``call_cc[]`` statements in the same function body are allowed. + These essentially create nested closures. + + **Main differences to Scheme and Racket**: + + Compared to Scheme/Racket, where ``call/cc`` will capture also expressions + occurring further up in the call stack, our ``call_cc`` may need to be + placed differently (further out, depending on what needs to be captured) + due to the delimited nature of the continuations implemented here.
+ + Scheme and Racket implicitly capture the continuation at every position, + whereas we do it explicitly, only at the use sites of the ``call_cc`` macro. + + Also, since there are limitations to where a ``call_cc[]`` may appear, some + code may need to be structured differently to do some particular thing, if + porting code examples originally written in Scheme or Racket. + + Unlike ``call/cc`` in Scheme/Racket, ``call_cc`` takes **a function call** + as its argument, not just a function reference. Also, there's no need for + it to be a one-argument function; any other args can be passed in the call. + The ``cc`` argument is filled implicitly and passed by name; any others are + passed exactly as written in the client code. + + **Technical notes**: + + The ``call_cc[]`` statement essentially splits its use site into *before* + and *after* parts, where the *after* part (the continuation) can be run + a second and further times, by later calling the callable that represents + the continuation. This makes a computation resumable from a desired point. + + The return value of the continuation is whatever the original function + returns, for any ``return`` statement that appears lexically after the + ``call_cc[]``. + + The effect of ``call_cc[]`` is that the function call ``func(...)`` in + the brackets is performed, with its ``cc`` argument set to the lexically + remaining statements of the current ``def`` (at the top level, the rest + of the ``with continuations`` block), represented as a callable. + + The continuation itself ends there (it is *delimited* in this particular + sense), but it will chain to the ``cc`` of the function it appears in. + This is termed the *parent continuation* (**pcc**), stored in the internal + variable ``_pcc`` (which defaults to ``None``). + + Via the use of the pcc, here ``f`` will maintain the illusion of being + just one function, even though a ``call_cc`` appears there:: + + def f(*, cc): + ... + call_cc[g(1, 2, 3)] + ... 
+ + The continuation is a closure. For its pcc, it will use the value the + original function's ``cc`` had when the definition of the continuation + was executed (for that particular instance of the closure). Hence, calling + the original function again with its ``cc`` set to something else will + produce a new continuation instance that chains into that new ``cc``. + + The continuation's own ``cc`` will be ``identity``, to allow its use just + like any other function (also as argument of a ``call_cc`` or target of a + tail call). + + When the pcc is set (not ``None``), the effect is to run the pcc first, + and ``cc`` only after that. This preserves the whole captured tail of a + computation also in the presence of nested ``call_cc`` invocations (in the + above example, this would occur if also ``g`` used ``call_cc``). + + Continuations are not accessible by name (their definitions are named by + gensym). To get a reference to a continuation instance, stash the value + of the ``cc`` argument somewhere while inside the ``call_cc``. + + The function ``func`` called by a ``call_cc[func(...)]`` is (almost) the + only place where the ``cc`` argument is actually set. There it is the + captured continuation. Roughly everywhere else, ``cc`` is just ``identity``. + + Tail calls are an exception to this rule; a tail call passes along the current + value of ``cc``, unless overridden manually (by setting the ``cc=...`` kwarg + in the tail call). + + When the pcc is set (not ``None``) at the site of the tail call, the + machinery will create a composed continuation that runs the pcc first, + and ``cc`` (whether current or manually overridden) after that. This + composed continuation is then passed to the tail call as its ``cc``. + + **Tips**: + + - Once you have a captured continuation, one way to use it is to set + ``cc=...`` manually in a tail call, as was mentioned. Example:: + + def main(): + call_cc[myfunc()] # call myfunc, capturing the current cont... + ... 
# ...which is the rest of "main" + + def myfunc(cc): + ourcc = cc # save the captured continuation (sent by call_cc[]) + def somefunc(): + return dostuff(..., cc=ourcc) # and use it here + somestack.append(somefunc) + + In this example, when ``somefunc`` is eventually called, it will tail-call + ``dostuff`` and then proceed with the continuation ``myfunc`` had + at the time when that instance of the ``somefunc`` closure was created. + (This pattern is essentially how to build the ``amb`` operator.) + + - Instead of setting ``cc``, you can also overwrite ``cc`` with a captured + continuation inside a function body. That overrides the continuation + for the rest of the dynamic extent of the function, not only for a + particular tail call:: + + def myfunc(cc): + ourcc = cc + def somefunc(): + cc = ourcc + return dostuff(...) + somestack.append(somefunc) + + - A captured continuation can also be called manually; it's just a callable. + + The assignment targets, at the ``call_cc[]`` use site that spawned this + particular continuation, specify its call signature. All args are + positional, except the implicit ``cc``, which is by-name-only. + + - Just like in Scheme/Racket's ``call/cc``, the values that get bound + to the ``call_cc[]`` assignment targets on second and further calls + (when the continuation runs) are the arguments given to the continuation + when it is called (whether implicitly or manually). + + - Setting ``cc`` to ``unpythonic.fun.identity``, while inside a ``call_cc``, + will short-circuit the rest of the computation. In such a case, the + continuation will not be invoked automatically. A useful pattern for + suspend/resume. + + - However, it is currently not possible to prevent the rest of the tail + of a captured continuation (the pcc) from running, apart from manually + setting ``_pcc`` to ``None`` before executing a ``return``. Note that + doing that is not strictly speaking supported (and may be subject to + change in a future version). 
+ + - When ``call_cc[]`` appears inside a function definition: + + - It tail-calls ``func``, with its ``cc`` set to the captured + continuation. + + - The return value of the function containing one or more ``call_cc[]`` + statements is the return value of the continuation. + + - When ``call_cc[]`` appears at the top level of ``with continuations``: + + - A normal call to ``func`` is made, with its ``cc`` set to the captured + continuation. + + - In this case, if the continuation is called later, it always + returns ``None``, because the use site of ``call_cc[]`` is not + inside a function definition. + + - If you need to insert just a tail call (no further statements) before + proceeding with the current continuation, no need for ``call_cc[]``; + use ``return func(...)`` instead. + + The purpose of ``call_cc[func(...)]`` is to capture the current + continuation (the remaining statements), and hand it to ``func`` + as a first-class value. + + - To combo with ``multilambda``, use this ordering:: + + with multilambda, continuations: + ... + + - Some very limited comboability with ``call_ec``. May be better to plan + ahead, using ``call_cc[]`` at the appropriate outer level, and then + short-circuit (when needed) by setting ``cc`` to ``identity``. + This avoids the need to have both ``call_cc`` and ``call_ec`` at the + same time. + + - ``unpythonic.ec.call_ec`` can be used normally **lexically before any** + ``call_cc[]``, but (in a given function) after at least one ``call_cc[]`` + has run, the ``ec`` ceases to be valid. This is because our ``call_cc[]`` + actually splits the function into *before* and *after* parts, and + **tail-calls** the *after* part. + + (Wrapping the ``def`` in another ``def``, and placing the ``call_ec`` + on the outer ``def``, does not help either, because even the outer + function has exited by the time *the continuation* is later called + the second and further times.) 
+ + Usage of ``call_ec`` while inside a ``with continuations`` block is:: -def autoreturn(block_body): + with continuations: + @call_ec + def result(ec): + print("hi") + ec(42) + print("not reached") + assert result == 42 + + result = call_ec(lambda ec: do[print("hi"), + ec(42), + print("not reached")]) + + Note the signature of ``result``. Essentially, ``ec`` is a function + that raises an exception (to escape to a dynamically outer context), + whereas the implicit ``cc`` is the closure-based continuation handled + by the continuation machinery. + + See the ``tco`` macro for details on the ``call_ec`` combo. + """ + if syntax != "block": + raise SyntaxError("continuations is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("continuations does not take an as-part") # pragma: no cover + + # Two-pass macro. + with dyn.let(_macro_expander=expander): + return _continuations(block_body=tree) + +def call_cc(tree, **kw): + """[syntax] Only meaningful in a "with continuations" block. + + Syntax cheat sheet:: + + x = call_cc[func(...)] + *xs = call_cc[func(...)] + x0, ... = call_cc[func(...)] + x0, ..., *xs = call_cc[func(...)] + call_cc[func(...)] + + Conditional variant:: + + x = call_cc[f(...) if p else g(...)] + *xs = call_cc[f(...) if p else g(...)] + x0, ... = call_cc[f(...) if p else g(...)] + x0, ..., *xs = call_cc[f(...) if p else g(...)] + call_cc[f(...) if p else g(...)] + + where ``f()`` or ``g()`` may be ``None`` instead of a function call. + + For more, see the docstring of ``continuations``. + """ + if _continuations_level.value < 1: + raise SyntaxError("call_cc[] is only meaningful in a `with continuations` block.") # pragma: no cover, not meant to hit the expander (expanded away by `with continuations`) + return CallCcMarker(body=tree) + + +# -------------------------------------------------------------------------------- +# Syntax transformers + +# Implicit return statement. 
This performs a tail-position analysis of function bodies. +def _autoreturn(block_body): class AutoreturnTransformer(ASTTransformer): def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! if type(tree) in (FunctionDef, AsyncFunctionDef): - tree.body[-1] = transform_tailstmt(tree.body[-1]) + newtail = TailStatementTransformer().visit(tree.body[-1]) + if isinstance(newtail, list): # replaced by more than one statement? + tree.body = tree.body[:-1] + newtail + else: + tree.body[-1] = newtail return self.generic_visit(tree) - def transform_tailstmt(tree): - # TODO: For/AsyncFor/While? - if type(tree) is If: - tree.body[-1] = transform_tailstmt(tree.body[-1]) - if tree.orelse: - tree.orelse[-1] = transform_tailstmt(tree.orelse[-1]) - elif type(tree) in (With, AsyncWith): - tree.body[-1] = transform_tailstmt(tree.body[-1]) - elif type(tree) is Try: - # We don't care about finalbody; typically used for unwinding only. - if tree.orelse: # tail position is in else clause if present - tree.orelse[-1] = transform_tailstmt(tree.orelse[-1]) - else: # tail position is in the body of the "try" - tree.body[-1] = transform_tailstmt(tree.body[-1]) - # additionally, tail position is in each "except" handler - for handler in tree.handlers: - handler.body[-1] = transform_tailstmt(handler.body[-1]) - elif type(tree) is Expr: - tree = Return(value=tree.value) - return tree - # This is a first-pass macro. Any nested macros should get clean standard Python, + + class TailStatementTransformer(ASTTransformer): + def transform(self, tree): + # TODO: For/AsyncFor/While? + if type(tree) is If: + tree.body[-1] = self.visit(tree.body[-1]) + if tree.orelse: + tree.orelse[-1] = self.visit(tree.orelse[-1]) + elif type(tree) in (With, AsyncWith): + tree.body[-1] = self.visit(tree.body[-1]) + elif type(tree) in (Try, TryStar): # Python 3.11+: `try`/`except*` + # We don't care about finalbody; typically used for unwinding only. 
+ if tree.orelse: # tail position is in else clause if present + tree.orelse[-1] = self.visit(tree.orelse[-1]) + else: # tail position is in the body of the "try" + tree.body[-1] = self.visit(tree.body[-1]) + # additionally, tail position is in each "except" handler + for handler in tree.handlers: + handler.body[-1] = self.visit(handler.body[-1]) + elif type(tree) is Match: # Python 3.10+: `match`/`case` + for case in tree.cases: + if case.body: + case.body[-1] = self.visit(case.body[-1]) + elif type(tree) in (FunctionDef, AsyncFunctionDef, ClassDef): # v0.15.0+ + # If the item in tail position is a named function definition + # or a class definition, it binds a name - that of the function/class. + # Return that object. + with q as quoted: + with a: + tree + return n[tree.name] + tree = quoted + elif type(tree) is Expr: # expr -> return expr + with q as quoted: + return a[tree.value] + tree = quoted[0] + return tree + # This macro expands outside-in. Any nested macros should get clean standard Python, # not having to worry about implicit "return" statements. return AutoreturnTransformer().visit(block_body) -# ----------------------------------------------------------------------------- -# Automatic TCO. This is the same framework as in "continuations", in its simplest form. -def tco(block_body): +# Automatic TCO. This is the same framework as in "continuations", in its simplest form. 
+def _tco(block_body): # first pass, outside-in userlambdas = detect_lambda(block_body) known_ecs = list(uniqify(detect_callec(block_body))) - block_body = dyn._macro_expander.visit(block_body) + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out transform_retexpr = partial(_transform_retexpr) @@ -87,7 +736,7 @@ def tco(block_body): for stmt in block_body: # skip nested, already expanded "with continuations" blocks # (needed to support continuations in the Lispython dialect, which applies tco globally) - if isexpandedmacromarker("ContinuationsMarker", stmt): + if isinstance(stmt, ExpandedContinuationsMarker): new_block_body.append(stmt) continue @@ -102,55 +751,22 @@ def tco(block_body): new_block_body.append(stmt) return new_block_body -# ----------------------------------------------------------------------------- - -_continuations_level = NestingLevelTracker() # for checking validity of call_cc[] - -class UnpythonicContinuationsMarker(ASTMarker): - """AST marker related to the unpythonic's continuations (call_cc) subsystem.""" -class UnpythonicCallCcMarker(UnpythonicContinuationsMarker): - """AST marker denoting a `call_cc[]` invocation.""" - -def call_cc(tree, **kw): - """[syntax] Only meaningful in a "with continuations" block. - - Syntax cheat sheet:: - - x = call_cc[func(...)] - *xs = call_cc[func(...)] - x0, ... = call_cc[func(...)] - x0, ..., *xs = call_cc[func(...)] - call_cc[func(...)] - - Conditional variant:: - x = call_cc[f(...) if p else g(...)] - *xs = call_cc[f(...) if p else g(...)] - x0, ... = call_cc[f(...) if p else g(...)] - x0, ..., *xs = call_cc[f(...) if p else g(...)] - call_cc[f(...) if p else g(...)] - - where ``f()`` or ``g()`` may be ``None`` instead of a function call. - - For more, see the docstring of ``continuations``. 
- """ - if _continuations_level.value < 1: - raise SyntaxError("call_cc[] is only meaningful in a `with continuations` block.") # pragma: no cover, not meant to hit the expander (expanded away by `with continuations`) - return UnpythonicCallCcMarker(tree) +# ----------------------------------------------------------------------------- +# True multi-shot continuations for Python, based on a CPS transformation. # _pcc/cc chaining handler, to be exported to client code via q[h[]]. # # We handle multiple-return-values like the rest of unpythonic does: -# returning a tuple means returning multiple values. Unpack them -# to cc's arglist. +# returning a `Values` means returning multiple values. Unpack them +# to cc's args/kwargs. # def chain_conts(cc1, cc2, with_star=False): # cc1=_pcc, cc2=cc """Internal function, used in code generated by the continuations macro.""" if with_star: # to be chainable from a tail call, accept a multiple-values arglist if cc1 is not None: - @passthrough_lazy_args - def cc(*value): - return jump(cc1, cc=cc2, *value) + def cc(*rets, **kwrets): + return jump(cc1, cc=cc2, *rets, **kwrets) else: # Beside a small optimization, it is important to preserve # "identity" as "identity", so that the call_cc logic that @@ -159,22 +775,29 @@ def cc(*value): cc = cc2 else: # for inert data value returns (this produces the multiple-values arglist) if cc1 is not None: - @passthrough_lazy_args - def cc(value): - if isinstance(value, tuple): - return jump(cc1, cc=cc2, *value) + def cc(return_value): + if isinstance(return_value, Values): + return jump(cc1, cc=cc2, *return_value.rets, **return_value.kwrets) else: - return jump(cc1, value, cc=cc2) + return jump(cc1, return_value, cc=cc2) else: - @passthrough_lazy_args - def cc(value): - if isinstance(value, tuple): - return jump(cc2, *value) + def cc(return_value): + if isinstance(return_value, Values): + return jump(cc2, *return_value.rets, **return_value.kwrets) else: - return jump(cc2, value) + return 
jump(cc2, return_value) return cc -def continuations(block_body): + +_continuations_level = NestingLevelTracker() # for checking validity of call_cc[] + +class ContinuationsMarker(UnpythonicASTMarker): + """AST marker related to the unpythonic's continuations (call_cc) subsystem.""" +class CallCcMarker(ContinuationsMarker): + """AST marker denoting a `call_cc[]` invocation.""" + + +def _continuations(block_body): # here be dragons. # This is a very loose pythonification of Paul Graham's continuation-passing # macros in On Lisp, chapter 20. # @@ -186,7 +809,7 @@ def continuations(block_body): known_ecs = list(uniqify(detect_callec(block_body))) with _continuations_level.changed_by(+1): - block_body = dyn._macro_expander.visit(block_body) + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out @@ -218,7 +841,7 @@ def transform_args(tree): tree.args.kw_defaults[j] = q[h[identity]] # implicitly add "parent cc" arg for treating the tail of a computation # as one entity (only actually used in continuation definitions created by - # call_cc; everywhere else, it's None). See callcc_topology.pdf for clarifying pictures. + # call_cc; everywhere else, it's None). See doc/callcc_topology.pdf for clarifying pictures. if "_pcc" not in kwonlynames: non = q[None] non = copy_location(non, tree) @@ -237,15 +860,17 @@ def transform_args(tree): # Already performed by the TCO machinery: # return f(...) --> return jump(f, ...) # - # Additional transformations needed here: + # Additional transformations needed for `continuations`. + # Function calls, after the TCO transform: # return jump(f, ...) --> return jump(f, cc=cc, ...) # customize the transform to add the cc kwarg + # Bare data: # return value --> return jump(cc, value) - # return v1, ..., vn --> return jump(cc, *(v1, ..., vn)) + # return Values(a0, ..., k0=v0, ...) --> return jump(cc, a0, ..., k0=v0, ...) 
# # Here we only customize the transform_retexpr callback to pass our # current continuation (if no continuation already specified by user). def call_cb(tree): # add the cc kwarg (this plugs into the TCO transformation) - # we're a postproc; our input is "jump(some_target_func, *args)" + # we're a postproc; our input is "jump(some_target_func, *args, **kwargs)" hascc = any(kw.arg == "cc" for kw in tree.keywords) if hascc: # chain our _pcc and the cc=... manually provided by the user @@ -256,6 +881,7 @@ def call_cb(tree): # add the cc kwarg (this plugs into the TCO transformation) # chain our _pcc and the current value of cc tree.keywords = [keyword(arg="cc", value=q[h[chain_conts](n["_pcc"], n["cc"], with_star=True)])] + tree.keywords return tree + # The `data_cb` handles also `Values`; `_transform_retexpr` detects those and treats them as bare data. def data_cb(tree): # transform an inert-data return value into a tail-call to cc. tree = q[h[chain_conts](n["_pcc"], n["cc"])(a[tree])] return tree @@ -267,34 +893,118 @@ def data_cb(tree): # transform an inert-data return value into a tail-call to c # specified inside the body of the macro invocation like PG's solution does. # Instead, we capture as the continuation all remaining statements (i.e. # those that lexically appear after the ``call_cc[]``) in the current block. 
- def iscallcc(tree): + def iscallccstatement(tree): if type(tree) not in (Assign, Expr): return False - return isinstance(tree.value, UnpythonicCallCcMarker) - def split_at_callcc(body): + return isinstance(tree.value, CallCcMarker) + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function + def split_at_callcc(owner, body): if not body: return [], None, [] before, after = [], body while True: stmt, *after = after - if iscallcc(stmt): + if iscallccstatement(stmt): # after is always non-empty here (has at least the explicitified "return") # ...unless we're at the top level of the "with continuations" block if not after: raise SyntaxError("call_cc[] cannot appear as the last statement of a 'with continuations' block (no continuation to capture)") # pragma: no cover - # TODO: To support Python's scoping properly in assignments after the `call_cc`, - # TODO: we have to scan `before` for assignments to local variables (stopping at - # TODO: scope boundaries; use `unpythonic.syntax.scoping.get_names_in_store_context`, - # TODO: and declare those variables `nonlocal` in `after`. This way the binding - # TODO: will be shared between the original context and the continuation. - # See Politz et al 2013 (the "full monty" paper), section 4.2. + # after = patch_scoping(owner, before, stmt, after) # bad idea, DON'T DO THIS return before, stmt, after before.append(stmt) if not after: return before, None, [] + # Try to maintain an illusion of Python's standard scoping rules across the split + # into the parent context (`before`) and continuation closure (`after`). + # See Politz et al 2013 (the "full monty" paper), section 4.2. + # + # TODO: On second thought, this is a bad idea, DON'T DO THIS. + # + # The function `patch_scoping` is an experiment that implements propagation + # of the scope of variable definitions from the parent scope into the continuation, + # recursively. 
But: + # + # - Due to how the continuation machinery works, the continuation's + # parameters (assignment targets of the `call_cc`) **must** shadow + # the same names from the parent scope, if they happen to exist there. + # + # - There is no propagation from the continuation up the parent scope + # chain. That is, if a continuation declares a new local variable, the + # name won't become available to any of the parent contexts, even if + # those are part of the same original function (to which the + # continuation splitting was applied). Implementing this would require + # a second pass. + # + # - Without looking at the source code of the full module, it is not even + # possible to determine whether the top level of the with continuations + # block is inside a function or not. This has implications to `call_cc` + # invoked from the top level of the block: should the variables from + # the parent scope be declared `nonlocal` or `global`? + # + # It is much simpler and much more robust to just document that introducing a + # continuation introduces a scope boundary - that is a simple, transparent rule + # that is easy to work with. The behavior is no worse than how, in standard Python, + # comprehensions and generator expressions introduce a scope boundary. + # + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function + # def patch_scoping(owner, before, callcc, after): + # # Determine the names of all variables that should be made local to the continuation function. + # # In the unexpanded code, the continuation doesn't look like a new scope, so by appearances, + # # these will effectively break the usual scoping rules. Thus this set should be kept minimal. + # # To allow the machinery to actually work, at least the parameters of the continuation function + # # *must* be allowed to shadow names from the parent scope. 
+ # targets, starget, ignored_condition, ignored_thecall, ignored_altcall = analyze_callcc(callcc) + # if not targets and not starget: + # targets = ["_ignored_arg"] # this must match what `make_continuation` does, below + # # The assignment targets of the `call_cc` become parameters of the continuation function. + # # Furthermore, a continuation function generated by `make_continuation` always takes + # # the `cc` and `_pcc` parameters. + # afterargs = targets + ([starget] or []) + ["cc", "_pcc"] + # afterlocals = afterargs + # + # if owner: + # # When `call_cc` is used inside a function, local variables of the + # # parent function (including parameters) become nonlocals in the + # # continuation. + # # + # # But only those that are not also locals of the continuation! + # # In that case, the local variable of the continuation overrides. + # # Locals of the continuation include its arguments, and any names in store context. + # beforelocals = set(extract_args(owner) + get_names_in_store_context(before)) + # afternonlocals = list(beforelocals.difference(afterlocals)) + # if afternonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Nonlocal(names=afternonlocals)) + # else: + # # When `call_cc` is used at the top level of `with continuations` block, + # # the variables at that level become globals in the continuation. + # # + # # TODO: This **CANNOT** always work correctly, because we would need to know + # # TODO: whether the `with continuations` block itself is inside a function or not. + # # TODO: So we just assume it's outside any function. + # beforelocals = set(get_names_in_store_context(before)) + # afternonlocals = list(beforelocals.difference(afterlocals)) + # if afternonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Global(names=afternonlocals)) + # + # # Nonlocals of the parent function remain nonlocals in the continuation. + # # When `owner is None`, `beforenonlocals` will be empty. 
+ # beforenonlocals = collect_nonlocals(before) + # if beforenonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Nonlocal(names=beforenonlocals)) + # + # # Globals of parent are also globals in the continuation. + # beforeglobals = collect_globals(before) + # if beforeglobals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Global(names=beforeglobals)) + # + # return after # we mutate; return it just for convenience + # TODO: To support named return values (`kwrets` in a `Values` object) from the `call_cc`'d function, + # TODO: we need to change the syntax to something that allows us to specify which names are meant to + # TODO: capture the positional return values, and which ones the named return values. Doing so will + # TODO: likely break IDE support, because there's no standard name-binding construct we could abuse here. def analyze_callcc(stmt): starget = None # "starget" = starred target, becomes the vararg for the cont - def maybe_starred(expr): # return expr.id or set starget + def maybe_starred(expr): # return [expr.id] or set starget nonlocal starget if type(expr) is Name: return [expr.id] @@ -311,7 +1021,8 @@ def maybe_starred(expr): # return expr.id or set starget target = stmt.targets[0] if type(target) in (Tuple, List): rest, last = target.elts[:-1], target.elts[-1] - # TODO: limitation due to Python's vararg syntax - the "*args" must be after positional args. + # TODO: limitation due to Python's vararg syntax - the "*args" must be after positional args + # TODO: in a function definition (we're going to define the cont using these). 
if any(type(x) is Starred for x in rest): raise SyntaxError("in call_cc[], only the last assignment target may be starred") # pragma: no cover if not all(type(x) is Name for x in rest): @@ -324,15 +1035,15 @@ def maybe_starred(expr): # return expr.id or set starget else: raise SyntaxError(f"call_cc[]: expected an assignment or a bare expr, got {stmt}") # pragma: no cover # extract the function call(s) - if not isinstance(stmt.value, UnpythonicCallCcMarker): # both Assign and Expr have a .value - assert False # we should get only valid call_cc[] invocations that pass the `iscallcc` test # pragma: no cover + if not isinstance(stmt.value, CallCcMarker): # both Assign and Expr have a .value + assert False # we should get only valid call_cc[] invocations that pass the `iscallccstatement` test # pragma: no cover theexpr = stmt.value.body # discard the AST marker - if not (type(theexpr) in (Call, IfExp) or (type(theexpr) in (Constant, NameConstant) and getconstant(theexpr) is None)): + if not (type(theexpr) in (Call, IfExp) or (type(theexpr) is Constant and theexpr.value is None)): raise SyntaxError("the bracketed expression in call_cc[...] 
must be a function call, an if-expression, or None") # pragma: no cover def extract_call(tree): if type(tree) is Call: return tree - elif type(tree) in (Constant, NameConstant) and getconstant(tree) is None: + elif type(tree) is Constant and tree.value is None: return None else: raise SyntaxError("call_cc[...]: expected a function call or None") # pragma: no cover @@ -344,6 +1055,7 @@ def extract_call(tree): condition = altcall = None thecall = extract_call(theexpr) return targets, starget, condition, thecall, altcall + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function def make_continuation(owner, callcc, contbody): targets, starget, condition, thecall, altcall = analyze_callcc(callcc) @@ -356,8 +1068,7 @@ def make_continuation(owner, callcc, contbody): else: posargdefaults = [] - # Name the continuation: f_cont, f_cont1, f_cont2, ... - # if multiple call_cc[]s in the same function body. + # Name the continuation: f_cont_UUID if owner: # TODO: robustness: use regexes, strip suf and any numbers at the end, until no match. # return prefix of s before the first occurrence of suf. @@ -397,30 +1108,29 @@ def prepare_call(tree): # TODO: needs to be modified, too. 
# FDef = type(owner) if owner else FunctionDef # use same type (regular/async) as parent function - locref = callcc # bad but no better source location reference node available non = q[None] - non = copy_location(non, locref) maybe_capture = IfExp(test=q[n["cc"] is not h[identity]], body=q[n["cc"]], - orelse=non, - lineno=locref.lineno, col_offset=locref.col_offset) + orelse=non) contarguments = arguments(args=[arg(arg=x) for x in targets], + posonlyargs=[], kwonlyargs=[arg(arg="cc"), arg(arg="_pcc")], vararg=(arg(arg=starget) if starget else None), kwarg=None, defaults=posargdefaults, kw_defaults=[q[h[identity]], maybe_capture]) - if sys.version_info >= (3, 8, 0): # Python 3.8+: positional-only arguments - contarguments.posonlyargs = [] funcdef = FDef(name=contname, args=contarguments, body=contbody, decorator_list=[], # patched later by transform_def - returns=None, # return annotation not used here - lineno=locref.lineno, col_offset=locref.col_offset) + returns=None) # return annotation not used here - # in the output stmts, define the continuation function... - newstmts = [funcdef] + # 0.15.1: tag the continuation function as a continuation, for introspection. + setcontflag = Assign(targets=[q[n[f"{contname}.is_continuation"]]], + value=q[True]) + + # in the output stmts, define the continuation function, set its is-continuation flag, ... 
+ newstmts = [funcdef, setcontflag] if owner: # ...and tail-call it (if currently inside a def) def jumpify(tree): tree.args = [tree.func] + tree.args @@ -448,30 +1158,45 @@ def transform(self, tree): if type(tree) in (FunctionDef, AsyncFunctionDef): tree.body = transform_callcc(tree, tree.body) return self.generic_visit(tree) + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function def transform_callcc(owner, body): # owner: FunctionDef or AsyncFunctionDef node, or None (top level of block) # body: list of stmts # we need to consider only one call_cc in the body, because each one # generates a new nested def for the walker to pick up. - before, callcc, after = split_at_callcc(body) + before, callcc, after = split_at_callcc(owner, body) if callcc: body = before + make_continuation(owner, callcc, contbody=after) return body # TODO: improve error reporting for stray call_cc[] invocations - class StrayChecker(ASTVisitor): + class StrayCallccChecker(ASTVisitor): def examine(self, tree): - if iscallcc(tree): - raise SyntaxError("call_cc[...] only allowed at the top level of a def or async def, or at the top level of the block; must appear as an expr or an assignment RHS") # pragma: no cover + if iscallccstatement(tree): + raise SyntaxError("call_cc[...] only allowed at the top level of a def, or at the top level of the block; must appear as an expr or an assignment RHS") # pragma: no cover if type(tree) in (Assign, Expr): v = tree.value if type(v) is Call and type(v.func) is Name and v.func.id == "call_cc": raise SyntaxError("call_cc(...) should be call_cc[...] (note brackets; it's a macro)") # pragma: no cover self.generic_visit(tree) + # TODO: Interaction of `continuations` with async functions is not implemented. + # So for robustness, we raise a syntax error for now. 
+ class AsyncDefChecker(ASTVisitor): + def examine(self, tree): + if type(tree) is AsyncFunctionDef: + raise SyntaxError("`with continuations` does not currently support `async` functions") + elif type(tree) is AsyncWith: + raise SyntaxError("`with continuations` does not currently support `async` context managers") + elif type(tree) is Await: + raise SyntaxError("`with continuations` does not currently support `await`") + self.generic_visit(tree) + # ------------------------------------------------------------------------- # Main processing logic begins here # ------------------------------------------------------------------------- + AsyncDefChecker().visit(block_body) + # Disallow return at the top level of the block, because it would behave # differently depending on whether placed before or after the first call_cc[] # invocation. (Because call_cc[] internally creates a function and calls it.) @@ -483,16 +1208,15 @@ def examine(self, tree): # into tail calls (to cc), we must insert any missing implicit bare "return" # statements so that _tco_transform_return() sees them. # - # Note that a bare "return" returns `None`, but in the AST `return` looks - # different from `return None`. + # Note that a bare "return" returns `None` at run time, but in the AST, + # `return` looks different from `return None`. class ImplicitBareReturnInjector(ASTTransformer): def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! if type(tree) in (FunctionDef, AsyncFunctionDef): if type(tree.body[-1]) is not Return: - tree.body.append(Return(value=None, # bare "return" - lineno=tree.lineno, col_offset=tree.col_offset)) + tree.body.append(Return(value=None)) # bare "return" return self.generic_visit(tree) block_body = ImplicitBareReturnInjector().visit(block_body) @@ -506,11 +1230,12 @@ def transform(self, tree): block_body = CallccTransformer().visit(block_body) # inside defs # Validate. 
Each call_cc[] reached by the transformer was in a syntactically correct # position and has now been eliminated. Any remaining ones indicate syntax errors. - StrayChecker().visit(block_body) + StrayCallccChecker().visit(block_body) # set up the default continuation that just returns its args # (the top-level "cc" is only used for continuations created by call_cc[] at the top level of the block) - new_block_body = [Assign(targets=[q[n["cc"]]], value=q[h[identity]])] + with q as new_block_body: + cc = h[identity] # noqa: F841, only quoted # transform all defs (except the chaining handler), including those added by call_cc[]. for stmt in block_body: @@ -524,9 +1249,225 @@ def transform(self, tree): # Leave a marker so "with tco", if applied, can ignore the expanded "with continuations" block # (needed to support continuations in the Lispython dialect, since it applies tco globally.) - return wrapwith(item=q[h[ContinuationsMarker]], - body=new_block_body, - locref=block_body[0]) + return ExpandedContinuationsMarker(body=new_block_body) + +def iscontinuation(x): + """Return whether the object `x` is a continuation function. + + This function can be used for inspection at run time. + + Continuation functions are created by `call_cc[...]` in a `with continuations` block. + """ + return callable(x) and hasattr(x, "is_continuation") and x.is_continuation + +# TODO: Do we need to account for `_pcc` here? Probably not, since this is defined at the +# TODO: top level of a module, not as a closure inside another function. +@trampolined +def get_cc(*args, cc): + """When used together with `call_cc[]`, capture and get the current continuation. + + This convenience function covers the common use case when working with + continuations, when you just want to snapshot the control state into a + local variable. + + In other words, this is what you want 99% of the time when you need `call_cc`. 
+ + Or in yet other words, `get_cc` is the less antisocial little sister of `call_cc` + from an alternate timeline, and in this adventure the two work as a team. + + The `*args`, if any, are passed through. + + Usage:: + + with continuations: + ... + def dostuff(): + ... + + k = call_cc[get_cc()] + + # Now `k` is the continuation from this point on. + # You can do whatever you want with it! + # + # To invoke it, `k(k)` to always preserve the meaning + # of `k` in this part of the code. (See below.) + + ... + return k # maybe our caller wants to replay part of us later + + Any positional `*args` are passed through, so that you can also make a + continuation that takes additional arguments:: + + def domorestuff(): + ... + + k, x1, x2 = call_cc[get_cc(1, 2)] # -> k=cc, x1=1, x2=2 + + print(x1, x2) + return k + + k = domorestuff() + k(3, 4) + k(x1=3, x2=4) # same thing + + Important: in the `get_cc` call, the initial values for the additional + arguments, if any, must be passed positionally, due to `call_cc` syntax + limitations. However, when invoking the continuation, they can be passed + any way you want. + + As for how this works, you may have seen the following helper function + in Matthew Might's article on continuations by example: + + (define (current-continuation) + (call/cc (lambda (cc) (cc cc)))) + + The lambda is pretty much `get_cc`. We cannot factor away the `call/cc`, + because our `call_cc` is a macro that arranges for the actual capture to + happen at its use site (and it cannot affect any outer levels of the call + stack). + + + **CAUTION**: + + In `k = call_cc[get_cc()]`, the continuation is automatically assigned to + `k` only during the first run, i.e. (in the example) whenever `dostuff` is + called normally. + + By the rules of `unpythonic.syntax.call_cc`, the continuation function will + have parameters for whatever is on the left-hand side of the assignment; in + this case, there will be one parameter, `k`. 
+ + When you invoke the continuation later, the name `k` inside the continuation + (i.e. in the code below the `call_cc` line) will point to whatever value you + sent into the continuation as its argument. + + To achieve least surprise, in 99% of cases, one should arrange things so that + in the continuation, the name `k` always actually points to the continuation, + no matter whether the code runs normally or via continuation invocation. + + Thus, unless there is a specific reason to do otherwise, the recommended way + to invoke the continuation is `k(k)` (giving it itself as the argument). + + Note this caution applies to any continuation that expects to take itself + as an argument; the `k = call_cc[get_cc()]` pattern is just a convenient + way to create such continuations. + + + **Comparison to Lisps**: + + The `k = call_cc[get_cc()]` pattern was inspired by The One True Way to use + `call/cc` in Lisp dialects that have multi-shot continuations, as well as the + `let/cc` construct in Racket. + + The One True Way is to use a one-argument lambda that is invoked immediately + by the `call/cc`: + + (define dostuff () + ... + (call/cc (lambda (k) + ;; ...now k is the continuation... + ... + k))) ;; return it just for the lulz + + The name `call/cc` (`call-with-current-continuation`) is a misnomer; the + purpose of the construct is not really to call a reusable function defined + somewhere else; used that way, it may seem an esoteric feature primarily + intended to confuse programmers. Instead, when combined with a lexical closure + as above, it exposes the continuation as a local variable - which is a + clean and useful technique for a variety of purposes (custom escapes, + generators, backtracking, ...). + + Racket abstracts this pattern into `let/cc`, which communicates the intent + more clearly: + + (define dostuff () + ... + (let/cc k + ;; ...now k is the continuation... + ... 
+ k)) ;; return it just for the lulz + + (Racket has no `return` keyword - it does not need one, since you can + create one using `(let/cc return ...)`, scoping it to whichever block + you want.) + + In the Lisp examples above, `k` is the continuation starting with the next + expression after the `call/cc` or `let/cc` block (expression). + + In our `k = call_cc[get_cc()]` pattern, `k` is the rest of the function body + after the statement `k = call_cc[get_cc()]`. + + So in Lisps, invoking `k` inside the block performs an exit (think of a Python + `return` from that block), whereas in our implementation, doing so loops back + to the next statement just after the `call_cc`. + + There is a similarity between our `get_cc` and something that is possible + in Lisps: our continuation starts from the next statement that runs after + `k = call_cc[get_cc()]`. This is exactly how the `(current-continuation)` + function, mentioned at the beginning, works. + + + **Why `get_cc`?**: + + In Python, a function using all the features of the language cannot be + defined in an expression, so in most cases the (un)pythonic `call_cc` + must indeed call a function defined somewhere else. + + The question becomes, what should this function be? + + 1. To be useful at all, it should make it easier to program with continuations, + over arbitrary use of `call_cc`. + + 2. To promote a standard usage pattern, the function should be as general as + possible, so that we only ever need one. + + 3. For least surprise, the function should do as little as possible; + particularly, no side effects. + + 4. For familiarity, we should stay as close to The One True Way pattern as + possible. In the pattern, the lambda converts the call into a let-like + construct, which pythonifies into an assignment, `k = call_cc[...]`. + + 5. The only reason to use `call_cc` is when you want to get the continuation. 
+ + The obvious solution is a function that just passes the continuation as an + argument into that very same continuation, without any side effects; this is + exactly what `get_cc` does. Thus we get the pattern `k = call_cc[get_cc()]`, + which arguably does exactly what it says on the tin. + """ + # If `get_cc` was defined inside a `with continuations` block, the definition + # could be just: + # + # def get_cc(*, cc): + # return cc + # + # because that means "send the value `cc` into the current continuation" + # (i.e. "escape into the current continuation with the value `cc`"), and + # `cc` is the current continuation. For a more detailed analysis in Scheme: + # + # https://stackoverflow.com/questions/57663699/returning-continuations-from-call-cc + # + # Since `get_cc` is not defined inside a `with continuations` block (so that + # we can easily provide it in the same module that defines the continuation + # machinery, without using multiphase compilation), we make the actual definition + # essentially as a handcrafted macro expansion. + # + # So when returning, we are expected to tail-call (i.e. TCO-jump into) the + # continuation function that was given to us, with our return value(s) becoming + # its argument(s). + # + # Below the first `cc` is the continuation function, and the second `cc` + # is the return value that we are sending into it. + # + # The `*args` are a passthrough so that e.g. `k, a, b = call_cc[get_cc(1, 2)]`; + # allows you to pass parameters into the continuation later. 
+ # + # One often sees the pattern `(cc cc)` also in Lisps; for example, see + # the function `(current-continuation)` in Matthew Might's article on + # continuations by example: + # http://matt.might.net/articles/programming-with-continuations--exceptions-backtracking-search-threads-generators-coroutines/ + # + return jump(cc, cc, *args) # ----------------------------------------------------------------------------- @@ -607,7 +1548,7 @@ def transform(self, tree): # Tail-position analysis for a return-value expression (also the body of a lambda). # Here we need to be very, very selective about where to recurse so this would not # benefit much from being made into an ASTTransformer. Just a function is fine. -_isjump = orf(make_isxpred("jump"), make_isxpred("loop")) +_isjump = lambda name: name in ("jump", "loop") def _transform_retexpr(tree, known_ecs, call_cb=None, data_cb=None): """Analyze and TCO a return-value expression or a lambda body. @@ -648,7 +1589,7 @@ def transform(tree): thelambda = lastitem thelambda.body = transform(thelambda.body) elif type(tree) is Call: - # Apply TCO to tail calls. + # Apply TCO to tail calls ("jumpify" them). # - If already an explicit jump() or loop(), leave it alone. # - If a call to an ec, leave it alone. # - Because an ec call may appear anywhere, a tail-position @@ -658,7 +1599,14 @@ def transform(tree): # - Hence, transform_return() calls us on the content of # all ec nodes directly. ec(...) is like return; the # argument is the retexpr. - if not (isx(tree.func, _isjump) or isec(tree, known_ecs)): + # - If a Values(...), leave it alone; that just constructs + # a multiple-return-values object so it doesn't need TCO. + # But it acts like bare data. 
+ if isx(tree.func, _isjump) or isec(tree, known_ecs): + pass + elif isx(tree.func, "Values"): + tree = transform_data(tree) + else: tree.args = [tree.func] + tree.args tree.func = q[h[jump]] tree = transform_call(tree) @@ -675,25 +1623,19 @@ def transform(tree): if type(tree.values[-1]) in (Call, IfExp, BoolOp): # must match above handlers # other items: not in tail position, compute normally if len(tree.values) > 2: - op_of_others = BoolOp(op=tree.op, values=tree.values[:-1], - lineno=tree.lineno, col_offset=tree.col_offset) + op_of_others = BoolOp(op=tree.op, values=tree.values[:-1]) else: op_of_others = tree.values[0] if type(tree.op) is Or: - # or(data1, ..., datan, tail) --> it if any(others) else tail - tree = aif(Tuple(elts=[op_of_others, - transform_data(Name(id="it", - lineno=tree.lineno, - col_offset=tree.col_offset)), - transform(tree.values[-1])], - lineno=tree.lineno, col_offset=tree.col_offset)) # tail-call item + # or(data1, ..., datan, tail) --> aif[any(others), it, tail] + tree = q[a[_our_aif][a[op_of_others], + a[transform_data(_our_it)], + a[transform(tree.values[-1])]]] # tail-call item elif type(tree.op) is And: # and(data1, ..., datan, tail) --> tail if all(others) else False - fal = q[False] - fal = copy_location(fal, tree) - tree = IfExp(test=op_of_others, - body=transform(tree.values[-1]), - orelse=transform_data(fal)) + tree = q[a[transform(tree.values[-1])] + if a[op_of_others] + else a[transform_data(q[False])]] else: # cannot happen raise SyntaxError(f"unknown BoolOp type {tree.op}") # pragma: no cover else: # optimization: BoolOp, no call or compound in tail position --> treat as single data item diff --git a/unpythonic/syntax/testingtools.py b/unpythonic/syntax/testingtools.py index 118cb175..e3d8cc0f 100644 --- a/unpythonic/syntax/testingtools.py +++ b/unpythonic/syntax/testingtools.py @@ -4,14 +4,22 @@ See also `unpythonic.test.fixtures` for the high-level machinery. 
""" +__all__ = ["the", "test", + "test_signals", "test_raises", + "fail", "error", "warn", + "expand_testing_macros_first", + # these are mostly for use in other parts of `unpythonic` + "isunexpandedtestmacro", "isexpandedtestmacro", "istestmacro"] + from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 -from mcpyrate import gensym, unparse +from mcpyrate import gensym, parametricmacro, unparse +from mcpyrate.expander import MacroExpander from mcpyrate.quotes import is_captured_value +from mcpyrate.utils import extract_bindings from mcpyrate.walkers import ASTTransformer from ast import Tuple, Subscript, Name, Call, copy_location, Compare, arg, Return, parse, Expr, AST -import sys from ..dynassign import dyn from ..env import env @@ -22,17 +30,447 @@ from .util import isx -from ..test import fixtures +from ..test import fixtures # unpythonic.test.fixtures, regular (non-macro) code belonging to the framework -# ----------------------------------------------------------------------------- -# Helper for other macros to detect uses of the ones we define here. +# -------------------------------------------------------------------------------- +# Macro interface + +def the(tree, **kw): + """[syntax, expr] In a test, mark a subexpression as the interesting one. + + Only meaningful inside a `test[]`, or inside a `with test` block. + + What `test[expr]` captures for reporting for human inspection upon + test failure: + + - If any `the[...]` are present, the subexpressions marked as `the[...]`. + + - Else if `expr` is a comparison, the LHS (leftmost term in case of + a chained comparison). So e.g. `test[x < 3]` needs no annotation + to do the right thing. This is a common use case, hence automatic. + + - Else nothing is captured; the value of the whole `expr` is reported. 
+ + So the `the[...]` mark is useful in tests involving comparisons:: + + test[lower_limit < the[computeitem(...)]] + test[lower_limit < the[computeitem(...)] < upper_limit] + test[myconstant in the[computeset(...)]] + + especially if you need to capture several subexpressions:: + + test[the[counter()] < the[counter()]] + + Note the above rules mean that if there is just one interesting + subexpression, and it is the leftmost term of a comparison, `the[...]` + is optional, although allowed (to explicitly document intent). + These have the same effect:: + + test[the[computeitem(...)] in myitems] + test[computeitem(...) in myitems] + + The `the[...]` mark passes the value through, and does not affect the + evaluation order of user code. + + A `test[...]` may have multiple `the[...]`; the captured values are + gathered in a list that is shown upon test failure. + + In case of nested tests, each `the[...]` is understood as belonging to + the lexically innermost surrounding test. + + For `test_raises` and `test_signals`, the `the[...]` mark is not supported. + """ + raise SyntaxError("the[] is only meaningful inside a `test[]` or in a `with test` block") # pragma: no cover, not meant to hit the expander + +@parametricmacro +def test(tree, *, args, syntax, expander, **kw): # noqa: F811 + """[syntax, expr/block] Make a test assertion. For writing automated tests. + + **Testing overview**: + + Use the `test[]`, `test_raises[]`, `test_signals[]`, `fail[]`, `error[]` + and `warn[]` macros inside a `with testset()`, as appropriate. + + See `testset` and `session` in the module `unpythonic.test.fixtures`, + as well as the docstrings of any constructs exported from that module. + + See below for tips and tricks. + + Finally, see the unit tests of `unpythonic` itself for examples. + + **Expression variant**: + + Syntax:: + + test[expr] + test[expr, message] + + The test succeeds if `expr` evaluates to truthy. 
The `message` + is used in forming the error message if the test fails or errors. + + If you want to assert just that an expression runs to completion + normally, and don't care about the return value:: + + from unpythonic.test.fixtures import returns_normally + + test[returns_normally(expr)] + test[returns_normally(expr), message] + + This can be useful for testing functions with side effects; sometimes + what is important is that the function completes normally. + + What `test[expr]` captures for reporting as "result" in the failure + message, if the test fails: + + - If any `the[...]` marks are present, the subexpressions marked + as `the[...]`. + - Else if `expr` is a comparison, the LHS (leftmost term in case of + a chained comparison). So e.g. `test[x < 3]` needs no annotation + to do the right thing. This is a common use case, hence automatic. + - Else the whole `expr`. + + The `the[...]` mark is useful in tests involving comparisons:: + + test[lower_limit < the[computeitem(...)]] + test[lower_limit < the[computeitem(...)] < upper_limit] + test[myconstant in the[computeset(...)]] + + If your interesting part is on the LHS, `the[]` is optional, although + allowed (to explicitly document intent). These have the same effect:: + + test[the[computeitem(...)] in myitems] + test[computeitem(...) in myitems] + + The `the[...]` mark passes the value through, and does not affect the + evaluation order of user code. + + The `the[]` mark can be imported as a macro from this module, so that + its appearance in your source code won't confuse `flake8`, and you'll + get a nice macro-expansion-time error if it accidentally appears outside + a `test[]` or `with test:`. + + **Block variant**: + + A test that requires statements (e.g. assignments) can be written as a + `with test` block:: + + with test: + body0 + ... + return expr # optional + + with test[message]: + body0 + ... 
+ return expr # optional + + The test block is automatically lifted into a function, so it introduces + **a local scope**. Use the `nonlocal` or `global` declarations if you need + to mutate something defined on the outside. + + If there is a `return` at the top level of the block, that is the return + value from the test; it is what will be asserted. + + If there is no `return`, the test asserts that the block completes normally, + just like a `test[returns_normally(...)]` does for an expression. + + The asymmetry in syntax reflects the asymmetry between expressions and + statements in Python. Likewise, the fact that `with test` requires `return` + to return a value, but `test[...]` doesn't, is similar to the difference + between `def` and `lambda`. + + In the block variant, the "result" capture rules apply to the return value + designated by `return`. To override, `the[]` marks can be used for capturing + the value of any expressions inside the block. The marks don't have to be + in the `return`; they can appear anywhere. + + **Failure and error signaling**: + + Upon a test failure, `test[]` will *signal* a `TestFailure` using the + *cerror* (correctable error) protocol, via unpythonic's condition + system, which is a pythonification of Common Lisp's condition system. + See `unpythonic.conditions`. + + If a test fails to run to completion due to an uncaught exception or an + unhandled signal (e.g. an `error` or `cerror` condition), `TestError` + is signaled instead, so the caller can easily tell apart which case + occurred. + + Finally, when a `warn[]` runs, `TestWarning` is signaled. + + These condition types are defined in `unpythonic.test.fixtures`. + They inherit from `TestingException`, defined in the same module. + Beside the human-readable message, these exception types contain + attributes with programmatically inspectable information about + what happened. See the docstring of `TestingException`. 
+ + *Signaling* a condition, instead of *raising* an exception, allows the + surrounding code (inside the test framework) to install a handler that + invokes the `proceed` restart (if there is such in scope), so upon a test + failure or error, the test suite resumes. + + **Disabling the signal barrier**: + + As implied above, `test[]` (likewise `with test:`) forms a barrier that + alerts the user about uncaught signals, and stops those signals from + propagating further. If your `with handlers` block that needs to see + the signal is outside the `test` invocation, or if allowing a signal to + go uncaught is part of normal operation (e.g. `warn` signals are often + not caught, because the only reason to do so is to muffle the warning), + use a `with catch_signals(False):` block (from the module + `unpythonic.test.fixtures`) to disable the signal barrier:: + + from unpythonic.test.fixtures import catch_signals + + with catch_signals(False): + test[...] + + Another way to avoid catching signals that should not be caught by the + test framework is to rearrange the `test[]` so that the expression being + asserted cannot result in an uncaught signal. For example, save the result + of a computation into a variable first, and then use it in the `test[]`, + instead of invoking that computation inside the `test[]`. See + `unpythonic.test.test_conditions` for examples. + + Exceptions are always caught by `test[]`, because exceptions do not support + resumption; unlike with signals, the inner level of the call stack is already + destroyed by the time the exception is caught by the test construct. 
+ """ + if syntax not in ("expr", "block"): + raise SyntaxError("test is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("test (block mode) does not take an as-part") # pragma: no cover + + with dyn.let(_macro_expander=expander): + if syntax == "expr": + if args: + raise SyntaxError("test[] in expression mode does not take macro arguments") # pragma: no cover + return _test_expr(tree) + else: # syntax == "block": + return _test_block(block_body=tree, args=args) + +@parametricmacro +def test_signals(tree, *, args, syntax, expander, **kw): # noqa: F811 + """[syntax, expr/block] Like `test`, but expect the expression to signal a condition. + + "Signal" as in `unpythonic.conditions.signal` and its sisters. + + Syntax:: + + test_signals[exctype, expr] + test_signals[exctype, expr, message] + + with test_signals[exctype]: + body0 + ... + + with test_signals[exctype, message]: + body0 + ... + + Example:: + + test_signals[ValueError, myfunc()] + test_signals[ValueError, myfunc(), "failure message"] + + The test succeeds, if `expr` signals a condition of type `exctype`, and the + signal propagates into the (implicit) handler inside the `test_signals[]` + construct. + + If `expr` returns normally, the test fails. + + If `expr` signals some other type of condition, or raises an exception, the + test errors. + + **Differences to `test[]`, `with test`**: + + As the focus of this construct is on signaling vs. returning normally, the + `the[]` mark is not supported. The block variant does not support `return`. 
+ """ + if syntax not in ("expr", "block"): + raise SyntaxError("test_signals is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("test_signals (block mode) does not take an as-part") # pragma: no cover + + with dyn.let(_macro_expander=expander): + if syntax == "expr": + if args: + raise SyntaxError("test_signals[] in expression mode does not take macro arguments") # pragma: no cover + return _test_expr_signals(tree) + else: # syntax == "block": + return _test_block_signals(block_body=tree, args=args) + +@parametricmacro +def test_raises(tree, *, args, syntax, expander, **kw): # noqa: F811 + """[syntax, expr/block] Like `test`, but expect the expression to raise an exception. + + Syntax:: + + test_raises[exctype, expr] + test_raises[exctype, expr, message] + + with test_raises[exctype]: + body0 + ... + + with test_raises[exctype, message]: + body0 + ... + + Example:: + + test_raises[TypeError, issubclass(1, int)] + test_raises[ValueError, myfunc()] + test_raises[ValueError, myfunc(), "failure message"] + + The test succeeds, if `expr` raises an exception of type `exctype`, and the + exception propagates into the (implicit) handler inside the `test_raises[]` + construct. + + If `expr` returns normally, the test fails. + + If `expr` signals a condition, or raises some other type of exception, the + test errors. + + **Differences to `test[]`, `with test`**: + + As the focus of this construct is on raising vs. returning normally, the + `the[]` mark is not supported. The block variant does not support `return`. 
+ """ + if syntax not in ("expr", "block"): + raise SyntaxError("test_raises is an expr and block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("test_raises (block mode) does not take an as-part") # pragma: no cover + + with dyn.let(_macro_expander=expander): + if syntax == "expr": + if args: + raise SyntaxError("test_raises[] in expression mode does not take macro arguments") # pragma: no cover + return _test_expr_raises(tree) + else: # syntax == "block": + return _test_block_raises(block_body=tree, args=args) + +def fail(tree, *, syntax, expander, **kw): # noqa: F811 + """[syntax, expr] Produce a test failure, unconditionally. + + Useful to e.g. mark a line of code that should not be reached in automated + tests, reaching which is therefore a test failure. + + Usage:: + + fail["human-readable reason"] + + which has the same effect as:: + + test[False, "human-readable reason"] + + except in the case of `fail[]`, the error message generating machinery is + special-cased to omit the source code expression, because it explicitly + states that the intent of the "test" is not actually to perform a test. + + See also `error[]`, `warn[]`. + """ + if syntax != "expr": + raise SyntaxError("fail is an expr macro only") # pragma: no cover + + # Expand outside in. The ordering shouldn't matter here. + # The underlying `test` machinery needs to access the expander. + with dyn.let(_macro_expander=expander): + return _fail_expr(tree) + +def error(tree, *, syntax, expander, **kw): # noqa: F811 + """[syntax, expr] Produce a test error, unconditionally. + + Useful to e.g. indicate to the user that an optional dependency that could + be used to run some integration test is not installed. + + Usage:: + + error["human-readable reason"] + + See also `warn[]`, `fail[]`. 
+ """ + if syntax != "expr": + raise SyntaxError("error is an expr macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _error_expr(tree) + +def warn(tree, *, syntax, expander, **kw): # noqa: F811 + """[syntax, expr] Produce a test warning, unconditionally. + + Useful to e.g. indicate that the Python interpreter or version the + tests are running on does not support a particular test, or to alert + about a non-essential TODO. + + A warning does not increase the failure count, so it will not cause + your CI workflow to break. + + Usage:: + + warn["human-readable reason"] + + See also `error[]`, `fail[]`. + """ + if syntax != "expr": + raise SyntaxError("warn is an expr macro only") # pragma: no cover + + with dyn.let(_macro_expander=expander): + return _warn_expr(tree) + +# TODO: There's also `quicklambda`. Maybe add a general utility for this kind of thing to `mcpyrate.metatools`? +def expand_testing_macros_first(tree, *, syntax, expander, **kw): + """[syntax, block] Force testing framework macros to expand first. + + Usage:: + + with expand_testing_macros_first: + ... + + This is useful if you have your own block macro that expands outside in and + does some code-walking transformations, and some tests inside such a block. + Expanding the test macros first allows the test framework to capture the + unexpanded source code for error reporting. + + As an example, consider:: + + with your_block_macro: + test[expr] + + In this case, if `your_block_macro` expands outside-in, it will transform the + `expr` inside the `test[expr]` before `test` even sees the AST. If the test + fails or errors, the error message will contain the expanded version of `expr`, + not the original one. Now, if we change the example to:: -# TODO: Detect asserters only? Now this breaks the handling of the[] in a prefix block. -# TODO: It should be handled like any expr, but currently it's skipped because it's listed here.
+ with expand_testing_macros_first: + with your_block_macro: + test[expr] + + In this case, `expand_testing_macros_first` arranges things so that `test[expr]` + expands first (even if `your_block_macro` expands outside-in), so it will see + the original, unexpanded AST. + + This does imply that `your_block_macro` will then receive the expanded form of + `test[expr]` as input, but that's macros for you. Macros don't compose, after all. + """ + if syntax != "block": + raise SyntaxError("expand_testing_macros_first is a block macro only") # pragma: no cover + if syntax == "block" and kw['optional_vars'] is not None: + raise SyntaxError("expand_testing_macros_first does not take an as-part") # pragma: no cover + + testing_macros = [test, test_signals, test_raises, error, fail, warn] + macro_bindings = extract_bindings(expander.bindings, *testing_macros) + return MacroExpander(macro_bindings, filename=expander.filename).visit(tree) + +# ----------------------------------------------------------------------------- +# Helpers for other macros to detect uses of the ones we defined here. # Note the unexpanded `error[]` macro is distinguishable from a call to # the function `unpythonic.conditions.error`, because a macro invocation # is an `ast.Subscript`, whereas a function call is an `ast.Call`. +# TODO: Maybe these lists should be public, autoref already uses the list of functions. +# TODO: We should use `unpythonic.syntax.nameutil.is_unexpanded_expr_macro` to detect +# TODO: macro invocations, to respect as-imports. But it needs some bells and whistles first. _test_asserter_names = ["test", "test_signals", "test_raises", "error", "fail", "warn"] _test_function_names = ["unpythonic_assert", "unpythonic_assert_signals", @@ -55,7 +493,7 @@ def istestmacro(tree): return isunexpandedtestmacro(tree) or isexpandedtestmacro(tree) # ----------------------------------------------------------------------------- -# Regular code, no macros yet. +# Run-time helpers. 
_fail = sym("_fail") # used by the fail[] macro _error = sym("_error") # used by the error[] macro @@ -146,7 +584,7 @@ def unpythonic_assert(sourcecode, func, *, filename, lineno, message=None): # we send to `func` as its argument. A `the[]` is also implicitly injected # by the comparison destructuring mechanism. e = env(captured_values=[]) - testexpr = func # descriptive name for stack trace; if you change this, change also in `test_expr`. + testexpr = func # descriptive name for stack trace; if you change this, change also in `_test_expr`. mode, test_result = _observe(thunk=(lambda: testexpr(e))) # <-- run the actual expr being asserted if e.captured_values: # Convenience for testing/debugging macro code: @@ -340,75 +778,85 @@ def unpythonic_assert_raises(exctype, sourcecode, thunk, *, filename, lineno, me # ----------------------------------------------------------------------------- -# Syntax transformers for the macros. +# Syntax transformers +# fail/error/warn def _unconditional_error_expr(tree, syntaxname, marker): thetuple = q[(a[marker], a[tree])] # consider `test[tree, message]` thetuple = copy_location(thetuple, tree) - return test_expr(thetuple) + return _test_expr(thetuple) # Here `tree` is the AST for the failure message. -def fail_expr(tree): +def _fail_expr(tree): return _unconditional_error_expr(tree, "fail", q[h[_fail]]) # TODO: stash a copy of the hygienic value? -def error_expr(tree): +def _error_expr(tree): return _unconditional_error_expr(tree, "error", q[h[_error]]) -def warn_expr(tree): +def _warn_expr(tree): return _unconditional_error_expr(tree, "warn", q[h[_warn]]) -# ----------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Expr variants. -def the(tree, **kw): - """[syntax, expr] In a test, mark a subexpression as the interesting one. - - Only meaningful inside a `test[]`, or inside a `with test` block. 
- - What `test[expr]` captures for reporting for human inspection upon - test failure: - - - If any `the[...]` are present, the subexpressions marked as `the[...]`. - - - Else if `expr` is a comparison, the LHS (leftmost term in case of - a chained comparison). So e.g. `test[x < 3]` needs no annotation - to do the right thing. This is a common use case, hence automatic. - - - Else nothing is captured; the value of the whole `expr` is reported. - - So the `the[...]` mark is useful in tests involving comparisons:: - - test[lower_limit < the[computeitem(...)]] - test[lower_limit < the[computeitem(...)] < upper_limit] - test[myconstant in the[computeset(...)]] - - especially if you need to capture several subexpressions:: - - test[the[counter()] < the[counter()]] +def _test_expr(tree): + # Note we want the line number *before macro expansion*, so we capture it now. + ln = q[u[getattr(tree, "lineno", None)]] # may be absent on 3.10–3.12; None on 3.13+ + filename = q[h[callsite_filename]()] + asserter = q[h[unpythonic_assert]] - Note the above rules mean that if there is just one interesting - subexpression, and it is the leftmost term of a comparison, `the[...]` - is optional, although allowed (to explicitly document intent). - These have the same effect:: + # test[expr, message] (like assert expr, message) + if type(tree) is Tuple and len(tree.elts) == 2: + tree, message = tree.elts + # test[expr] (like assert expr) + else: + message = q[None] - test[the[computeitem(...)] in myitems] - test[computeitem(...) in myitems] + # Before we edit the tree, get the source code in its pre-transformation + # state, so we can include that into the test failure message. + # + # We capture the source in the outside-in pass, so that no macros inside `tree` + # are expanded yet. For the same reason, we process the `the[]` marks in the + # outside-in pass. 
+ # + # (Note, however, that if the `test[]` is nested within the invocation of + # a code-walking block macro, that macro may have performed edits already. + # For this reason, we provide `with expand_testing_macros_first`, which + # in itself is a code-walking block macro, whose only purpose is to force + # `test[]` and its sisters to expand first.) + sourcecode = unparse(tree, color=True, expander=dyn._macro_expander) - The `the[...]` mark passes the value through, and does not affect the - evaluation order of user code. + envname = gensym("e") # for injecting the captured value - A `test[...]` may have multiple `the[...]`; the captured values are - gathered in a list that is shown upon test failure. + # Handle the `the[...]` marks, if any. + tree, the_exprs = _transform_important_subexpr(tree, envname=envname) + if not the_exprs and type(tree) is Compare: # inject the implicit the[] on the LHS + tree.left = _inject_value_recorder(envname, tree.left) - In case of nested tests, each `the[...]` is understood as belonging to - the lexically innermost surrounding test. + # We delay the execution of the test expr using a lambda, so + # `unpythonic_assert` can get control first before the expr runs. + # + # Also, we need the lambda for passing in the value capture environment + # for the `the[]` mark, anyway. + # + # We can't inject `lazy[]` here (to be more explicit this is a delay operation), + # because we need to pass the environment. + # + # We name the lambda `testexpr` to make the stack trace more understandable. + # If you change the name, change it also in `unpythonic_assert`. + thelambda = q[lambda _: a[tree]] + thelambda.args.args[0] = arg(arg=envname) # inject the gensymmed parameter name + func_tree = q[h[namelambda]("testexpr")(a[thelambda])] # create the function that takes in the env - For `test_raises` and `test_signals`, the `the[...]` mark is not supported. 
- """ - raise SyntaxError("the[] is only meaningful inside a `test[]` or in a `with test` block") # pragma: no cover, not meant to hit the expander + return q[(a[asserter])(u[sourcecode], + a[func_tree], + filename=a[filename], + lineno=a[ln], + message=a[message])] # Destructuring utilities for marking a custom part of the expr # to be displayed upon test failure, using `the[...]`: # test[myconstant in the[computeset(...)]] # test[the[computeitem(...)] in expected_results_plus_uninteresting_items] +# These are used by `_test_expr` and `_test_block`. def _is_important_subexpr_mark(tree): return type(tree) is Subscript and type(tree.value) is Name and tree.value.id == "the" def _record_value(envname, sourcecode, value): @@ -417,7 +865,7 @@ def _record_value(envname, sourcecode, value): def _inject_value_recorder(envname, tree): # wrap tree with the the[] handler recorder = q[h[_record_value]] # TODO: stash hygienic value? return q[a[recorder](n[envname], - u[unparse(tree)], + u[unparse(tree, color=True, expander=dyn._macro_expander)], a[tree])] def _transform_important_subexpr(tree, envname): # The the[] mark mechanism is invoked outside-in, because for reporting, @@ -431,10 +879,7 @@ def transform(self, tree): if isunexpandedtestmacro(tree): return tree elif _is_important_subexpr_mark(tree): - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - thing = tree.slice - else: - thing = tree.slice.value + thing = tree.slice self.collect(thing) # or anything really; value not used, we just count them. # Handle any nested the[] subexpressions subtree = self.visit(thing) @@ -446,57 +891,13 @@ def transform(self, tree): return tree, transformer.collected -def test_expr(tree): - # Note we want the line number *before macro expansion*, so we capture it now. 
- ln = q[u[tree.lineno]] if hasattr(tree, "lineno") else q[None] - filename = q[h[callsite_filename]()] - asserter = q[h[unpythonic_assert]] - - # test[expr, message] (like assert expr, message) - if type(tree) is Tuple and len(tree.elts) == 2: - tree, message = tree.elts - # test[expr] (like assert expr) - else: - message = q[None] - - # Before we edit the tree, get the source code in its pre-transformation - # state, so we can include that into the test failure message. - # - # We capture the source in the first pass, so that no macros in tree are - # expanded yet. For the same reason, we process the `the[]` marks in the - # first pass. - sourcecode = unparse(tree) - - envname = gensym("e") # for injecting the captured value - - # Handle the `the[...]` marks, if any. - tree, the_exprs = _transform_important_subexpr(tree, envname=envname) - if not the_exprs and type(tree) is Compare: # inject the implicit the[] on the LHS - tree.left = _inject_value_recorder(envname, tree.left) - - # End of first pass. - tree = dyn._macro_expander.visit(tree) - - # We delay the execution of the test expr using a lambda, so - # `unpythonic_assert` can get control first before the expr runs. - # - # Also, we need the lambda for passing in the value capture environment - # for the `the[]` mark, anyway. - # - # We name it `testexpr` to make the stack trace more understandable. - # If you change the name, change it also in `unpythonic_assert`. 
- thelambda = q[lambda _: a[tree]] - thelambda.args.args[0] = arg(arg=envname) # inject the gensymmed parameter name - func_tree = q[h[namelambda]("testexpr")(a[thelambda])] # create the function that takes in the env - - return q[(a[asserter])(u[sourcecode], - a[func_tree], - filename=a[filename], - lineno=a[ln], - message=a[message])] +def _test_expr_signals(tree): + return _test_expr_signals_or_raises(tree, "test_signals", q[h[unpythonic_assert_signals]]) +def _test_expr_raises(tree): + return _test_expr_signals_or_raises(tree, "test_raises", q[h[unpythonic_assert_raises]]) def _test_expr_signals_or_raises(tree, syntaxname, asserter): - ln = q[u[tree.lineno]] if hasattr(tree, "lineno") else q[None] + ln = q[u[getattr(tree, "lineno", None)]] # may be absent on 3.10–3.12; None on 3.13+ filename = q[h[callsite_filename]()] # test_signals[exctype, expr, message] @@ -507,71 +908,56 @@ def _test_expr_signals_or_raises(tree, syntaxname, asserter): exctype, tree = tree.elts message = q[None] else: - raise SyntaxError(f"Expected one of {syntaxname}[exctype, expr], {syntaxname}[exctype, expr, message]") + raise SyntaxError(f"Expected one of {syntaxname}[exctype, expr], {syntaxname}[exctype, expr, message]") # pragma: no cover - # Before we edit the tree, get the source code in its pre-transformation - # state, so we can include that into the test failure message. - # - # We capture the source in the first pass, so that no macros in tree are - # expanded yet. - sourcecode = unparse(tree) - - # End of first pass. - tree = dyn._macro_expander.visit(tree) + # Same remark about outside-in source code capture as in `_test_expr`. + sourcecode = unparse(tree, color=True, expander=dyn._macro_expander) + # Name our lambda to make the stack trace more understandable. + # For consistency, the name matches that used by `_test_expr`. 
+ func_tree = q[h[namelambda]("testexpr")(lambda: a[tree])] return q[(a[asserter])(a[exctype], u[sourcecode], - lambda: a[tree], + a[func_tree], filename=a[filename], lineno=a[ln], message=a[message])] -def test_expr_signals(tree): - return _test_expr_signals_or_raises(tree, "test_signals", q[h[unpythonic_assert_signals]]) -def test_expr_raises(tree): - return _test_expr_signals_or_raises(tree, "test_raises", q[h[unpythonic_assert_raises]]) - # ----------------------------------------------------------------------------- # Block variants. # The strategy is we capture the block body into a new function definition, # and then `unpythonic_assert` on that function. -def test_block(block_body, args): +def _test_block(block_body, args): if not block_body: return [] # pragma: no cover, cannot happen through the public API. first_stmt = block_body[0] # Note we want the line number *before macro expansion*, so we capture it now. - ln = q[u[first_stmt.lineno]] if hasattr(first_stmt, "lineno") else q[None] + ln = q[u[getattr(first_stmt, "lineno", None)]] # may be absent on 3.10–3.12; None on 3.13+ filename = q[h[callsite_filename]()] asserter = q[h[unpythonic_assert]] - # with test(message): + # with test[message]: if len(args) == 1: message = args[0] # with test: elif len(args) == 0: message = q[None] else: - raise SyntaxError('Expected `with test:` or `with test(message):`') + raise SyntaxError('Expected `with test:` or `with test[message]:`') # pragma: no cover - # Before we edit the tree, get the source code in its pre-transformation - # state, so we can include that into the test failure message. - # - # We capture the source in the first pass, so that no macros in tree are - # expanded yet. For the same reason, we process the `the[]` marks in the - # first pass. - sourcecode = unparse(block_body) + # Same remark about outside-in source code capture as in `_test_expr`. 
+ sourcecode = unparse(block_body, color=True, expander=dyn._macro_expander) envname = gensym("e") # for injecting the captured value # Handle the `the[...]` marks, if any. block_body, the_exprs = _transform_important_subexpr(block_body, envname=envname) - # End of first pass. - block_body = dyn._macro_expander.visit(block_body) - - testblock_function_name = gensym("test_block") + # Prepare the function template to be injected, and splice the contents + # of the `with test` block as the function body. + testblock_function_name = gensym("_test_block") thetest = q[(a[asserter])(u[sourcecode], n[testblock_function_name], filename=a[filename], @@ -579,64 +965,64 @@ def test_block(block_body, args): message=a[message])] with q as newbody: def _insert_funcname_here_(_insert_envname_here_): - ... - a[thetest] + ... # to be filled in below + a[thetest] # call the asserter thefunc = newbody[0] thefunc.name = testblock_function_name thefunc.args.args[0] = arg(arg=envname) # inject the gensymmed parameter name + thefunc.body = block_body # Handle the return statement. # # We just check if there is at least one; if so, we don't need to do # anything; the returned value is what the test should return to the # asserter. - for stmt in block_body: + for stmt in thefunc.body: if type(stmt) is Return: retval = stmt.value if not the_exprs and type(retval) is Compare: # inject the implicit the[] on the LHS retval.left = _inject_value_recorder(envname, retval.left) + break else: # When there is no return statement at the top level of the `with test` block, - # we inject a `return True` to satisfy the test when the function returns normally. + # we inject a `return True` to satisfy the test when the injected function + # returns normally. 
with q as thereturn: return True - block_body.extend(thereturn) - - thefunc.body = block_body + thefunc.body.extend(thereturn) return newbody + +def _test_block_signals(block_body, args): + return _test_block_signals_or_raises(block_body, args, "test_signals", q[h[unpythonic_assert_signals]]) +def _test_block_raises(block_body, args): + return _test_block_signals_or_raises(block_body, args, "test_raises", q[h[unpythonic_assert_raises]]) + def _test_block_signals_or_raises(block_body, args, syntaxname, asserter): if not block_body: return [] # pragma: no cover, cannot happen through the public API. first_stmt = block_body[0] # Note we want the line number *before macro expansion*, so we capture it now. - ln = q[u[first_stmt.lineno]] if hasattr(first_stmt, "lineno") else q[None] + ln = q[u[getattr(first_stmt, "lineno", None)]] # may be absent on 3.10–3.12; None on 3.13+ filename = q[h[callsite_filename]()] - # with test_raises(exctype, message): + # with test_raises[exctype, message]: if len(args) == 2: exctype, message = args - # with test_raises(exctype): + # with test_raises[exctype]: elif len(args) == 1: exctype = args[0] message = q[None] else: - raise SyntaxError(f'Expected `with {syntaxname}(exctype):` or `with {syntaxname}(exctype, message):`') + raise SyntaxError(f'Expected `with {syntaxname}(exctype):` or `with {syntaxname}[exctype, message]:`') # pragma: no cover - # Before we edit the tree, get the source code in its pre-transformation - # state, so we can include that into the test failure message. - # - # We capture the source in the first pass, so that no macros in tree are - # expanded yet. - sourcecode = unparse(block_body) + # Same remark about outside-in source code capture as in `_test_expr`. + sourcecode = unparse(block_body, color=True, expander=dyn._macro_expander) - # End of first pass. 
- block_body = dyn._macro_expander.visit(block_body) - - testblock_function_name = gensym("test_block") + testblock_function_name = gensym("_test_block") thetest = q[(a[asserter])(a[exctype], u[sourcecode], n[testblock_function_name], @@ -651,8 +1037,3 @@ def _insert_funcname_here_(): # no env needed, since `the[]` is not meaningful thefunc.name = testblock_function_name thefunc.body = block_body return newbody - -def test_block_signals(block_body, args): - return _test_block_signals_or_raises(block_body, args, "test_signals", q[h[unpythonic_assert_signals]]) -def test_block_raises(block_body, args): - return _test_block_signals_or_raises(block_body, args, "test_raises", q[h[unpythonic_assert_raises]]) diff --git a/unpythonic/syntax/tests/test_autocurry.py b/unpythonic/syntax/tests/test_autocurry.py index 325736bd..f5177bed 100644 --- a/unpythonic/syntax/tests/test_autocurry.py +++ b/unpythonic/syntax/tests/test_autocurry.py @@ -11,6 +11,8 @@ from ...llist import cons, nil, ll from ...collections import frozendict +# TODO: Add test that `autocurry` leaves `type` statements alone once we bump minimum language version to Python 3.12. + def runtests(): with testset("basic usage"): with autocurry: diff --git a/unpythonic/syntax/tests/test_autoref.py b/unpythonic/syntax/tests/test_autoref.py index 3e379840..da9cc031 100644 --- a/unpythonic/syntax/tests/test_autoref.py +++ b/unpythonic/syntax/tests/test_autoref.py @@ -62,12 +62,19 @@ def runtests(): test[c == 17] # # Explicit asname optimizes lookups also in nested autoref blocks. - # # TODO: how to test? For now, just "with step_expansion" this and eyeball the result. + # # TODO: To test this, we need to use run-time compiler access and look at the AST. + # # TODO: See how `mcpyrate` does its tests. + # # TODO: For now, just "with step_expansion" this and eyeball the result. 
# with autoref[env(a=1, b=2)] as e1: # e1 # just e1, no autoref lookup - # with autoref[env(c=3, d=4)] as e2: + # with autoref[env(c=3, d=4, e1=None)] as e2: # e2 # just e2 # e1 # just e1 (special handling; already inserted lookup is removed by the outer block when it expands) + # But this special case we can test easily: + with autoref[env(a=1, b=2)] as e1: + # Place a key "e1" into our second env so that a spurious lookup for that triggers an error. + with autoref[env(c=3, d=4, e1=None)] as e2: + test[isinstance(e1, env)] # just e1, no lookup with testset("attributes and subscripts"): e2 = env(x=e, s=[1, 2, 3]) @@ -98,9 +105,9 @@ def runtests(): with autoref[e]: x = do[local[a << 2], 2 * a] # noqa: F821 test[x == 4] - y = let[(x, 21) in 2 * x] + y = let[[x << 21] in 2 * x] test[y == 42] - z = let[(x, 21) in 2 * a] # e.a # noqa: F821 + z = let[[x << 21] in 2 * a] # e.a # noqa: F821 test[z == 2] with testset("integration with lazify"): diff --git a/unpythonic/syntax/tests/test_autoret.py b/unpythonic/syntax/tests/test_autoret.py index d1e431f9..005e9654 100644 --- a/unpythonic/syntax/tests/test_autoret.py +++ b/unpythonic/syntax/tests/test_autoret.py @@ -16,8 +16,8 @@ def runtests(): # - if you need a loop in tail position to have a return value, # use an explicit return, or the constructs from unpythonic.fploop. # - any explicit return statements are left alone, so "return" can be used normally. 
- with autoreturn: - with testset("basic usage"): + with testset("basic usage"): + with autoreturn: def f(): "I'll just return this" test[f() == "I'll just return this"] @@ -26,7 +26,8 @@ def f2(): return "I'll just return this" # explicit return, not transformed test[f2() == "I'll just return this"] - with testset("if, elif, else"): + with testset("if, elif, else"): + with autoreturn: def g(x): if x == 1: "one" @@ -38,7 +39,8 @@ def g(x): test[g(2) == "two"] test[g(42) == "something else"] - with testset("except, else"): + with testset("except, else"): + with autoreturn: def h(x): try: if x == 1: @@ -50,7 +52,8 @@ def h(x): test[h(10) == 20] test[h(1) == "error"] - with testset("except, body of the try"): + with testset("except, body of the try"): + with autoreturn: def h2(x): try: if x == 1: @@ -61,12 +64,67 @@ def h2(x): test[h2(10) == 10] test[h2(1) == "error"] - with testset("with block"): + with testset("with block"): + with autoreturn: def ctx(): with env(x="hi") as e: # just need some context manager for testing, doesn't matter which e.x # tail position in a with block test[ctx() == "hi"] + with testset("function definition"): # v0.15.0+ + with autoreturn: + def outer(): + def inner(): + "inner function" + test[callable(outer())] # returned a function + test[outer()() == "inner function"] + + with testset("class definition"): # v0.15.0+ + with autoreturn: + def classdefiner(): + class InnerClassDefinition: + pass + test[isinstance(classdefiner(), type)] # returned a class + test[classdefiner().__name__ == "InnerClassDefinition"] + + with testset("match/case"): # Python 3.10+ + with autoreturn: + def classify(x): + match x: + case 1: + "one" + case 2: + "two" + case _: + "other" + test[classify(1) == "one"] + test[classify(2) == "two"] + test[classify(42) == "other"] + + def classify_nested(x): + match x: + case (a, b): + a + b + case [a, b, *rest]: + a + b + sum(rest) + case _: + 0 + test[classify_nested((3, 4)) == 7] + test[classify_nested([1, 2, 3, 4]) == 
10] + test[classify_nested("nope") == 0] + + def classify_with_guard(x): + match x: + case n if n < 0: + "negative" + case 0: + "zero" + case n if n > 0: + "positive" + test[classify_with_guard(-5) == "negative"] + test[classify_with_guard(0) == "zero"] + test[classify_with_guard(7) == "positive"] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_conts.py b/unpythonic/syntax/tests/test_conts.py index 966f6f53..59c58a6d 100644 --- a/unpythonic/syntax/tests/test_conts.py +++ b/unpythonic/syntax/tests/test_conts.py @@ -1,15 +1,18 @@ # -*- coding: utf-8 -*- """Continuations (call/cc for Python).""" -from ...syntax import macros, test, test_raises, error # noqa: F401 +from ...syntax import macros, test, test_raises, error, fail # noqa: F401 from ...test.fixtures import session, testset, returns_normally from ...syntax import macros, continuations, call_cc, multilambda, autoreturn, autocurry, let # noqa: F401, F811 +from ...syntax import get_cc, iscontinuation +from ...collections import box, unbox from ...ec import call_ec from ...fploop import looped -from ...tco import trampolined, jump from ...fun import withself +from ...funutil import Values +from ...tco import trampolined, jump def runtests(): with testset("basic usage"): @@ -19,7 +22,13 @@ def add1(x): test[add1(2) == 3] def message(cc): - return ("hello", "there") + # The continuations system essentially deals with function composition, + # so we make a distinction between a single `tuple` return value and + # multiple-return-values. + # + # Use Values(...) to return multiple values from a function that you + # intend to `call_cc`. + return Values("hello", "there") def baz(): m, n = call_cc[message()] # The cc arg is passed implicitly. return [m, n] @@ -27,18 +36,25 @@ def baz(): # The cc arg must be declared as the last one that has no default value, # or declared as by-name-only. It's always passed by name. 
+ # + # If the function is going to be used as a target for `call_cc[]`, + # multiple return values must be packed into a `Values`. def f(a, b, cc): - return 2 * a, 3 * b - test[f(3, 4) == (6, 12)] + return Values(2 * a, 3 * b) + test[f(3, 4) == Values(6, 12)] x, y = f(3, 4) test[x == 6 and y == 12] def g(a, b): + # `f` packs its multiple return values into a `Values`, + # so we can use an unpacking assignment to extract them. x, y = call_cc[f(a, b)] return x, y fail["This line should not be reached."] # pragma: no cover test[g(3, 4) == (6, 12)] + # Unpacking into a star-target (as the last target) sends any + # remaining positional return values there, as a tuple. xs, *a = call_cc[f(1, 2)] test[xs == 2 and a == (6,)] @@ -49,7 +65,7 @@ def g(a, b): def h1(a, b): x, y = call_cc[f(a, b)] return None or f(3, 4) # the f from the previous "with continuations" block - test[h1(3, 4) == (6, 12)] + test[h1(3, 4) == Values(6, 12)] def h2(a, b): x, y = call_cc[f(a, b)] @@ -60,7 +76,7 @@ def h2(a, b): def h3(a, b): x, y = call_cc[f(a, b)] return None or False or f(3, 4) - test[h3(3, 4) == (6, 12)] + test[h3(3, 4) == Values(6, 12)] def h4(a, b): x, y = call_cc[f(a, b)] @@ -76,7 +92,7 @@ def h5(a, b): def i1(a, b): x, y = call_cc[f(a, b)] return True and f(3, 4) - test[i1(3, 4) == (6, 12)] + test[i1(3, 4) == Values(6, 12)] def i2(a, b): x, y = call_cc[f(a, b)] @@ -87,7 +103,7 @@ def i2(a, b): def i3(a, b): x, y = call_cc[f(a, b)] return True and 42 and f(3, 4) - test[i3(3, 4) == (6, 12)] + test[i3(3, 4) == Values(6, 12)] def i4(a, b): x, y = call_cc[f(a, b)] @@ -103,27 +119,27 @@ def i5(a, b): def j1(a, b): x, y = call_cc[f(a, b)] return None or True and f(3, 4) - test[j1(3, 4) == (6, 12)] + test[j1(3, 4) == Values(6, 12)] with testset("let in tail position"): with continuations: def j2(a, b): x, y = call_cc[f(a, b)] - return let[((c, a), # noqa: F821 - (d, b)) in f(c, d)] # noqa: F821 - test[j2(3, 4) == (6, 12)] + return let[[c << a, # noqa: F821 + d << b] in f(c, d)] # noqa: 
F821 + test[j2(3, 4) == Values(6, 12)] with testset("if-expression in tail position"): with continuations: def j3(a, b): x, y = call_cc[f(a, b)] return f(a, b) if True else None - test[j3(3, 4) == (6, 12)] + test[j3(3, 4) == Values(6, 12)] def j4(a, b): x, y = call_cc[f(a, b)] return None if False else f(a, b) - test[j4(3, 4) == (6, 12)] + test[j4(3, 4) == Values(6, 12)] with testset("integration with a lambda that has TCO"): with continuations: @@ -194,8 +210,6 @@ def setk(*args, cc): # and list() is a regular function, not a continuation-enabled one # (so it would immediately terminate the TCO chain; besides, # it takes only 1 argument and doesn't know what to do with "cc".) - # - list instead of tuple to return it as one value - # (a tuple return value is interpreted as multiple-return-values) return xs def doit(): lst = ['the call returned'] @@ -213,7 +227,7 @@ def doit(): def setk(*args, cc): # noqa: F811, the previous one is no longer used. nonlocal k k = cc # current continuation, i.e. where to go after setk() finishes - return args # tuple means multiple-return-values + return Values(*args) # multiple-return-values def doit(): lst = ['the call returned'] *more, = call_cc[setk('A')] @@ -224,6 +238,21 @@ def doit(): test[k('again') == ['the call returned', 'again']] test[k('thrice', '!') == ['the call returned', 'thrice', '!']] + with testset("integration with named return values"): + # Named return values aren't supported as assignment targets in a `call_cc[]` + # due to syntactic limitations. But they can be used elsewhere in continuation-enabled code. + with continuations: + def f1(x, y): + return Values(x=x, y=y) # named return values + def f2(*, x, y): # note keyword-only parameters + return x, y # one return value, a tuple (for multiple-return-values, use `Values(...)`) + # Think through carefully what this does: call `f1`, chain to `f2` as the continuation. 
+ # The continuation is set here by explicitly providing a value for the implicit `cc` parameter. + # + # The named return values from `f1` are then unpacked, by the continuation machinery, + # into the kwargs of `f2`. Then `f2` takes those, and returns a tuple. + test[f1(2, 3, cc=f2) == (2, 3)] + with testset("top level call_cc"): # A top-level "call_cc" is also allowed. # @@ -235,7 +264,7 @@ def doit(): def setk(*args, cc): # noqa: F811, the previous one is no longer used. nonlocal k k = cc - return args # tuple return value (if not literal, tested at run-time) --> multiple-values + return Values(*args) # multiple-return-values x, y = call_cc[setk(*vals)] test[x, y == vals] # end the block to end capture, and start another one to resume programming @@ -370,10 +399,13 @@ def amb(lst, cc): return fail() first, *rest = tuple(lst) if rest: + # Note even the `lambda` below has an implicit `cc` parameter; + # hence we must name the current `cc` to something else to be + # able to use the value inside the `lambda`. ourcc = cc stack.append(lambda: amb(rest, cc=ourcc)) return first - def fail(): + def fail(): # noqa: F811, not redefining, the first one is a macro. if stack: f = stack.pop() return f() @@ -424,8 +456,9 @@ def pt_gen(maxn): count = 0 def pt(maxn): # This generates 1540 combinations, with several nested tail-calls each, - # so we really need TCO here. (Without TCO, nothing would return until - # the whole computation is done; it would blow the call stack very quickly.) + # so we really need TCO here. Without TCO, nothing would return until + # the whole computation is done; it would blow the call stack very quickly. + # With TCO, it's just a case of "lambda, the ultimate goto". 
z = call_cc[amb(range(1, maxn + 1))] y = call_cc[amb(range(1, z + 1))] x = call_cc[amb(range(1, y + 1))] @@ -478,7 +511,7 @@ def pt(maxn): test[out == pts] with testset("integration with autoreturn and autocurry simultaneously"): - with autocurry: # major slowdown, but works; must be in a separate "with" # TODO: why separate? https://github.com/azazel75/macropy/issues/21 + with autocurry: # major slowdown, but works with autoreturn, continuations: stack = [] def amb(lst, cc): # noqa: F811, the previous one is no longer used. @@ -621,6 +654,246 @@ def s(loop, acc=0): test[tuple(out) == 2 * tuple(range(11))] test[s == 10] + # As of 0.15.1, the preferred way of working with continuations is as follows. + # + # The pattern `k = call_cc[get_cc()]` covers the 99% common case where you + # just want to snapshot and save the control state into a local variable. + # + # See docstring of `unpythonic.syntax.get_cc` for more. It's a regular function + # that works together with the `call_cc` macro. + with testset("get_cc, the less antisocial little sister of call_cc"): + with continuations: + def append_stuff_to(lst): + lst.append("one") + k = call_cc[get_cc()] + lst.append("two") + return k + + lst = [] + k = append_stuff_to(lst) + test[lst == ["one", "two"]] + # invoke the continuation + k(k) # send `k` back in as argument so it the continuation sees it as its local `k` + test[lst == ["one", "two", "two"]] + + # If your continuation needs to take arguments, `get_cc` can also make a parametric continuation: + with testset("get_cc with parametric continuation"): + with continuations: + def append_stuff_to(lst): + # Important: in the `get_cc` call, the initial values for + # the additional arguments, if any, must be passed positionally, + # due to `call_cc` syntax limitations. 
+ k, x1, x2 = call_cc[get_cc(1, 2)] + lst.extend([x1, x2]) + return k + + lst = [] + k = append_stuff_to(lst) + test[lst == [1, 2]] + # invoke the continuation, sending both `k` and our additional arguments. + k(k, 3, 4) + test[lst == [1, 2, 3, 4]] + # When invoking the continuation, the additional arguments can be passed + # in any way allowed by Python. + k(k, x1=5, x2=6) + test[lst == [1, 2, 3, 4, 5, 6]] + + # You can also abuse `k` to pass an arbitrary object, if inside the + # continuation, you don't need a reference to the continuation itself. + # This is the lispy solution. + # + # Then you can `iscontinuation(k)` to check whether it is a continuation + # (first run, return value of `get_cc()`), or something else (second and + # further runs, a value sent in via the continuation). + # + # Whether this or the previous example is more pythonic is left as an + # exercise to the reader. + # + # In this solution, be careful, if you need to send in a continuation + # function for some reason. It is impossible to be 100% sure whether `k` + # is *the* continuation that should have been returned by *this* `get_cc`. + # If you need to send in a continuation function, box it (in a read-only + # `Some` box, even), to make it explicit that it's intended as data. + with testset("get_cc lispy style"): + with continuations: + # The pattern + # + # k = call_cc[get_cc()] + # if iscontinuation(k): + # return k + # + # creates a multi-shot resume point. See also `test_conts_multishot.py`. + def append_stuff_to(lst): + ... # could do something useful here (otherwise, why make a continuation?) + + k = call_cc[get_cc()] + + # <-- the resume point is here, with `k` set to "the return value of the `call_cc`", + # i.e. the continuation during the first run, and whatever was sent in during later runs. 
+ + # In 0.15.1+, continuation functions created by the `call_cc[...]` macro are + # tagged, and can be detected using `unpythonic.syntax.iscontinuation`, which + # is a regular function: + if iscontinuation(k): # first run; just return the continuation + return k + + # invoked via continuation, now `k` is input data instead of a continuation + x1, x2 = k + lst.extend([x1, x2]) + return None + + lst = [] + k = append_stuff_to(lst) + k([1, 2]) # whatever object we send in becomes the local `k` in the continuation. + test[lst == [1, 2]] + k([3, 4]) + test[lst == [1, 2, 3, 4]] + + with testset("scoping, locals only"): + # This is the cleanest way to scope your local variables in continuations: + # just accept the fact that each continuation introduces a scope boundary. + with continuations: + def f(): + # Original function scope + x = None + + # Continuation 1 scope begins here + # (from the statement following `call_cc` onward, but including the `k1`) + k1 = call_cc[get_cc()] + if iscontinuation(k1): + # This `x` is local to continuation 1. + x = "cont 1 first time" + return k1, x + + # Continuation 2 scope begins here + k2 = call_cc[get_cc()] + if iscontinuation(k2): + # This `x` is local to continuation 2. + x = "cont 2 first time" + return k2, x + + # Still in continuation 2, so this is the `x` of continuation 2. + x = "cont 2 second time" + return None, x + + k1, x = f() + test[x == "cont 1 first time"] + k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + test[x == "cont 2 first time"] + k3, x = k2(None) + test[k3 is None] + test[x == "cont 2 second time"] + + k2, x = k1(None) # multi-shotting from earlier resume point + test[x == "cont 2 first time"] + + # TODO: This breaks the coverage analyzer, because 'name 'x' is assigned to before nonlocal declaration'. + # TODO: Fair enough, that's not standard Python. So let's just disable this for now. 
+ # with testset("scoping, in presence of nonlocal"): + # # TODO: better example + # # It shouldn't matter in this particular example whether we declare the `x` + # # in the continuations `nonlocal`, because once the parent returns, the + # # only places that can access its locals *from that activation* are the + # # continuation closures *created by that activation*. + # with continuations: + # def f(): + # # Original function scope + # x = None + # + # # Continuation 1 scope begins here + # # (from the statement following `call_cc` onward, but including the `k1`) + # k1 = call_cc[get_cc()] + # nonlocal x # <-- IMPORTANT + # if iscontinuation(k1): + # # This is now the original `x`. + # x = "cont 1 first time" + # return k1, x + # + # # Continuation 2 scope begins here + # k2 = call_cc[get_cc()] + # nonlocal x # <-- IMPORTANT + # if iscontinuation(k2): + # # This too is the original `x`. + # x = "cont 2 first time" + # return k2, x + # + # # Still the original `x`. + # x = "cont 2 second time" + # return None, x + # + # k1, x = f() + # test[x == "cont 1 first time"] + # k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + # test[x == "cont 2 first time"] + # k3, x = k2(None) + # test[k3 is None] + # test[x == "cont 2 second time"] + # + # k2, x = k1(None) # multi-shotting from earlier resume point + # test[x == "cont 2 first time"] + + # If you need to scope like `nonlocal`, use the classic solution: box the value, + # so you have no need to overwrite the name; you can replace the thing in the box. + # + # (Classic from before `nonlocal` declarations were a thing. 
They were added in 3.0; + # for historical interest, see https://www.python.org/dev/peps/pep-3104/ ) + with testset("scoping, using a box"): + with continuations: + # poor man's execution trace + def make_tracing_box_updater(thebox, trace): + def update(value): + trace.append(f"old: {unbox(thebox)}") + thebox << value + trace.append(f"new: {unbox(thebox)}") + return value + return update + + # If we wanted to replace the list instance later, we could pass the list in a box, too. + def f(lst): + # Now there is just one `x`, which is the box; we just update the contents. + # Original function scope + x = box("f") + lst.append(f"initial: {unbox(x)}") + update = make_tracing_box_updater(x, lst) + + # Continuation 1 scope begins here + # (from the statement following `call_cc` onward, but including the `k1`) + k1 = call_cc[get_cc()] + if iscontinuation(k1): + return k1, update("k1 first") + update("k1 again") + + # Continuation 2 scope begins here + k2 = call_cc[get_cc()] + if iscontinuation(k2): + return k2, update("k2 first") + update("k2 again") + + return None, unbox(x) + + trace = [] + k1, x = f(trace) + test[x == "k1 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first']] + k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + test[x == "k2 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first']] + k3, x = k2(None) + test[k3 is None] + test[x == "k2 again"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first', + 'old: k2 first', 'new: k2 again']] + + k2, x = k1(None) # multi-shotting from earlier resume point + test[x == "k2 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first', + 'old: k2 first', 'new: k2 again', + 'old: k2 again', 'new: k1 again', 'old: k1 again', 'new: k2 
first']] + # ^^^^^^^^^^^^^^^ state as left by `k2` before the multi-shot + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_conts_gen.py b/unpythonic/syntax/tests/test_conts_gen.py index af582ac9..60964d26 100644 --- a/unpythonic/syntax/tests/test_conts_gen.py +++ b/unpythonic/syntax/tests/test_conts_gen.py @@ -16,24 +16,27 @@ See also the Racket version of this: https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt + +And see the alternative approach using the pattern `k = call_cc[get_cc()]` +in `test_conts_multishot.py`. """ -from ...syntax import macros, test # noqa: F401 +from ...syntax import macros, test, test_raises # noqa: F401, F811 from ...test.fixtures import session, testset -from ...syntax import macros, continuations, call_cc, dlet, abbrev, let_syntax # noqa: F401, F811 -from ...syntax import block +from ...syntax import macros, continuations, call_cc, dlet, abbrev, let_syntax, block # noqa: F401, F811 from ...fploop import looped from ...fun import identity -#from mcpyrate.debug import macros, step_expansion # noqa: F811, F401 +from mcpyrate.debug import macros, step_expansion # noqa: F811, F401 + def runtests(): with testset("a basic generator"): with continuations: # logic to resume after the last executed my_yield, if any - @dlet((k, None)) # noqa: F821, dlet defines the name. + @dlet(k << None) # noqa: F821, dlet defines the name. def g(): if k: # noqa: F821 return k() # noqa: F821 @@ -58,7 +61,7 @@ def my_yield(value, cc): with testset("FP loop based generator"): with continuations: # logic to resume after the last executed my_yield, if any - @dlet((k, None)) # noqa: F821 + @dlet(k << None) # noqa: F821 def g(): if k: # noqa: F821 return k() # noqa: F821 @@ -94,11 +97,11 @@ def result(loop, i=0): # A basic generator template using abbrev[]. 
with testset("integration with abbrev"): with continuations: - # We must expand abbreviations in the first pass, before the @dlet that's + # We must expand abbreviations in the outside-in pass, before the @dlet that's # not part of the template (since we splice in stuff that is intended to # refer to the "k" in the @dlet env). So use abbrev[] instead of let_syntax[]. with abbrev: - with block(value) as my_yield: # noqa: F821, here `abbrev` defines the name `value` when we call `my_yield`. + with block[value] as my_yield: # noqa: F821, here `abbrev` defines the name `value` when we call `my_yield`. call_cc[my_yieldf(value)] # for this to work, abbrev[] must eliminate its "if 1" blocks. # noqa: F821, my_yieldf will be defined below and this is a macro. with block as begin_generator_body: # logic to resume after the last executed my_yield, if any @@ -109,7 +112,7 @@ def my_yieldf(value, cc): cc = identity return value - @dlet((k, None)) # <-- we must still remember this line # noqa: F821 + @dlet(k << None) # <-- we must still remember this line # noqa: F821 def g(): begin_generator_body my_yield(1) @@ -130,10 +133,10 @@ def g(): # and the user code (generator body) doesn't refer to k directly. # (So "k" can be resolved lexically *in the input source code that goes to dlet[]*.) with let_syntax: - with block(value) as my_yield: # noqa: F821 + with block[value] as my_yield: # noqa: F821 call_cc[my_yieldf(value)] # for this to work, let_syntax[] must eliminate its "if 1" blocks. # noqa: F821 - with block(myname, body) as make_generator: # noqa: F821, `let_syntax` defines `myname` and `body` when we call `make_generator`. - @dlet((k, None)) # noqa: F821 + with block[myname, body] as make_generator: # noqa: F821, `let_syntax` defines `myname` and `body` when we call `make_generator`. + @dlet(k << None) # noqa: F821 def myname(): # replaced by the user-supplied name, since "myname" is a template parameter. 
# logic to resume after the last executed my_yield, if any if k: # noqa: F821 @@ -179,6 +182,61 @@ def result(loop, i=0): x = g2() # noqa: F821 test[out == list(range(10))] + with testset("multi-shot generators with call_cc[]"): + with continuations: + with let_syntax: + with block[value] as my_yield: # noqa: F821 + call_cc[my_yieldf(value)] # noqa: F821 + with block[myname, body] as make_multishot_generator: # noqa: F821 + def myname(k=None): # "myname" is replaced by the user-supplied name + if k: # noqa: F821 + return k() # noqa: F821 + def my_yieldf(value=None, *, cc): + k = cc # noqa: F821 + cc = identity + if value is None: + return k + return k, value + body # noqa: F821 + # If we wanted a mechanism to `return` a final value, + # this would be the place to send it. + raise StopIteration + + # We must define the body as an abbrev block to give it a name, + # because template arguments must be expressions (and a name is, + # but a literal block of code isn't). + # + # This user-defined body gets spliced in after the make_generator + # template itself has expanded. + with block as mybody: + my_yield(1) + my_yield(2) + my_yield(3) + make_multishot_generator(g, mybody) + + # basic test + out = [] + k, x = g() + try: + while True: + out.append(x) + k, x = g(k) + except StopIteration: + pass + test[out == [1, 2, 3]] + + # multi-shot test + k1, x1 = g() # no argument: start from the beginning + k2, x2 = g(k1) # continue execution from k1 (after the first `my_yield`) + k3, x3 = g(k2) + k, x = g(k1) # multi-shot: continue *again* from k1 + test[x1 == 1] + test[x2 == x == 2] + test[x3 == 3] + test[k.__qualname__ == k2.__qualname__] # same bookmarked position... + test[k is not k2] # ...but different function object instance + test_raises[StopIteration, g(k3)] + # Unfortunately, this is as far as let_syntax[] gets us; if we wanted to # "librarify" this any further, we'd need to define a macro in `mcpyrate`. 
# @@ -187,6 +245,8 @@ def result(loop, i=0): # module level, define my_yield as a magic variable so that accidental uses # outside any make_generator are caught at compile time. The actual template the # make_generator macro needs to splice in is already here in the final example.) + # + # See `test_conts_multishot.py`, where we do librarify this a bit further. if __name__ == '__main__': # pragma: no cover with session(__file__): diff --git a/unpythonic/syntax/tests/test_conts_multishot.py b/unpythonic/syntax/tests/test_conts_multishot.py new file mode 100644 index 00000000..fc719a08 --- /dev/null +++ b/unpythonic/syntax/tests/test_conts_multishot.py @@ -0,0 +1,593 @@ +# -*- coding: utf-8 -*- +"""Multi-shot generator demo using the pattern `k = call_cc[get_cc()]`. + +This is a barebones implementation. + +We provide everything in one file, so we use `mcpyrate`'s multi-phase compilation +to be able to define the macros in the same module that uses them. + +Because `with continuations` is a two-pass macro, it will first expand any +`@multishot` inside the block before performing its own processing, which +is exactly what we want. We could force the ordering with the metatool +`mcpyrate.metatools.expand_first` that was added in `mcpyrate` 3.6.0, +but we don't need to do that. + +We provide a minimal `MultishotIterator` wrapper that makes a `@multishot` +multi-shot generator conform to the most basic parts of Python's generator API. +A full implementation of the generator API would require much more: + + - There is no `yield from` (delegation); needs a custom `myield_from`. + - Think hard about exception handling. + - Particularly, a `yield` inside a `finally` block is a classic catch. 
+""" + +from mcpyrate.multiphase import macros, phase + +from ...syntax import macros, test, test_raises # noqa: F401, F811 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations # noqa: F811 + +with phase[1]: + # TODO: relative imports + # TODO: mcpyrate does not recognize current package in phases higher than 0? (parent package missing) + + import ast + from functools import partial + + from mcpyrate.quotes import macros, q, n, a, h # noqa: F811 + from unpythonic.misc import safeissubclass + from unpythonic.syntax import macros, call_cc # noqa: F811 + + from mcpyrate import namemacro, gensym + from mcpyrate.quotes import is_captured_value + from mcpyrate.utils import extract_bindings + from mcpyrate.walkers import ASTTransformer + + from unpythonic.syntax import get_cc, iscontinuation + from unpythonic.syntax.scopeanalyzer import isnewscope + + def myield_function(tree, syntax, **kw): + """[syntax, name/expr] Yield from a multi-shot generator. + + For details, see `multishot`. + """ + if syntax not in ("name", "expr"): + raise SyntaxError("myield is a name and expr macro only") + + # Accept `myield` in any non-load context, so that we can below define the macro `myield`. + # + # This is only an issue, because this example uses multi-phase compilation. + # The phase-1 `myield` is in the macro expander - preventing us from referring to + # the name `myield` - when the lifted phase-0 definition is being run. During phase 0, + # that makes the line `myield = namemacro(...)` below into a macro-expansion-time + # syntax error, because that `myield` is not inside a `@multishot` generator. + # + # We hack around it, by allowing `myield` anywhere as long as the context is not a `Load`. + if type(getattr(tree, "ctx", None)) in (ast.Store, ast.Del): + return tree + + # `myield` is not really a macro, but a pattern that `multishot` looks for and compiles away. 
+ # Hence if any `myield` is left over and reaches the macro expander, it was placed incorrectly, + # so we can raise an error at macro expansion time. + raise SyntaxError("myield may only appear at the top level of a `@multishot` generator") + myield = namemacro(myield_function) + + def multishot(tree, syntax, expander, **kw): + """[syntax, block] Make a function into a multi-shot generator. + + Only meaningful inside a `with continuations` block. This is not checked. + + Multi-shot yield is spelled `myield`. When using `multishot`, be sure to + macro-import also `myield`, so that `multishot` knows which name you want + to use to refer to the `myield` construct (it is automatically queried + from the current expander's bindings). + + There are four variants:: + + Multi-shot yield Returns `k` expects Single-shot analog + + myield k no argument yield + myield[expr] (k, value) no argument yield expr + var = myield k one argument var = yield + var = myield[expr] (k, value) one argument var = yield expr + + To resume, call the function `k`. In cases where `k` expects an argument, + it is the value to send into `var`. + + Important differences: + + - A multi-shot generator may be resumed from any `myield` arbitrarily + many times, in any order. There is no concept of a single paused + activation. Each continuation is a function (technically a closure). + + When a multi-shot generator "myields", it returns just like a + normal function, technically terminating its execution. But it gives + you a continuation closure, that you can call to continue execution + just after that particular `myield`. + + The magic is in that the continuation closures are nested, so for + a given activation of the multi-shot generator, any local variables + in the already executed part remain alive as long as at least one + reference to any relevant closure instance exists. 
+ + And yes, "nested" does imply that the execution will branch into + "alternate timelines" if you re-invoke an earlier continuation. + (Maybe you want to send a different value into some algorithm, + to alter what it will do from a certain point onward.) + + This works in exactly the same way as manually nested closures. + The parent cells (in the technical sense of "cell variable") + are shared, but the continuation that was re-invoked is separately + activated again (in the sense of "activation record"), so the + continuation gets fresh locals. Thus the "timelines" will diverge. + + - `myield` is a *statement*, and it may only appear at the top level + of a multishot function definition, due to limitations of our `call_cc` + implementation. + + Usage:: + + with continuations: + @multishot + def f(): + # Stop, and return a continuation `k` that resumes just after this `myield`. + myield + + # Stop, and return the tuple `(k, 42)`. + myield[42] + + # Stop, and return a continuation `k`. Upon resuming `k`, + # set the local `k` to the value that was sent in. + k = myield + + # Stop, and return the tuple `(k, 42)`. Upon resuming `k`, + # set the local `k` to the value that was sent in. + k = myield[42] + + # Instantiate the multi-shot generator (like calling a gfunc). + # There is always an implicit bare `myield` at the beginning. + k0 = f() + + # Start, run up to the explicit bare `myield` in the example, + # receive new continuation. + k1 = k0() + + # Continue to the `myield[42]`, receive new continuation and the `42`. + k2, x2 = k1() + test[x2 == 42] + + # Continue to the `k = myield`, receive new continuation. + k3 = k2() + + # Send `23` as the value of `k`, continue to the `k = myield[42]`. + k4, x4 = k3(23) + test[x4 == 42] + + # Send `17` as the value of `k`, continue to the end. + # As with a regular Python generator, reaching the end raises `StopIteration`. 
+ # (As with generators, you can also trigger a `StopIteration` earlier via `return`, + # with an optional value.) + test_raises[StopIteration, k4(17)] + + # Re-invoke an earlier continuation: + k2, x2 = k1() + test[x2 == 42] + """ + if syntax != "decorator": + raise SyntaxError("multishot is a decorator macro only") # pragma: no cover + if type(tree) is not ast.FunctionDef: + raise SyntaxError("@multishot supports `def` only") + + # Detect the name(s) of `myield` at the use site (this accounts for as-imports) + macro_bindings = extract_bindings(expander.bindings, myield_function) + if not macro_bindings: + raise SyntaxError("The use site of `multishot` must macro-import `myield`, too.") + names_of_myield = list(macro_bindings.keys()) + + def is_myield_name(node): + return type(node) is ast.Name and node.id in names_of_myield + def is_myield_expr(node): + return type(node) is ast.Subscript and is_myield_name(node.value) + def getslice(subscript_node): + return subscript_node.slice + class MultishotYieldTransformer(ASTTransformer): + def transform(self, tree): + if is_captured_value(tree): # do not recurse into hygienic captures + return tree + if isnewscope(tree): # respect scope boundaries + return tree + + # `k = myield[value]` + if type(tree) is ast.Assign and is_myield_expr(tree.value): + if len(tree.targets) != 1: + raise SyntaxError("expected exactly one assignment target in k = myield[expr]") + var = tree.targets[0] + value = getslice(tree.value) + with q as quoted: + # Note in `mcpyrate` we can hygienically capture macros, too. + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return a[var], a[value] + # For `throw` support: if we are sent an exception instance or class, raise it. 
+ elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `k = myield` + elif type(tree) is ast.Assign and is_myield_name(tree.value): + if len(tree.targets) != 1: + raise SyntaxError("expected exactly one assignment target in k = myield[expr]") + var = tree.targets[0] + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return a[var] + elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `myield[value]` + elif type(tree) is ast.Expr and is_myield_expr(tree.value): + var = q[n[gensym("k")]] # kontinuation + value = getslice(tree.value) + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return h[partial](a[var], None), a[value] + # For `throw` support: `MultishotIterator` digs the `.func` from inside the `partial` + # to force a send, even though this variant of `myield` cannot receive a value by + # a normal `send`. 
+ elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `myield` + elif type(tree) is ast.Expr and is_myield_name(tree.value): + var = q[n[gensym("k")]] + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return h[partial](a[var], None) + elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + return self.generic_visit(tree) + + class ReturnToRaiseStopIterationTransformer(ASTTransformer): + def transform(self, tree): + if is_captured_value(tree): # do not recurse into hygienic captures + return tree + if isnewscope(tree): # respect scope boundaries + return tree + + if type(tree) is ast.Return: + # `return` + if tree.value is None: + with q as quoted: + raise h[StopIteration] + return quoted + # `return expr` + with q as quoted: + raise h[StopIteration](a[tree.value]) + return quoted + + return self.generic_visit(tree) + + # ------------------------------------------------------------ + # main processing logic + + # Make the multishot generator raise `StopIteration` when it finishes + # via any `return`. First make the implicit bare `return` explicit. + # + # We must do this before we transform the `myield` statements, + # to avoid breaking tail-calling the continuations. + if type(tree.body[-1]) is not ast.Return: + with q as quoted: + return + tree.body.extend(quoted) + tree.body = ReturnToRaiseStopIterationTransformer().visit(tree.body) + + # Inject a bare `myield` resume point at the beginning of the function body. + # This makes the resulting function work somewhat like a Python generator. + # When initially called, the arguments are bound, and you get a continuation; + # then resuming that continuation actually starts executing the function body. + tree.body.insert(0, ast.Expr(value=ast.Name(id=names_of_myield[0]))) + + # Transform multishot yields (`myield`) into `call_cc`. 
+ tree.body = MultishotYieldTransformer().visit(tree.body) + + return tree + + +# macro-import from higher phase; we're now in phase 0 +from __self__ import macros, multishot, myield # noqa: F811, F401 + +class MultishotIterator: + """Adapt a `@multishot` generator to Python's generator API. + + Example:: + + with continuations: + @multishot + def g(): + myield[1] + myield[2] + myield[3] + + # Instantiating the multi-shot generator returns a continuation; + # we can send that into a `MultishotIterator`. The resulting iterator + # behaves almost like a standard generator. + mi = MultishotIterator(g()) + assert [x for x in mi] == [1, 2, 3] + + `k`: A continuation, or a partially applied continuation + (e.g. one that does not usefully expect a value; + an `myield` with no assignment target will return such). + + The initial continuation to start execution from. + + Each `next` or `.send` will call the current `self.k`, and then overwrite + `self.k` with the new continuation returned by the multi-shot generator. + If the multi-shot generator raises `StopIteration` (so there is no new + continuation), the `MultishotIterator` marks itself as closed, and re-raises. + + The current continuation is stored as `self.k`. It is read/write, + type-checked at write time. + + If you overwrite `self.k` with another continuation, the next call + to `next` or `.send` will resume from that continuation instead. + If the iterator was closed, overwriting `self.k` will re-open it. + + This proof-of-concept demo only supports a subset of the generator API: + + - `iter(mi)` + - `next(mi)`, + - `mi.send(value)` + - `mi.throw(exc)` + - `mi.close()` + + where `mi` is a `MultishotIterator` instance. 
+ """ + def __init__(self, k): + self.k = k + self._closed = False + + # make writes into `self.k` type-check, for fail-fast + def _getk(self): + return self._k + def _setk(self, k): + if not (iscontinuation(k) or (isinstance(k, partial) and iscontinuation(k.func))): + raise TypeError(f"expected `k` to be a continuation or a partially applied continuation, got {k}") + self._k = k + self._closed = False + k = property(fget=_getk, fset=_setk, doc="The current continuation. Read/write.") + + # TODO: For thread safety, we should lock writes to `self._closed`, + # TODO: as well as make `_advance` behave atomically. + # Internal method that implements `next` and `.send`. + def _advance(self, mode, value=None): + assert mode in ("next", "send") + if self._closed: + raise StopIteration + # Intercept possible `StopIteration` and enter the closed + # state, to prevent re-running the last continuation (that + # raised `StopIteration`) when `next()` is called again. + try: + if mode == "next": + result = self.k() + else: # mode == "send" + result = self.k(value) + except StopIteration: # no new continuation + self._closed = True + raise + if isinstance(result, tuple): + self.k, x = result + else: + self.k, x = result, None + return x + + # generator API + def __iter__(self): + return self + def __next__(self): + return self._advance("next") + def send(self, value): + return self._advance("send", value) + + # The `throw` and `close` methods are not so useful as with regular + # generators, due to there being no concept of paused execution. + # + # The continuation is a separate nested closure, and it is not + # possible to usefully straddle a `try` or `with` across the + # boundary. + # + # For example, `with` only takes effect whenever it is "entered + # from the top", and it will release the context as soon as the + # multi-shot generator `myield`s the continuation. 
+ # + # `throw` pretty much just enters the continuation function, and + # makes it raise an exception; in true multi-shot fashion, the same + # continuation can still be resumed later (also without making it + # raise that time). + # + # `close` is only useful in that closing makes the multi-shot generator + # reject any further attempts to `next` or `.send` (unless you then + # overwrite the continuation manually). + # + # For an example of what serious languages that have `call_cc` do, see + # Racket's `dynamic-wind` construct ("wind" as in "winding/unwinding the call stack"). + # It's the supercharged big sister of Python's `with` construct that accounts for + # execution topologies where control may leave the block, and then suddenly return + # to the middle of it later (most often due to the invocation of a continuation + # that was created inside that block). + # https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29 + def throw(self, exc): + # If we are stopped at an `myield` that has no assignment target, so + # that it normally does not expect a value, we unwrap the original + # continuation from the `partial` to force-send the exception. + k = self.k.func if isinstance(self.k, partial) else self.k + k(exc) + + # https://stackoverflow.com/questions/60137570/explanation-of-generator-close-with-exception-handling + def close(self): + if self._closed: + return + self._closed = True + try: + self.throw(GeneratorExit) + except GeneratorExit: + return # ok! + # Any other exception is propagated. + else: # No exception means that the generator is trying to yield something. + raise RuntimeError("@multishot generator attempted to `myield` a value while it was being closed") + + +def runtests(): + # To start with, here's a sketch of what we want to do. 
+ with testset("multi-shot generators with the pattern call_cc[get_cc()]"): + with continuations: + def g(): + # The resume point at the beginning (just after parameters of `g` have + # been bound to the given arguments; though here we don't have any). + k = call_cc[get_cc()] + if iscontinuation(k): + # The `partial` makes it so `k` doesn't expect an argument; + # otherwise it would expect a value to set the local variable `k` to + # when the continuation is resumed. + # + # Since this example doesn't use that `k` if it's not the continuation + # (i.e. the initial return value of the `call_cc[get_cc()]`), + # we can just set the argument to `None` here. + return partial(k, None) + + # yield 1 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 1 + + # yield 2 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 2 + + # yield 3 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 3 + + raise StopIteration + + try: + out = [] + k = g() # instantiate the multi-shot generator + while True: + k, x = k() + out.append(x) + except StopIteration: + pass + test[out == [1, 2, 3]] + + k0 = g() # instantiate the multi-shot generator + k1, x1 = k0() + k2, x2 = k1() + k3, x3 = k2() + k, x = k1() # multi-shot generator can resume from an earlier point + test[x1 == 1] + test[x2 == x == 2] + test[x3 == 3] + test[k.func.__qualname__ == k2.func.__qualname__] # same bookmarked position... + test[k.func is not k2.func] # ...but different function object instance + test_raises[StopIteration, k3()] + + # Now, let's automate this. 
Testing all four kinds of multi-shot yield: + with testset("@multishot macro"): + with continuations: + @multishot + def f(): + myield + myield[42] + k = myield + test[k == 23] + k = myield[42] + test[k == 17] + + k0 = f() # instantiate the multi-shot generator + k1 = k0() + k2, x2 = k1() + test[x2 == 42] + k3 = k2() + k4, x4 = k3(23) + test[x4 == 42] + test_raises[StopIteration, k4(17)] + + # multi-shot: re-invoke an earlier continuation + k2, x2 = k1() + test[x2 == 42] + + # The first example rewritten to use the macro: + with testset("multi-shot generators with @multishot"): + with continuations: + @multishot + def g(): + myield[1] + myield[2] + myield[3] + + try: + out = [] + k = g() # instantiate the multi-shot generator + while True: + k, x = k() + out.append(x) + except StopIteration: + pass + test[out == [1, 2, 3]] + + k0 = g() # instantiate the multi-shot generator + k1, x1 = k0() + k2, x2 = k1() + k3, x3 = k2() + k, x = k1() # multi-shot generator can resume from an earlier point + test[x1 == 1] + test[x2 == x == 2] + test[x3 == 3] + test[k.func.__qualname__ == k2.func.__qualname__] # same bookmarked position... 
+ test[k.func is not k2.func] # ...but different function object instance + test_raises[StopIteration, k3()] + + # Using a `@multishot` as if it was a standard generator: + with testset("MultishotIterator: adapting @multishot to Python's generator API"): + # basic use + test[[x for x in MultishotIterator(g())] == [1, 2, 3]] + + # Re-using `g` from above: + mig = MultishotIterator(g()) + test[next(mig) == 1] + k = mig.k # stash the current continuation tracked by the `MultishotIterator` + test[next(mig) == 2] + test[next(mig) == 3] + mig.k = k # multi-shot: rewind to the point we stashed + test[next(mig) == 2] + test[next(mig) == 3] + + # Re-using `f` from above: + mif = MultishotIterator(f()) + test[next(mif) is None] + k = mif.k + test[next(mif) == 42] + test[next(mif) is None] + test[mif.send(23) == 42] + test_raises[StopIteration, mif.send(17)] + mif.k = k # rewind + test[next(mif) == 42] + test[next(mif) is None] + test[mif.send(23) == 42] + test_raises[StopIteration, mif.send(17)] + + # TODO: advanced examples, exercise all features + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/syntax/tests/test_conts_topo.py b/unpythonic/syntax/tests/test_conts_topo.py index ebdc3d8c..e61e8572 100644 --- a/unpythonic/syntax/tests/test_conts_topo.py +++ b/unpythonic/syntax/tests/test_conts_topo.py @@ -7,14 +7,14 @@ from ...syntax import macros, test, the # noqa: F401 from ...test.fixtures import session, testset -from inspect import stack +import inspect from ...syntax import macros, continuations, call_cc # noqa: F401, F811 def me(): """Return the caller's function name.""" - callstack = stack() - framerecord = callstack[1] # ignore me() itself, get caller's record + stack = inspect.stack() + framerecord = stack[1] # ignore me() itself, get caller's record return framerecord.function # Continuation names are gensymmed, so `mcpyrate` adds a uuid to them. 
diff --git a/unpythonic/syntax/tests/test_dbg.py b/unpythonic/syntax/tests/test_dbg.py index be1e5869..1e81d9f4 100644 --- a/unpythonic/syntax/tests/test_dbg.py +++ b/unpythonic/syntax/tests/test_dbg.py @@ -9,7 +9,7 @@ from ...syntax import dbgprint_block from ...dynassign import dyn -from ...misc import call +from ...funutil import call def runtests(): # some usage examples diff --git a/unpythonic/syntax/tests/test_ifexprs.py b/unpythonic/syntax/tests/test_ifexprs.py index 439af3d0..6da31068 100644 --- a/unpythonic/syntax/tests/test_ifexprs.py +++ b/unpythonic/syntax/tests/test_ifexprs.py @@ -4,8 +4,7 @@ from ...syntax import macros, test # noqa: F401 from ...test.fixtures import session, testset -from ...syntax import macros, aif, cond, local # noqa: F401, F811 -from ...syntax import it +from ...syntax import macros, aif, it, cond, local # noqa: F401, F811 def runtests(): with testset("aif (anaphoric if, you're `it`!)"): diff --git a/unpythonic/syntax/tests/test_lambdatools.py b/unpythonic/syntax/tests/test_lambdatools.py index 3345f57c..0f1a4d18 100644 --- a/unpythonic/syntax/tests/test_lambdatools.py +++ b/unpythonic/syntax/tests/test_lambdatools.py @@ -4,7 +4,7 @@ from ...syntax import macros, test, test_raises, warn # noqa: F401 from ...test.fixtures import session, testset -from ...syntax import (macros, multilambda, namedlambda, quicklambda, f, # noqa: F401, F811 +from ...syntax import (macros, multilambda, namedlambda, quicklambda, fn, # noqa: F401, F811 envify, local, let, autocurry, autoreturn) from functools import wraps @@ -20,13 +20,13 @@ def runtests(): echo = lambda x: [print(x), x] test[echo("hi there") == "hi there"] - count = let[(x, 0)][ # noqa: F821, the `let` macro defines `x` here. + count = let[x << 0][ # noqa: F821, the `let` macro defines `x` here. lambda: [x << x + 1, # noqa: F821 x]] # redundant, but demonstrating multi-expr body. 
# noqa: F821 test[count() == 1] test[count() == 2] - test1 = let[(x, 0)][ # noqa: F821 + test1 = let[x << 0][ # noqa: F821 lambda: [x << x + 1, # x belongs to the surrounding let # noqa: F821 local[y << 42], # y is local to the implicit do # noqa: F821 (x, y)]] # noqa: F821 @@ -47,16 +47,20 @@ def runtests(): with namedlambda: f1 = lambda x: x**3 # assignment: name as "f1" test[f1.__name__ == "f1"] - gn, hn = let[(x, 42), (g, None), (h, None)][[ # noqa: F821 + gn, hn = let[x << 42, g << None, h << None][[ # noqa: F821 g << (lambda x: x**2), # env-assignment: name as "g" # noqa: F821 h << f1, # still "f1" (RHS is not a literal lambda) # noqa: F821 (g.__name__, h.__name__)]] # noqa: F821 test[gn == "g"] test[hn == "f1"] - foo = let[(f7, lambda x: x) in f7] # let-binding: name as "f7" # noqa: F821 + foo = let[[f7 << (lambda x: x)] in f7] # let-binding: name as "f7" # noqa: F821 test[foo.__name__ == "f7"] + if foo2 := (lambda x: x): # NamedExpr a.k.a. walrus operator (Python 3.8+) + pass + test[foo2.__name__ == "foo2"] + # function call with named arg def foo(func1, func2): test[func1.__name__ == "func1"] @@ -65,12 +69,12 @@ def foo(func1, func2): func2=lambda x: x**2) # function call with named arg: name as "func2" def bar(func1, func2): - test[func1.__name__ == ""] - test[func2.__name__ == ""] - bar(lambda x: x**2, lambda x: x**2) # no naming when passed positionally + test[func1.__name__.startswith(""] + test[func1.__name__.startswith(" 1 else acc) # linear process test[islazy(fact)] test[fact(5) == 120] + with testset("integration with pipes"): + # This is the testset from unpythonic/tests/test_seq.py, slightly modified. 
+ with lazify: + double = lambda x: 2 * x + inc = lambda x: x + 1 + test[pipe1(42, double, inc) == 85] # 1-in-1-out + test[pipe1(42, inc, double) == 86] + test[pipe(42, double, inc) == 85] # n-in-m-out, supports also 1-in-1-out + test[pipe(42, inc, double) == 86] + + # 2-in-2-out + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y), + lambda x, y: Values(x * 2, y + 1)) + test[(a, b) == (6, 7)] + + # 2-in-2-out, pass intermediate result by name + a, b = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(x * 2, y + 1)) + test[(a, b) == (6, 7)] + + # 2-in-2-out, also return final result by name + v = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(a=(x * 2), b=(y + 1))) + test[v == Values(a=6, b=7)] + test[v["a"] == 6 and v["b"] == 7] # can access them via subscripting too + + # 2-in-eventually-3-out + a, b, c = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, z: Values(x * 2, y + 1, f"got {z}")) + test[(a, b, c) == (6, 7, "got foo")] + + # 2-in-3-in-between-2-out + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, s: Values(x * 2, y + 1, f"got {s}"), + lambda x, y, s: Values(x + y, s)) + test[(a, b) == (13, "got foo")] + + # pipec: curry the functions before running the pipeline + a, b = pipec(Values(1, 2), + lambda x: x + 1, # extra values passed through by curry (positionals on the right) + lambda x, y: Values(x * 2, y + 1)) + test[(a, b) == (4, 3)] + + with test_raises[TypeError, "should error when the curry context exits with args remaining"]: + a, b = pipec(Values(1, 2), + lambda x: x + 1, + lambda x: x * 2) + + # optional shell-like syntax + test[piped1(42) | double | inc | exitpipe == 85] + + y = piped1(42) | double + test[y | inc | exitpipe == 85] + test[y | exitpipe == 84] # y is never modified by the pipe system + + # multi-arg version + f = lambda x, y: Values(2 * x, y + 1) + g = lambda x, y: Values(x + 1, 2 * 
y) + x = piped(2, 3) | f | g | exitpipe # --> (5, 8) + test[x == Values(5, 8)] + + # abuse multi-arg version for single-arg case + test[piped(42) | double | inc | exitpipe == 85] + + with testset("integration with lazy pipes (plan computations)"): + # This is the testset from unpythonic/tests/test_seq.py, slightly modified. + with lazify: + # lazy pipe: compute later + lst = [1] + def append_succ(lis): + lis.append(lis[-1] + 1) + return lis # important, handed to the next function in the pipe + p = lazy_piped1(lst) | append_succ | append_succ # plan a computation + test[lst == [1]] # nothing done yet + p | exitpipe # run the computation + test[lst == [1, 2, 3]] # now the side effect has updated lst. + + # lazy pipe as an unfold + fibos = [] + def nextfibo(state): + a, b = state + fibos.append(a) # store result by side effect + return (b, a + b) # new state, handed to the next function in the pipe + p = lazy_piped1((1, 1)) # load initial state into a lazy pipe + for _ in range(10): # set up pipeline + p = p | nextfibo + p | exitpipe + test[fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]] + + # multi-arg lazy pipe + p1 = lazy_piped(2, 3) + p2 = p1 | (lambda x, y: Values(x + 1, 2 * y, "foo")) + p3 = p2 | (lambda x, y, s: Values(x * 2, y + 1, f"got {s}")) + p4 = p3 | (lambda x, y, s: Values(x + y, s)) + # nothing done yet, and all computations purely functional: + test[(p1 | exitpipe) == Values(2, 3)] + test[(p2 | exitpipe) == Values(3, 6, "foo")] # runs the chain up to p2 + test[(p3 | exitpipe) == Values(6, 7, "got foo")] # runs the chain up to p3 + test[(p4 | exitpipe) == Values(13, "got foo")] + + # multi-arg lazy pipe as an unfold + fibos = [] + def nextfibo(a, b): # now two arguments + fibos.append(a) + return Values(a=b, b=(a + b)) # can return by name too + p = lazy_piped(1, 1) + for _ in range(10): + p = p | nextfibo + test[p | exitpipe == Values(a=89, b=144)] # final state + test[fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]] + + # abuse multi-arg version for 
single-arg case + test[lazy_piped(42) | double | inc | exitpipe == 85] + with testset("integration with TCO"): with lazify: @trampolined @@ -456,7 +634,7 @@ def func2(x): test[func1(21) == 42] print("*** This error case SHOULD PRINT A WARNING:", file=stderr) - with test_raises(RuntimeError): + with test_raises[RuntimeError]: @trampolined def func3(): return jump(42) @@ -478,7 +656,7 @@ def withec2(ec): test[withec2 == 42] # Introducing the HasThon programming language. - # For a continuation-enabled HasThon, use "with lazify, autocurry, continuations". + # If you want to play around with this idea, see `unpythonic.dialects.pytkell`. with testset("HasThon, with 100% more Thon than popular brands"): with lazify, autocurry: def add3(a, b, c): @@ -491,10 +669,10 @@ def add2first(a, b, c): def f(a, b): return a - test[let[((c, 42), (d, 1 / 0)) in f(c)(d)] == 42] - test[letrec[((c, 42), (d, 1 / 0), (e, 2 * c)) in f(e)(d)] == 84] + test[let[[c << 42, d << 1 / 0] in f(c)(d)] == 42] + test[letrec[[c << 42, d << 1 / 0, e << 2 * c] in f(e)(d)] == 84] - test[letrec[((c, 42), (d, 1 / 0), (e, 2 * c)) in [local[x << f(e)(d)], # noqa: F821, `letrec` defines `x` here. + test[letrec[[c << 42, d << 1 / 0, e << 2 * c] in [local[x << f(e)(d)], # noqa: F821, `letrec` defines `x` here. x / 2]] == 42] # noqa: F821 # works also with continuations @@ -502,6 +680,16 @@ def f(a, b): # - cc built by chain_conts is treated as lazy, **itself**; then it's up to # the continuations chained by it to decide whether to force their args. # - the default cont ``identity`` is strict, so it will force return values + # - if you want a non-strict identity for use at the entry point to your + # continuation-enabled computation, do this: + # + # from unpythonic import identity + # from unpythonic.lazyutil import passthrough_lazy_args + # lazy_identity = passthrough_lazy_args(identity) + # + # and then explicitly set the kwarg `cc=lazy_identity` when invoking the + # continuation-enabled computation (e.g. 
in the example below, we could + # `ourpromises = doit(cc=lazy_identity)`). with testset("integration with continuations"): with lazify, continuations: k = None diff --git a/unpythonic/syntax/tests/test_letdo.py b/unpythonic/syntax/tests/test_letdo.py index fc6abf61..0ea8dd0b 100644 --- a/unpythonic/syntax/tests/test_letdo.py +++ b/unpythonic/syntax/tests/test_letdo.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """Let constructs; do (imperative code in expression position).""" -# TODO: Update the @dlet, @dletseq, @dletrec, @blet, @bletseq, @bletrec examples -# TODO: to pass macro arguments using brackets once we bump to minimum Python 3.9. +# NOTE: Decorator macro arguments use parenthesis syntax in some examples below. +# Bracket syntax is preferred for new code; parenthesis syntax is deprecated but kept for backward compatibility. from ...syntax import macros, test, test_raises # noqa: F401 from ...test.fixtures import session, testset @@ -17,22 +17,48 @@ x = "the global x" # for lexical scoping tests def runtests(): - with testset("do (imperative code in an expression)"): + with testset("do (imperative code in an expression) (new env-assignment syntax 0.15.3+)"): # Macro wrapper for unpythonic.seq.do (imperative code in expression position) - # - Declare and initialize a local variable with ``local[var << value]``. + # - Declare and initialize a local variable with ``local[var := value]``. # Is in scope from the next expression onward, for the (lexical) remainder # of the do. - # - Assignment is ``var << value``. Valid from any level inside the ``do`` + # - Assignment is ``var := value``. Valid from any level inside the ``do`` # (including nested ``let`` constructs and similar). # - No need for ``lambda e: ...`` wrappers. Inserted automatically, # so the lines are only evaluated as the underlying seq.do() runs. 
+ # + d1 = do[local[x := 17], + print(x), + (x := 23), + x] + test[d1 == 23] + + # Since we repurposed an existing assignment operator, let's check we didn't accidentally assign to the function scope. + test_raises[NameError, x, "only the `do[]` should have an `x` here"] + + # v0.14.0: do[] now supports deleting previously defined local names with delete[] + a = 5 + d = do[local[a := 17], # noqa: F841, yes, d is unused. + test[a == 17], + delete[a], + test[a == 5], # lexical scoping + True] + + test_raises[KeyError, do[delete[a], ], "should have complained about deleting nonexistent local 'a'"] + + # do0[]: like do[], but return the value of the **first** expression + d2 = do0[local[y := 5], # noqa: F821, `local` defines the name on the LHS of the `<<`. + print("hi there, y =", y), # noqa: F821 + 42] # evaluated but not used + test[d2 == 5] + + with testset("do (imperative code in an expression) (previous modern env-assignment syntax)"): d1 = do[local[x << 17], print(x), x << 23, - x] # do[] returns the value of the last expression + x] # do[] returns the value of the last expression # noqa: F823, it's the `x` from `do[]`, not from the enclosing scope. test[d1 == 23] - # v0.14.0: do[] now supports deleting previously defined local names with delete[] a = 5 d = do[local[a << 17], # noqa: F841, yes, d is unused. test[a == 17], @@ -42,65 +68,118 @@ def runtests(): test_raises[KeyError, do[delete[a], ], "should have complained about deleting nonexistent local 'a'"] - # do0[]: like do[], but return the value of the **first** expression d2 = do0[local[y << 5], # noqa: F821, `local` defines the name on the LHS of the `<<`. print("hi there, y =", y), # noqa: F821 42] # evaluated but not used test[d2 == 5] # Let macros. Lexical scoping supported. - with testset("let, letseq, letrec basic usage"): + with testset("let, letseq, letrec basic usage (new env-assignment syntax 0.15.3+)"): # parallel binding, i.e. 
bindings don't see each other - test[let[(x, 17), - (y, 23)][ # noqa: F821, `let` defines `y` here. + test[let[(x := 17), + (y := 23)][ # noqa: F821, `let` defines `y` here. (x, y)] == (17, 23)] # noqa: F821 # sequential binding, i.e. Scheme/Racket let* - test[letseq[(x, 1), - (y, x + 1)][ # noqa: F821 + test[letseq[(x := 1), + (y := x + 1)][ # noqa: F821 (x, y)] == (1, 2)] # noqa: F821 - test[letseq[(x, 1), - (x, x + 1)][ # in a letseq, rebinding the same name is ok + test[letseq[(x := 1), + (x := x + 1)][ # in a letseq, rebinding the same name is ok x] == 2] # letrec sugars unpythonic.lispylet.letrec, removing the need for quotes on LHS # and "lambda e: ..." wrappers on RHS (these are inserted by the macro): - test[letrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `letrec` defines `evenp` here. - (oddp, lambda x: (x != 0) and evenp(x - 1))][ # noqa: F821 + test[letrec[(evenp := (lambda x: (x == 0) or oddp(x - 1))), # noqa: F821, `letrec` defines `evenp` here. + (oddp := (lambda x: (x != 0) and evenp(x - 1)))][ # noqa: F821 evenp(42)] is True] # noqa: F821 # nested letrecs work, too - each environment is internally named by a gensym # so that outer ones "show through": - test[letrec[(z, 9000)][ # noqa: F821 - letrec[(evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 - (oddp, lambda x: (x != 0) and evenp(x - 1))][ # noqa: F821 + test[letrec[(z := 9000)][ # noqa: F821 + letrec[(evenp := (lambda x: (x == 0) or oddp(x - 1))), # noqa: F821 + (oddp := (lambda x: (x != 0) and evenp(x - 1)))][ # noqa: F821 + (evenp(42), z)]] == (True, 9000)] # noqa: F821 + + with testset("let, letseq, letrec basic usage (previous modern env-assignment syntax)"): + # parallel binding, i.e. bindings don't see each other + test[let[x << 17, + y << 23][ # noqa: F821, `let` defines `y` here. + (x, y)] == (17, 23)] # noqa: F821 + + # sequential binding, i.e. 
Scheme/Racket let* + test[letseq[x << 1, + y << x + 1][ # noqa: F821 + (x, y)] == (1, 2)] # noqa: F821 + + test[letseq[x << 1, + x << x + 1][ # in a letseq, rebinding the same name is ok + x] == 2] + + # letrec sugars unpythonic.lispylet.letrec, removing the need for quotes on LHS + # and "lambda e: ..." wrappers on RHS (these are inserted by the macro): + test[letrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `letrec` defines `evenp` here. + oddp << (lambda x: (x != 0) and evenp(x - 1))][ # noqa: F821 + evenp(42)] is True] # noqa: F821 + + # nested letrecs work, too - each environment is internally named by a gensym + # so that outer ones "show through": + test[letrec[z << 9000][ # noqa: F821 + letrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x: (x != 0) and evenp(x - 1))][ # noqa: F821 (evenp(42), z)]] == (True, 9000)] # noqa: F821 with testset("error cases"): # let is parallel binding, doesn't see the X in the same let test_raises[NameError, - let[(X, 1), # noqa: F821 - (y, X + 1)][ # noqa: F821 + let[X << 1, # noqa: F821 + y << X + 1][ # noqa: F821 print(X, y)], # noqa: F821 "should not see the X in the same let"] test_raises[NameError, - letseq[(X, y + 1), # noqa: F821 - (y, 2)][ # noqa: F821 + letseq[X << y + 1, # noqa: F821 + y << 2][ # noqa: F821 (X, y)], # noqa: F821 "y should not yet be defined on the first line"] test_raises[AttributeError, - let[(x, 1), - (x, 2)][ + let[x << 1, + x << 2][ print(x)], "should not be able to rebind the same name in the same let"] # implicit do: an extra set of brackets denotes a multi-expr body - with testset("implicit do (extra bracket syntax for multi-expr let body)"): + with testset("implicit do (extra bracket syntax for multi-expr let body) (new env-assignment syntax v0.15.3+)"): + a = let[(x := 1), + (y := 2)][[ # noqa: F821 + y := 1337, # noqa: F821 + (x, y)]] # noqa: F821 + test[a == (1, 1337)] + + # only the outermost extra brackets denote a multi-expr body a = 
let[(x, 1), (y, 2)][[ # noqa: F821 + [1, 2]]] + test[a == [1, 2]] + + # implicit do works also in letseq, letrec + a = letseq[(x := 1), + (y := x + 1)][[ # noqa: F821 + x := 1337, + (x, y)]] # noqa: F821 + test[a == (1337, 2)] + + a = letrec[(x := 1), + (y := x + 1)][[ # noqa: F821 + x := 1337, + (x, y)]] # noqa: F821 + test[a == (1337, 2)] + + with testset("implicit do (extra bracket syntax for multi-expr let body)"): + a = let[x << 1, + y << 2][[ # noqa: F821 y << 1337, # noqa: F821 (x, y)]] # noqa: F821 test[a == (1, 1337)] @@ -112,14 +191,14 @@ def runtests(): test[a == [1, 2]] # implicit do works also in letseq, letrec - a = letseq[(x, 1), - (y, x + 1)][[ # noqa: F821 + a = letseq[x << 1, + y << x + 1][[ # noqa: F821 x << 1337, (x, y)]] # noqa: F821 test[a == (1337, 2)] - a = letrec[(x, 1), - (y, x + 1)][[ # noqa: F821 + a = letrec[x << 1, + y << x + 1][[ # noqa: F821 x << 1337, (x, y)]] # noqa: F821 test[a == (1337, 2)] @@ -129,36 +208,36 @@ def runtests(): # (so the z in the inner scope expands to the inner environment's z, # which makes the outer expansion leave it alone): out = [] - letrec[(z, 1)][ # noqa: F821 + letrec[z << 1][ # noqa: F821 begin(out.append(z), # noqa: F821 - letrec[(z, 2)][ # noqa: F821 + letrec[z << 2][ # noqa: F821 out.append(z)])] # (be careful with the parentheses!) 
# noqa: F821 test[out == [1, 2]] # same using implicit do (extra brackets) out = [] - letrec[(z, 1)][[ # noqa: F821 + letrec[z << 1][[ # noqa: F821 out.append(z), # noqa: F821 - letrec[(z, 2)][ # noqa: F821 + letrec[z << 2][ # noqa: F821 out.append(z)]]] # noqa: F821 test[out == [1, 2]] # lexical scoping: assignment updates the innermost value by that name: out = [] - letrec[(z, 1)][ # noqa: F821 + letrec[z << 1][ # noqa: F821 begin(out.append(z), # outer z # noqa: F821 # assignment to env is an expression, returns the new value out.append(z << 5), # noqa: F821 - letrec[(z, 2)][ # noqa: F821 + letrec[z << 2][ # noqa: F821 begin(out.append(z), # inner z # noqa: F821 out.append(z << 7))], # update inner z # noqa: F821 out.append(z))] # outer z # noqa: F821 test[out == [1, 5, 2, 7, 5]] out = [] - letrec[(x, 1)][ + letrec[x << 1][ begin(out.append(x), - letrec[(z, 2)][ # noqa: F821 + letrec[z << 2][ # noqa: F821 begin(out.append(z), # noqa: F821 out.append(x << 7))], # x only defined in outer letrec, updates that out.append(x))] @@ -166,23 +245,23 @@ def runtests(): # same using implicit do out = [] - letrec[(x, 1)][[ + letrec[x << 1][[ out.append(x), - letrec[(z, 2)][[ # noqa: F821 + letrec[z << 2][[ # noqa: F821 out.append(z), # noqa: F821 out.append(x << 7)]], out.append(x)]] test[out == [1, 2, 7, 7]] # letrec bindings are evaluated sequentially - test[letrec[(x, 1), - (y, x + 2)][ # noqa: F821 + test[letrec[x << 1, + y << x + 2][ # noqa: F821 (x, y)] == (1, 3)] # noqa: F821 # so this is an error (just like in Racket): test_raises[AttributeError, - letrec[(x, y + 1), # noqa: F821, `y` being undefined here is the point of this test. - (y, 2)][ # noqa: F821 + letrec[x << y + 1, # noqa: F821, `y` being undefined here is the point of this test. + y << 2][ # noqa: F821 print(x)], "y should not be yet defined on the first line"] @@ -191,33 +270,33 @@ def runtests(): # # This is the whole point of having a letrec construct, # instead of just let, letseq. 
- test[letrec[(f, lambda t: t + y + 1), # noqa: F821 - (y, 2)][ # noqa: F821 + test[letrec[f << (lambda t: t + y + 1), # noqa: F821 + y << 2][ # noqa: F821 f(3)] == 6] # noqa: F821 # bindings are evaluated only once - a = letrec[(x, 1), - (y, x + 2)][[ # y computed now, using the current value of x # noqa: F821 + a = letrec[x << 1, + y << x + 2][[ # y computed now, using the current value of x # noqa: F821 x << 1337, # x updated now, no effect on y (x, y)]] # noqa: F821 test[a == (1337, 3)] # lexical scoping: a comprehension or lambda in a let body # shadows names from the surrounding let, but only in that subexpr - test[let[(x, 42)][[ + test[let[x << 42][[ [x for x in range(10)]]] == list(range(10))] - test[let[(x, 42)][[ + test[let[x << 42][[ [x for x in range(10)], x]] == 42] - test[let[(x, 42)][ + test[let[x << 42][ (lambda x: x**2)(10)] == 100] - test[let[(x, 42)][[ + test[let[x << 42][[ (lambda x: x**2)(10), x]] == 42] # let over lambda - in Python! with testset("let over lambda"): - count = let[(x, 0)][ + count = let[x << 0][ lambda: x << x + 1] test[count() == 1] test[count() == 2] @@ -226,7 +305,7 @@ def runtests(): # - sugar around unpythonic.lispylet.dlet et al. # - env is passed implicitly, and named with a gensym (so lexical scoping works) with testset("let over def"): - @dlet((x, 0)) + @dlet(x << 0) def count(): x << x + 1 # assigment to let environment uses the "assignment expr" syntax return x @@ -234,26 +313,26 @@ def count(): test[count() == 2] # nested dlets respect lexical scoping - @dlet((x, 22)) + @dlet(x << 22) def outer(): x << x + 1 - @dlet((x, 41)) + @dlet(x << 41) def inner(): return x << x + 1 return (x, inner()) test[outer() == (23, 42)] # letseq over def - @dletseq((x, 1), - (x, x + 1), - (x, x + 2)) + @dletseq(x << 1, + x << x + 1, + x << x + 2) def g(a): return a + x test[g(10) == 14] # letrec over def - @dletrec((evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `dletrec` defines `evenp` here. 
- (oddp, lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 + @dletrec(evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `dletrec` defines `evenp` here. + oddp << (lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 def f(x): return evenp(x) # noqa: F821 test[f(42) is True] @@ -262,20 +341,20 @@ def f(x): with testset("let block"): # block version # - the def takes no args, runs immediately, replaced with return value - @blet((x, 21)) + @blet(x << 21) def result(): return 2 * x test[result == 42] - @bletseq((x, 1), - (x, x + 1), - (x, x + 2)) # noqa: F823, `bletseq` defines and assigns to `x`. + @bletseq(x << 1, + x << x + 1, + x << x + 2) # noqa: F823, `bletseq` defines and assigns to `x`. def result(): return x test[result == 4] - @bletrec((evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `bletrec` defines `evenp` here. - (oddp, lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 + @bletrec(evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `bletrec` defines `evenp` here. + oddp << (lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 def result(): return evenp(42) # noqa: F821 test[result is True] @@ -283,43 +362,43 @@ def result(): # interaction of unpythonic's scoping system with Python's own lexical scoping with testset("integration of let scoping with Python's scoping"): x = "the nonlocal x" - @dlet((x, "the env x")) + @dlet(x << "the env x") def test1(): return x test[test1() == "the env x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test2(): return x # local var assignment not in effect yet # noqa: F823, `dlet` defines `x` here. x = "the unused local x" # noqa: F841, this `x` being unused is the point of this test. 
# pragma: no cover test[test2() == "the env x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test3(): x = "the local x" return x test[test3() == "the local x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test4(): - nonlocal x + nonlocal x # noqa: F824, Python 3.12+ complain about this; just testing our let construct; it's correct that there's no local `x` as per Python's normal scoping rules. return x test[test4() == "the nonlocal x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test5(): - global x + global x # noqa: F824, Python 3.12+ complain about this; just testing our let construct; it's correct that there's no local `x` as per Python's normal scoping rules. return x test[test5() == "the global x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test6(): class Foo: x = "the classattr x" # name in store context, not the env x return x test[test6() == "the env x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test7(): class Foo: x = "the classattr x" @@ -328,7 +407,7 @@ def doit(self): return (Foo().doit(), x) test[test7() == ("the classattr x", "the env x")] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test8(): class Foo: x = "the classattr x" @@ -361,27 +440,27 @@ def doit(self): # in the standard library, but we also perform some expression-by-expression # analysis to make it possible to refer to the old bindings on the RHS of # "name << value", as well as to support local deletion.) 
- @dlet((x, "the env x")) + @dlet(x << "the env x") def test9(): x = x + " (copied to local)" # the local x = the env x # noqa: F823 return x # the local x test[test9() == "the env x (copied to local)"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test10(): x = x + " (copied to local)" # noqa: F823 del x # comes into effect for the next statement return x # so this is env's original x test[test10() == "the env x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test11(): x = "the local x" return x # not deleted yet del x # this seems to be optimized out by Python. # pragma: no cover test[test11() == "the local x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test12(): x = "the local x" del x @@ -389,7 +468,7 @@ def test12(): return x test[test12() == "the other local x"] - @dlet((x, "the env x")) + @dlet(x << "the env x") def test13(): x = "the local x" del x @@ -397,9 +476,9 @@ def test13(): x = "the unused local x" # noqa: F841, this `x` being unused is the point of this test. # pragma: no cover test[test13() == "the env x"] - with test_raises(NameError, "should have tried to access the deleted nonlocal x"): + with test_raises[NameError, "should have tried to access the deleted nonlocal x"]: x = "the nonlocal x" - @dlet((x, "the env x")) + @dlet(x << "the env x") def test14(): nonlocal x del x # ignored by unpythonic's scope analysis, too dynamic @@ -407,72 +486,99 @@ def test14(): test14() x = "the nonlocal x" # restore the test environment + # v0.15.3+: walrus syntax + @dlet(x := "the env x") + def test15(): + def inner(): + (x := "updated env x") # noqa: F841, this writes to the let env since there is no `x` in an intervening scope, according to Python's standard rules. + inner() + return x + test[test15() == "updated env x"] + + @dlet(x := "the env x") + def test16(): + def inner(): + x = "the inner x" # noqa: F841, unused on purpose, for testing. An assignment *statement* does NOT write to the let env. 
+ inner() + return x + test[test16() == "the env x"] + + @dlet(x := "the env x") + def test17(): + x = "the local x" # This lexical variable shadows the env x. + def inner(): + # The env x is shadowed. Since we don't say `nonlocal x`, this creates a new lexical variable scoped to `inner`. + (x := "the inner x") # noqa: F841, unused on purpose, for testing. + inner() + return x + test[test17() == "the local x"] + # in do[] (also the implicit do), local[] takes effect from the next item - test[let[(x, "the let x"), - (y, None)][ # noqa: F821 + test[let[x << "the let x", + y << None][ # noqa: F821 do[y << x, # still the "x" of the let # noqa: F821 local[x << "the do x"], # from here on, "x" refers to the "x" of the do (x, y)]] == ("the do x", "the let x")] # noqa: F821 # don't code like this! ...but the scoping mechanism should understand it result = [] - let[(lst, [])][do[result.append(lst), # the let "lst" # noqa: F821 + let[lst << []][do[result.append(lst), # the let "lst" # noqa: F821 local[lst << lst + [1]], # LHS: do "lst", RHS: let "lst" # noqa: F821 result.append(lst)]] # the do "lst" # noqa: F821 test[result == [[], [1]]] # same using implicit do result = [] - let[(lst, [])][[result.append(lst), # noqa: F821 + let[lst << []][[result.append(lst), # noqa: F821 local[lst << lst + [1]], # noqa: F821 result.append(lst)]] # noqa: F821 test[result == [[], [1]]] with testset("haskelly syntax"): - result = let[((foo, 5), # noqa: F821, `let` defines `foo` here. - (bar, 2)) # noqa: F821 + result = let[[foo << 5, # noqa: F821, `let` defines `foo` here. + bar << 2] # noqa: F821 in foo + bar] # noqa: F821 test[result == 7] - result = letseq[((foo, 100), # noqa: F821, `letseq` defines `foo` here. - (foo, 2 * foo), # noqa: F821 - (foo, 4 * foo)) # noqa: F821 + result = letseq[[foo << 100, # noqa: F821, `letseq` defines `foo` here. 
+ foo << 2 * foo, # noqa: F821 + foo << 4 * foo] # noqa: F821 in foo] # noqa: F821 test[result == 800] - result = letrec[((evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `letrec` defines `evenp` here. - (oddp, lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 + result = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `letrec` defines `evenp` here. + oddp << (lambda x: (x != 0) and evenp(x - 1))] # noqa: F821 in [print("hi from letrec-in"), evenp(42)]] # noqa: F821 test[result is True] # inverted let, for situations where a body-first style improves readability: result = let[foo + bar, # noqa: F821, the names in this expression are defined in the `where` clause of the `let`. - where((foo, 5), # noqa: F821, this defines `foo`. - (bar, 2))] # noqa: F821 + where[foo << 5, # noqa: F821, this defines `foo`. + bar << 2]] # noqa: F821 test[result == 7] result = letseq[foo, # noqa: F821 - where((foo, 100), # noqa: F821 - (foo, 2 * foo), # noqa: F821 - (foo, 4 * foo))] # noqa: F821 + where[foo << 100, # noqa: F821 + foo << 2 * foo, # noqa: F821 + foo << 4 * foo]] # noqa: F821 test[result == 800] # can also use the extra bracket syntax to get an implicit do # (note the [] should then enclose the body only). result = letrec[[print("hi from letrec-where"), evenp(42)], # noqa: F821 - where((evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 - (oddp, lambda x: (x != 0) and evenp(x - 1)))] # noqa: F821 + where[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x: (x != 0) and evenp(x - 1))]] # noqa: F821 test[result is True] - # TODO: for now, with more than one binding the outer parentheses + # With more than one binding the delimiters surrounding the bindings subform # are required, even in this format where they are somewhat redundant. 
- result = let[((x, 1), (y, 2)) in x + y] # noqa: F821 + result = let[[x << 1, y << 2] in x + y] # noqa: F821 test[result == 3] - # single binding special syntax, no need for outer parentheses - with testset("special syntax for single binding case"): + # single binding special syntax, no need for outer delimiters + with testset("special syntax for single binding case (classic format)"): result = let[x, 1][2 * x] test[result == 2] result = let[(x, 1) in 2 * x] @@ -510,12 +616,50 @@ def quux(): return x test[quux == 1] + with testset("special syntax for single binding case (modern format)"): + result = let[x << 1][2 * x] + test[result == 2] + result = let[[x << 1] in 2 * x] + test[result == 2] + result = let[2 * x, where[x << 1]] + test[result == 2] + + @dlet(x << 1) + def qux(): + return x + test[qux() == 1] + + @dletseq(x << 1) + def qux(): + return x + test[qux() == 1] + + @dletrec(x << 1) + def qux(): + return x + test[qux() == 1] + + @blet(x << 1) + def quux(): + return x + test[quux == 1] + + @bletseq(x << 1) + def quux(): + return x + test[quux == 1] + + @bletrec(x << 1) + def quux(): + return x + test[quux == 1] + with testset("object instance bound to let variable"): # The point is to test whether `s.a` below transforms # correctly to `e.s.a`. 
class Silly: a = "Ariane 5" - test[let[(s, Silly()) in s.a] == "Ariane 5"] # noqa: F821 + test[let[[s << Silly()] in s.a] == "Ariane 5"] # noqa: F821 if __name__ == '__main__': # pragma: no cover with session(__file__): diff --git a/unpythonic/syntax/tests/test_letdoutil.py b/unpythonic/syntax/tests/test_letdoutil.py index 5ca5832a..f30d844a 100644 --- a/unpythonic/syntax/tests/test_letdoutil.py +++ b/unpythonic/syntax/tests/test_letdoutil.py @@ -12,11 +12,9 @@ autocurry) from ast import Tuple, Name, Constant, Lambda, BinOp, Attribute, Call -import sys from mcpyrate import unparse -from ...syntax.astcompat import getconstant, Num from ...syntax.letdoutil import (canonize_bindings, isenvassign, islet, isdo, UnexpandedEnvAssignView, @@ -41,6 +39,10 @@ def validate(lst): test[validate(the[canonize_bindings(q[k0, v0].elts)])] # noqa: F821, it's quoted. test[validate(the[canonize_bindings(q[((k0, v0),)].elts)])] # noqa: F821 test[validate(the[canonize_bindings(q[(k0, v0), (k1, v1)].elts)])] # noqa: F821 + test[validate(the[canonize_bindings([q[(k0 := v0)]])])] # noqa: F821, it's quoted. + test[validate(the[canonize_bindings([q[k0 << v0]])])] # noqa: F821, it's quoted. + test[validate(the[canonize_bindings(q[(k0 := v0), (k1 := v1)].elts)])] # noqa: F821, it's quoted. + test[validate(the[canonize_bindings(q[k0 << v0, k1 << v1].elts)])] # noqa: F821, it's quoted. # -------------------------------------------------------------------------------- # AST structure matching @@ -49,46 +51,98 @@ def validate(lst): # need this utility, so we must test it first. 
with testset("isenvassign"): test[not isenvassign(q[x])] # noqa: F821 + test[isenvassign(q[(x := 42)])] # noqa: F821 test[isenvassign(q[x << 42])] # noqa: F821 with testset("islet"): test[not islet(q[x])] # noqa: F821 test[not islet(q[f()])] # noqa: F821 + # unpythonic 0.15.3+, Python 3.8+ + test[islet(the[expandrq[let[(x := 21)][2 * x]]]) == ("expanded_expr", "let")] # noqa: F821, `let` defines `x` + test[islet(the[expandrq[let[[x := 21] in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 + test[islet(the[expandrq[let[2 * x, where[(x := 21)]]]]) == ("expanded_expr", "let")] # noqa: F821 + + # unpythonic 0.15.0 to 0.15.2, previous modern notation for bindings + test[islet(the[expandrq[let[x << 21][2 * x]]]) == ("expanded_expr", "let")] # noqa: F821, `let` defines `x` + test[islet(the[expandrq[let[[x << 21] in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 + test[islet(the[expandrq[let[2 * x, where[x << 21]]]]) == ("expanded_expr", "let")] # noqa: F821 + + # classic notation for bindings test[islet(the[expandrq[let[(x, 21)][2 * x]]]) == ("expanded_expr", "let")] # noqa: F821, `let` defines `x` test[islet(the[expandrq[let[(x, 21) in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 test[islet(the[expandrq[let[2 * x, where(x, 21)]]]) == ("expanded_expr", "let")] # noqa: F821 - with expandrq as testdata: # pragma: no cover - @dlet((x, 21)) # noqa: F821 + # unpythonic 0.15.3+, Python 3.8+ + with expandrq as testdata: + @dlet(x := 21) # noqa: F821 + def f0(): + return 2 * x # noqa: F821 + test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + + # unpythonic 0.15.0 to 0.15.2, previous modern notation for bindings + with expandrq as testdata: + @dlet(x << 21) # noqa: F821 def f1(): return 2 * x # noqa: F821 test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + # classic notation for bindings + with expandrq as testdata: + @dlet((x, 21)) # noqa: F821 + def f2(): + return 2 * x # noqa: F821 + 
test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + + testdata = q[let[(x := 21)][2 * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("lispy_expr", "let")] + + testdata = q[let[x << 21][2 * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("lispy_expr", "let")] + testdata = q[let[(x, 21)][2 * x]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("lispy_expr", "let")] # one binding special case for haskelly let-in + testdata = q[let[[x, 21] in 2 * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("in_expr", "let")] testdata = q[let[(x, 21) in 2 * x]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("in_expr", "let")] + testdata = q[let[2 * x, where[x, 21]]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("where_expr", "let")] + testdata = q[let[2 * x, where(x, 21)]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("where_expr", "let")] + testdata = q[let[[x := 21, y := 2] in y * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("in_expr", "let")] + testdata = q[let[[x << 21, y << 2] in y * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("in_expr", "let")] testdata = q[let[((x, 21), (y, 2)) in y * x]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("in_expr", "let")] - testdata = q[let[2 * x, where(x, 21)]] # noqa: F821 - test[islet(the[testdata], expanded=False) == ("where_expr", "let")] - # some other macro invocation test[not islet(the[q[someothermacro((x, 21))[2 * x]]], expanded=False)] # noqa: F821 test[not islet(the[q[someothermacro[(x, 21) in 2 * x]]], expanded=False)] # noqa: F821 - # invalid syntax for haskelly let-in + # invalid syntax for haskelly let-in (no delimiters around bindings subform) testdata = q[let[a in b]] # noqa: F821 test[not islet(the[testdata], expanded=False)] - with q as testdata: # pragma: no cover + with q as testdata: @dlet((x, 21)) # noqa: F821 - def f2(): + def f3(): + 
return 2 * x # noqa: F821 + test[islet(the[testdata[0].decorator_list[0]], expanded=False) == ("decorator", "dlet")] + + with q as testdata: + @dlet(x << 21) # noqa: F821 + def f4(): + return 2 * x # noqa: F821 + test[islet(the[testdata[0].decorator_list[0]], expanded=False) == ("decorator", "dlet")] + + with q as testdata: + @dlet(x := 21) # noqa: F821 + def f5(): return 2 * x # noqa: F821 test[islet(the[testdata[0].decorator_list[0]], expanded=False) == ("decorator", "dlet")] @@ -100,19 +154,37 @@ def f2(): # representation, leading to arguably funny but nonsensical things like # `ctx=currycall(ast.Load)`. with expandrq as testdata: - with autocurry: # pragma: no cover + with autocurry: + let[x << 21][2 * x] # noqa: F821 # note this goes into an ast.Expr + thelet = testdata[0].value + test[islet(the[thelet]) == ("curried_expr", "let")] + + with expandrq as testdata: + with autocurry: + let[[x << 21] in 2 * x] # noqa: F821 + thelet = testdata[0].value + test[islet(the[thelet]) == ("curried_expr", "let")] + + with expandrq as testdata: + with autocurry: + let[2 * x, where[x << 21]] # noqa: F821 + thelet = testdata[0].value + test[islet(the[thelet]) == ("curried_expr", "let")] + + with expandrq as testdata: + with autocurry: let((x, 21))[2 * x] # noqa: F821 # note this goes into an ast.Expr thelet = testdata[0].value test[islet(the[thelet]) == ("curried_expr", "let")] with expandrq as testdata: - with autocurry: # pragma: no cover + with autocurry: let[(x, 21) in 2 * x] # noqa: F821 thelet = testdata[0].value test[islet(the[thelet]) == ("curried_expr", "let")] with expandrq as testdata: - with autocurry: # pragma: no cover + with autocurry: let[2 * x, where(x, 21)] # noqa: F821 thelet = testdata[0].value test[islet(the[thelet]) == ("curried_expr", "let")] @@ -121,16 +193,35 @@ def f2(): test[not isdo(q[x])] # noqa: F821 test[not isdo(q[f()])] # noqa: F821 + # unpythonic 0.15.3+, Python 3.8+ + test[isdo(the[expandrq[do[(x := 21), # noqa: F821 + 2 * x]]]) == 
"expanded"] # noqa: F821 + test[isdo(the[expandrq[do[x << 21, # noqa: F821 2 * x]]]) == "expanded"] # noqa: F821 - with expandrq as testdata: # pragma: no cover + with expandrq as testdata: with autocurry: do[x << 21, # noqa: F821 2 * x] # noqa: F821 thedo = testdata[0].value test[isdo(the[thedo]) == "curried"] + # unpythonic 0.15.3+, Python 3.8+ + testdata = q[do[(x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[isdo(the[testdata], expanded=False) == "do"] + + testdata = q[do0[23, # noqa: F821 + (x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[isdo(the[testdata], expanded=False) == "do0"] + + testdata = q[someothermacro[(x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[not isdo(the[testdata], expanded=False)] + + # previous modern notation testdata = q[do[x << 21, # noqa: F821 2 * x]] # noqa: F821 test[isdo(the[testdata], expanded=False) == "do"] @@ -147,28 +238,56 @@ def f2(): # -------------------------------------------------------------------------------- # Destructuring - envassign + with testset("envassign destructuring (new env-assign syntax v0.15.3+)"): + testdata = q[(x := 42)] # noqa: F821 + view = UnexpandedEnvAssignView(testdata) + + # read + test[view.name == "x"] + constant_node = view.value + test[type(the[constant_node]) is Constant and constant_node.value == 42] + + # write + view.name = "y" + view.value = q[23] + test[view.name == "y"] + constant_node = view.value + test[type(the[constant_node]) is Constant and constant_node.value == 23] + + # it's a live view + test[unparse(testdata) == "(y := 23)"] # syntax type `:=` vs. 
`<<` is preserved + + # error cases + test_raises[TypeError, + UnexpandedEnvAssignView(q[x]), # noqa: F821 + "not an env assignment"] + with test_raises[TypeError, "name must be str"]: + view.name = 1234 + with testset("envassign destructuring"): testdata = q[x << 42] # noqa: F821 view = UnexpandedEnvAssignView(testdata) # read test[view.name == "x"] - test[type(the[view.value]) in (Constant, Num) and getconstant(view.value) == 42] # Python 3.8: ast.Constant + constant_node = view.value + test[type(the[constant_node]) is Constant and constant_node.value == 42] # write view.name = "y" view.value = q[23] test[view.name == "y"] - test[type(the[view.value]) in (Constant, Num) and getconstant(view.value) == 23] # Python 3.8: ast.Constant + constant_node = view.value + test[type(the[constant_node]) is Constant and constant_node.value == 23] # it's a live view - test[unparse(testdata) == "(y << 23)"] + test[unparse(testdata) == "(y << 23)"] # syntax type `:=` vs. `<<` is preserved # error cases test_raises[TypeError, UnexpandedEnvAssignView(q[x]), # noqa: F821 "not an env assignment"] - with test_raises(TypeError, "name must be str"): + with test_raises[TypeError, "name must be str"]: view.name = 1234 # -------------------------------------------------------------------------------- @@ -199,29 +318,83 @@ def testletdestructuring(testdata): test[unparse(view.body) == "(z * t)"] # lispy expr + testdata = q[let[(x := 21), (y := 2)][y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[x << 21, y << 2][y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[[x, 21], [y, 2]][y * x]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[(x, 21), (y, 2)][y * x]] # noqa: F821 testletdestructuring(testdata) # haskelly let-in + testdata = q[let[[x := 21, y := 2] in y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[[x << 21, y << 2] in y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[(x << 
21, y << 2) in y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[[[x, 21], [y, 2]] in y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[[(x, 21), (y, 2)] in y * x]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[([x, 21], [y, 2]) in y * x]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[((x, 21), (y, 2)) in y * x]] # noqa: F821 testletdestructuring(testdata) # haskelly let-where + testdata = q[let[y * x, where[(x := 21), (y := 2)]]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[y * x, where[x << 21, y << 2]]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[y * x, where(x << 21, y << 2)]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[y * x, where[[x, 21], [y, 2]]]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[y * x, where[(x, 21), (y, 2)]]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[let[y * x, where([x, 21], [y, 2])]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[y * x, where((x, 21), (y, 2))]] # noqa: F821 testletdestructuring(testdata) # disembodied haskelly let-in (just the content, no macro invocation) + testdata = q[[x := 21, y := 2] in y * x] # noqa: F821 + testletdestructuring(testdata) + testdata = q[[x << 21, y << 2] in y * x] # noqa: F821 + testletdestructuring(testdata) + testdata = q[(x << 21, y << 2) in y * x] # noqa: F821 + testletdestructuring(testdata) + testdata = q[[[x, 21], [y, 2]] in y * x] # noqa: F821 + testletdestructuring(testdata) + testdata = q[[(x, 21), (y, 2)] in y * x] # noqa: F821 + testletdestructuring(testdata) + testdata = q[([x, 21], [y, 2]) in y * x] # noqa: F821 + testletdestructuring(testdata) testdata = q[((x, 21), (y, 2)) in y * x] # noqa: F821 testletdestructuring(testdata) # disembodied haskelly let-where (just the content, no macro invocation) + testdata = q[y * x, where[(x := 21), (y := 2)]] # noqa: F821 + testletdestructuring(testdata) 
+ testdata = q[y * x, where[x << 21, y << 2]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[y * x, where(x << 21, y << 2)] # noqa: F821 + testletdestructuring(testdata) + testdata = q[y * x, where[[x, 21], [y, 2]]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[y * x, where[(x, 21), (y, 2)]] # noqa: F821 + testletdestructuring(testdata) + testdata = q[y * x, where([x, 21], [y, 2])] # noqa: F821 + testletdestructuring(testdata) testdata = q[y * x, where((x, 21), (y, 2))] # noqa: F821 testletdestructuring(testdata) # decorator - with q as testdata: # pragma: no cover + with q as testdata: @dlet((x, 21), (y, 2)) # noqa: F821 - def f3(): + def f6(): return 2 * x # noqa: F821 # read @@ -239,7 +412,7 @@ def f3(): test[len(view.bindings) == 2] test[unparse(view.bindings[0]) == "(z, 21)"] test[unparse(view.bindings[1]) == "(t, 2)"] - with test_raises(TypeError, "decorator let does not have an accessible body"): + with test_raises[TypeError, "decorator let does not have an accessible body"]: view.body = q[x] # noqa: F821 test_raises[TypeError, @@ -300,15 +473,15 @@ def testexpandedletdestructuring(testdata): testexpandedletdestructuring(testdata) # decorator - with expandrq as testdata: # pragma: no cover + with expandrq as testdata: @dlet((x, 21), (y, 2)) # noqa: F821 - def f4(): + def f7(): return 2 * x # noqa: F821 view = ExpandedLetView(testdata[0].decorator_list[0]) test_raises[TypeError, view.body, "decorator let does not have an accessible body"] - with test_raises(TypeError, "decorator let does not have an accessible body"): + with test_raises[TypeError, "decorator let does not have an accessible body"]: view.body = q[x] # noqa: F821 test[view.envname is None] # dlet decorator doesn't have an envname, either @@ -350,7 +523,7 @@ def testbindings(*expected): test[the[unparse(bk)] == the[f"'{k}'"]] test[type(the[lam]) is Lambda] lambody = lam.body - test[type(the[lambody]) in (Constant, Num) and getconstant(lambody) == the[v]] # Python 3.8: 
ast.Constant + test[type(the[lambody]) is Constant and lambody.value == the[v]] # read test[len(view.bindings.elts) == 2] @@ -396,15 +569,15 @@ def testbindings(*expected): testexpandedletrecdestructuring(testdata) # decorator, letrec - with expandrq as testdata: # pragma: no cover + with expandrq as testdata: @dletrec((x, 21), (y, 2)) # noqa: F821 - def f5(): + def f8(): return 2 * x # noqa: F821 view = ExpandedLetView(testdata[0].decorator_list[0]) test_raises[TypeError, view.body, "decorator let does not have an accessible body"] - with test_raises(TypeError, "decorator let does not have an accessible body"): + with test_raises[TypeError, "decorator let does not have an accessible body"]: view.body = q[x] # noqa: F821 test[view.envname is not None] # dletrec decorator has envname in the bindings @@ -413,13 +586,13 @@ def f5(): with testset("let destructuring (expanded) integration with autocurry"): with expandrq as testdata: - with autocurry: # pragma: no cover + with autocurry: let[((x, 21), (y, 2)) in y * x] # noqa: F821 # note this goes into an ast.Expr thelet = testdata[0].value testexpandedletdestructuring(thelet) with expandrq as testdata: - with autocurry: # pragma: no cover + with autocurry: letrec[((x, 21), (y, 2)) in y * x] # noqa: F821 thelet = testdata[0].value testexpandedletrecdestructuring(thelet) @@ -427,16 +600,43 @@ def f5(): # -------------------------------------------------------------------------------- # Destructuring - unexpanded do + with testset("do destructuring (unexpanded) (new env-assign syntax v0.15.3+)"): + testdata = q[do[local[x := 21], # noqa: F821 + 2 * x]] # noqa: F821 + view = UnexpandedDoView(testdata) + # read + thebody = view.body + thing = thebody[0].slice + test[isenvassign(the[thing])] + # write + # This mutates the original, but we have to assign `view.body` to trigger the setter. + thebody[0] = q[local[x := 9001]] # noqa: F821 + view.body = thebody + + # implicit do, a.k.a. 
extra bracket syntax + testdata = q[let[[local[x := 21], # noqa: F821 + 2 * x]]] # noqa: F821 + theimplicitdo = testdata.slice + view = UnexpandedDoView(theimplicitdo) + # read + thebody = view.body + thing = thebody[0].slice + test[isenvassign(the[thing])] + # write + thebody[0] = q[local[x := 9001]] # noqa: F821 + view.body = thebody + + test_raises[TypeError, + UnexpandedDoView(q[x]), # noqa: F821 + "not a do form"] + with testset("do destructuring (unexpanded)"): testdata = q[do[local[x << 21], # noqa: F821 2 * x]] # noqa: F821 view = UnexpandedDoView(testdata) # read thebody = view.body - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - thing = thebody[0].slice - else: - thing = thebody[0].slice.value + thing = thebody[0].slice test[isenvassign(the[thing])] # write # This mutates the original, but we have to assign `view.body` to trigger the setter. @@ -446,17 +646,11 @@ def f5(): # implicit do, a.k.a. extra bracket syntax testdata = q[let[[local[x << 21], # noqa: F821 2 * x]]] # noqa: F821 - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. - theimplicitdo = testdata.slice - else: - theimplicitdo = testdata.slice.value + theimplicitdo = testdata.slice view = UnexpandedDoView(theimplicitdo) # read thebody = view.body - if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. 
- thing = thebody[0].slice - else: - thing = thebody[0].slice.value + thing = thebody[0].slice test[isenvassign(the[thing])] # write thebody[0] = q[local[x << 9001]] # noqa: F821 @@ -494,7 +688,7 @@ def f5(): "not an expanded do form"] with testset("do destructuring (expanded) integration with autocurry"): - with expandrq as testdata: # pragma: no cover + with expandrq as testdata: with autocurry: do[local[x << 21], # noqa: F821 2 * x] # noqa: F821 diff --git a/unpythonic/syntax/tests/test_letsyntax.py b/unpythonic/syntax/tests/test_letsyntax.py index 60ef58cd..13940cdd 100644 --- a/unpythonic/syntax/tests/test_letsyntax.py +++ b/unpythonic/syntax/tests/test_letsyntax.py @@ -15,8 +15,8 @@ from ...syntax import macros, test, test_raises # noqa: F401 from ...test.fixtures import session, testset -from ...syntax import macros, let_syntax, abbrev # noqa: F401, F811 -from ...syntax import block, expr, where +from ...syntax import macros, let_syntax, abbrev, block, expr # noqa: F401, F811 +from ...syntax import where def runtests(): with testset("expression variant"): @@ -25,14 +25,14 @@ def verylongfunctionname(x=1): nonlocal evaluations evaluations += 1 return x - y = let_syntax((f, verylongfunctionname))[[ # extra brackets: implicit do # noqa: F821, `let_syntax` defines `f` here. + y = let_syntax(f << verylongfunctionname)[[ # extra brackets: implicit do # noqa: F821, `let_syntax` defines `f` here. 
f(), # noqa: F821 f(5)]] # noqa: F821 test[evaluations == 2] test[y == 5] # haskelly syntax - y = let_syntax[((f, verylongfunctionname)) # noqa: F821 + y = let_syntax[[f << verylongfunctionname] # noqa: F821 in [f(), # noqa: F821 f(17)]] # noqa: F821 test[evaluations == 4] @@ -40,15 +40,16 @@ def verylongfunctionname(x=1): y = let_syntax[[f(), # noqa: F821 f(23)], # noqa: F821 - where((f, verylongfunctionname))] # noqa: F821 + where[f << verylongfunctionname]] # noqa: F821 test[evaluations == 6] test[y == 23] # templates # - positional parameters only, no default values - y = let_syntax((f(a), verylongfunctionname(2 * a)))[[ # noqa: F821 - f(2), # noqa: F821 - f(3)]] # noqa: F821 + # TODO: updating this to use bracket syntax requires changes to `_destructure_and_apply_let`. + y = let_syntax(f[a] << verylongfunctionname(2 * a))[[ # noqa: F821 + f[2], # noqa: F821 + f[3]]] # noqa: F821 test[evaluations == 8] test[y == 6] @@ -56,7 +57,7 @@ def verylongfunctionname(x=1): class Silly: realthing = 42 # This test will either pass, or error out with an AttributeError. - test[let_syntax[((alias, realthing)) in Silly.alias] == 42] # noqa: F821 + test[let_syntax[[alias << realthing] in Silly.alias] == 42] # noqa: F821 with testset("block variant"): with let_syntax: @@ -86,11 +87,11 @@ class Silly: test[snd == 5] with let_syntax: - with block(a, b, c) as makeabc: # block template - parameters are expressions # noqa: F821, `let_syntax` defines `a`, `b`, `c` when we call `makeabc`. + with block[a, b, c] as makeabc: # block template - parameters are expressions # noqa: F821, `let_syntax` defines `a`, `b`, `c` when we call `makeabc`. lst = [a, b, c] # noqa: F821 makeabc(3 + 4, 2**3, 3 * 3) test[lst == [7, 8, 9]] - with expr(n) as nth: # single-expression template # noqa: F821, `let_syntax` defines `n` when we call `nth`. + with expr[n] as nth: # single-expression template # noqa: F821, `let_syntax` defines `n` when we call `nth`. 
lst[n] # noqa: F821 test[nth(2) == 9] @@ -147,7 +148,7 @@ def alias(): with block as append456: lst += [4, 5, 6] # template - applied before any barenames - with block(a) as twice: # noqa: F821 + with block[a] as twice: # noqa: F821 a # noqa: F821 a # noqa: F821 lst = [] @@ -160,18 +161,18 @@ def alias(): with let_syntax: # in this example, both substitutions are templates, so they must be # defined in the same order they are meant to be applied. - with block(a) as twice: # noqa: F821 + with block[a] as twice: # noqa: F821 a # noqa: F821 a # noqa: F821 - with block(x, y, z) as appendxyz: # noqa: F821 + with block[x, y, z] as appendxyz: # noqa: F821 lst += [x, y, z] # noqa: F821 lst = [] # template substitution invoked in a parameter twice(appendxyz(7, 8, 9)) # a call is an expression, so as long as not yet expanded, this is ok test[lst == [7, 8, 9] * 2] - with testset("abbrev (first-pass let_syntax)"): - # abbrev: like let_syntax, but expands in the first pass, outside in + with testset("abbrev (outside-in let_syntax)"): + # abbrev: like let_syntax, but expands outside-in # - no lexically scoped nesting # - but can locally rename also macros (since abbrev itself expands before its body) y = abbrev((f, verylongfunctionname))[[ # noqa: F821 @@ -180,14 +181,14 @@ def alias(): test[y == 5] # haskelly syntax - y = abbrev[((f, verylongfunctionname)) # noqa: F821 + y = abbrev[[f << verylongfunctionname] # noqa: F821 in [f(), # noqa: F821 f(17)]] # noqa: F821 test[y == 17] y = abbrev[[f(), # noqa: F821 f(23)], # noqa: F821 - where((f, verylongfunctionname))] # noqa: F821 + where[f << verylongfunctionname]] # noqa: F821 test[y == 23] # in abbrev, outer expands first, so in the test, diff --git a/unpythonic/syntax/tests/test_nameutil.py b/unpythonic/syntax/tests/test_nameutil.py index 791921df..6158e639 100644 --- a/unpythonic/syntax/tests/test_nameutil.py +++ b/unpythonic/syntax/tests/test_nameutil.py @@ -6,7 +6,10 @@ from mcpyrate.quotes import macros, q, h # noqa: F401, 
F811 -from ...syntax.nameutil import isx, make_isxpred, getname +from mcpyrate.expander import MacroExpander + +from ...syntax.nameutil import (isx, getname, + is_unexpanded_expr_macro, is_unexpanded_block_macro) from ast import Call @@ -26,17 +29,39 @@ def runtests(): test[isx(attribute, "ok")] test[not isx(attribute, "ok", accept_attr=False)] - with testset("make_isxpred"): - isfab = make_isxpred("fab") - test[isx(q[fab], isfab)] # noqa: F821 - test[isx(q[someobj.fab], isfab)] # noqa: F821 - with testset("getname"): test[getname(barename) == "ok"] test[getname(captured.func) == "capture_this"] test[getname(attribute) == "ok"] test[getname(attribute, accept_attr=False) is None] + with testset("is_unexpanded_expr_macro"): + def dummymacro(tree, **kw): + return tree + m = MacroExpander({"dummy": dummymacro}, filename="") + + # The tree being tested needs to invoke a macro that is bound in the expander we pass to the analyzer. + # Note we detect whether the invocation is bound to the macro function we expect, + # and we don't care about the name (because in `mcpyrate`, macros can be as-imported). + test[is_unexpanded_expr_macro(dummymacro, m, q[dummy[...]])] # noqa: F821, only quoted + test[not is_unexpanded_expr_macro(dummymacro, m, q[notdummy[...]])] # noqa: F821, only quoted + test[not is_unexpanded_expr_macro(dummymacro, m, q[42])] + + with testset("is_unexpanded_block_macro"): + with q as quoted: + with dummy: # noqa: F821, only quoted + ... + test[is_unexpanded_block_macro(dummymacro, m, quoted[0])] + + with q as quoted: + with notdummy: # noqa: F821, only quoted + ... 
+ test[not is_unexpanded_block_macro(dummymacro, m, quoted[0])] + + with q as quoted: + a = 42 # noqa: F841 + test[not is_unexpanded_block_macro(dummymacro, m, quoted[0])] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_nb.py b/unpythonic/syntax/tests/test_nb.py index d4a4a78b..f83d9a0f 100644 --- a/unpythonic/syntax/tests/test_nb.py +++ b/unpythonic/syntax/tests/test_nb.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from ...syntax import macros, test, error # noqa: F401 +from ...syntax import macros, test, warn # noqa: F401 from ...test.fixtures import session, testset from ...syntax import macros, nb # noqa: F401, F811 @@ -18,7 +18,7 @@ def runtests(): try: from sympy import symbols, pprint except ImportError: # pragma: no cover - error["SymPy not installed in this Python, cannot test symbolic math in nb."] + warn["SymPy not installed in this Python, skipping symbolic math tests in nb."] else: with nb[pprint]: # you can specify a custom print function (first positional arg) test[_ is None] # noqa: F821 diff --git a/unpythonic/syntax/tests/test_prefix.py b/unpythonic/syntax/tests/test_prefix.py index 59225a69..87071a84 100644 --- a/unpythonic/syntax/tests/test_prefix.py +++ b/unpythonic/syntax/tests/test_prefix.py @@ -6,8 +6,7 @@ from ...syntax import macros, test, test_raises, the # noqa: F401 from ...test.fixtures import session, testset, returns_normally -from ...syntax import macros, prefix, autocurry, let, do # noqa: F401, F811 -from ...syntax import q, u, kw +from ...syntax import macros, prefix, q, u, kw, autocurry, let, do # noqa: F401, F811 from ...fold import foldr from ...fun import composerc as compose, apply @@ -102,10 +101,11 @@ def double(x): (double, x)]] test[a == 6] - # Introducing the LisThEll programming language: an all-in-one solution with + # Introducing the Listhell programming language: an all-in-one solution with # the prefix syntax of Lisp, the speed of Python, and 
the readability of Haskell! - with testset("LisThEll"): - # `prefix` is a first-pass macro, so placed on the outside, it expands first. + # If you want to play around with this idea, see `unpythonic.dialects.listhell`. + with testset("Listhell"): + # `prefix` expands outside-in, so placed on the outside, it expands first. with prefix: with autocurry: mymap = lambda f: (foldr, (compose, cons, f), nil) diff --git a/unpythonic/syntax/tests/test_scopeanalyzer.py b/unpythonic/syntax/tests/test_scopeanalyzer.py index ff5e45e4..16cf0995 100644 --- a/unpythonic/syntax/tests/test_scopeanalyzer.py +++ b/unpythonic/syntax/tests/test_scopeanalyzer.py @@ -17,11 +17,11 @@ def runtests(): # test data with q as getnames_load: - x # noqa: F821, it's only quoted. # pragma: no cover + x # noqa: F821, it's only quoted. with q as getnames_del: - del x # noqa: F821 # pragma: no cover + del x # noqa: F821 with q as getnames_store_simple: - x = 42 # noqa: F841, it's only quoted. # pragma: no cover + x = 42 # noqa: F841, it's only quoted. with testset("isnewscope"): test[not isnewscope(q[x])] # noqa: F821, it's only quoted. @@ -29,15 +29,15 @@ def runtests(): test[not isnewscope(q[d['x']])] # noqa: F821 test[isnewscope(q[lambda x: 2 * x])] with q as fdef: - def f(): # pragma: no cover + def f(): pass test[isnewscope(fdef[0])] with q as afdef: # Python 3.5+ - async def g(): # pragma: no cover + async def g(): pass test[isnewscope(afdef[0])] with q as cdef: - class Cat: # pragma: no cover + class Cat: has_four_legs = True def sleep(): pass @@ -59,60 +59,59 @@ def sleep(): # Assignment # - # At least up to Python 3.7, all assignments produce Name nodes in - # Store context on their LHS, so we don't need to care what kind of - # assignment it is. + # All assignments produce Name nodes in #tore context on their LHS, + # so we don't need to care what kind of assignment it is. 
test[get_names_in_store_context(getnames_store_simple) == ["x"]] with q as getnames_tuple: - x, y = 1, 2 # noqa: F841 # pragma: no cover + x, y = 1, 2 # noqa: F841 test[get_names_in_store_context(getnames_tuple) == ["x", "y"]] with q as getnames_starredtuple: - x, y, *rest = range(5) # noqa: F841 # pragma: no cover + x, y, *rest = range(5) # noqa: F841 test[get_names_in_store_context(getnames_starredtuple) == ["x", "y", "rest"]] # Function name, async function name, class name with q as getnames_func: - def f1(): # pragma: no cover + def f1(): pass test[get_names_in_store_context(getnames_func) == ["f1"]] with q as getnames_afunc: # Python 3.5+ - async def f2(): # pragma: no cover + async def f2(): pass test[get_names_in_store_context(getnames_afunc) == ["f2"]] with q as getnames_class: - class Classy: # pragma: no cover + class Classy: pass test[get_names_in_store_context(getnames_class) == ["Classy"]] # For loop target with q as getnames_for_simple: - for x in range(5): # pragma: no cover + for x in range(5): pass test[get_names_in_store_context(getnames_for_simple) == ["x"]] with q as getnames_for_tuple: - for x, y in zip(range(5), range(5)): # pragma: no cover + for x, y in zip(range(5), range(5)): pass test[get_names_in_store_context(getnames_for_tuple) == ["x", "y"]] with q as getnames_for_mixed: - for j, (x, y) in enumerate(zip(range(5), range(5))): # pragma: no cover + for j, (x, y) in enumerate(zip(range(5), range(5))): pass test[get_names_in_store_context(getnames_for_mixed) == ["j", "x", "y"]] # Async for loop target (Python 3.5+) with q as getnames_afor_simple: - async def g1(): # pragma: no cover + async def g1(): async for x in range(5): pass test[get_names_in_store_context(getnames_afor_simple) == ["g1"]] # we stop at scope boundaries test[get_names_in_store_context(getnames_afor_simple[0].body) == ["x"]] with q as getnames_afor_tuple: - async def g2(): # pragma: no cover + async def g2(): async for x, y in zip(range(5), range(5)): pass 
test[get_names_in_store_context(getnames_afor_tuple) == ["g2"]] test[get_names_in_store_context(getnames_afor_tuple[0].body) == ["x", "y"]] with q as getnames_afor_mixed: - async def g3(): # pragma: no cover + async def g3(): async for j, (x, y) in enumerate(zip(range(5), range(5))): pass test[get_names_in_store_context(getnames_afor_mixed) == ["g3"]] @@ -120,30 +119,30 @@ async def g3(): # pragma: no cover # Import statement with q as getnames_import: - import mymod # noqa: F401 # pragma: no cover - import yourmod as renamedmod # noqa: F401 # pragma: no cover - from othermod import original as renamed, other # noqa: F401 # pragma: no cover + import mymod # noqa: F401 + import yourmod as renamedmod # noqa: F401 + from othermod import original as renamed, other # noqa: F401 test[get_names_in_store_context(getnames_import) == ["mymod", "renamedmod", "renamed", "other"]] # Except clause target in try statement with q as getnames_try: - try: # pragma: no cover + try: pass - except Exception as err: # noqa: F841 # pragma: no cover + except Exception as err: # noqa: F841 pass - except KeyboardInterrupt as kbi: # noqa: F841 # pragma: no cover + except KeyboardInterrupt as kbi: # noqa: F841 pass test[get_names_in_store_context(getnames_try) == ["err", "kbi"]] # With statement target with q as getnames_with: - with Manager() as boss: # noqa: F821, F841 # pragma: no cover + with Manager() as boss: # noqa: F821, F841 pass test[get_names_in_store_context(getnames_with) == ["boss"]] # Async with statement target (Python 3.5+) with q as getnames_awith: - async def g4(): # pragma: no cover + async def g4(): async with Manager() as boss: # noqa: F821, F841 pass test[get_names_in_store_context(getnames_awith) == ["g4"]] @@ -159,26 +158,26 @@ async def g4(): # pragma: no cover # We ignore `del o.x` and `del d['x']`, because these # don't delete the lexical variables `o` and `d`. 
with q as getnames_del_attrib: - del o.x # noqa: F821, F841 # pragma: no cover + del o.x # noqa: F821, F841 test[get_names_in_del_context(getnames_del_attrib) == []] with q as getnames_del_subscript: - del d["x"] # noqa: F821, F841 # pragma: no cover + del d["x"] # noqa: F821, F841 test[get_names_in_del_context(getnames_del_subscript) == []] with q as getnames_del_scope_boundary: - del x # noqa: F821 # pragma: no cover - def f3(): # pragma: no cover + del x # noqa: F821 + def f3(): del y # noqa: F821 test[get_names_in_del_context(getnames_del_scope_boundary) == ["x"]] test[get_names_in_del_context(getnames_del_scope_boundary[1].body) == ["y"]] with testset("get_lexical_variables"): with q as getlexvars_fdef: - y = 21 # pragma: no cover - def myfunc(x, *args, kwonlyarg, **kwargs): # pragma: no cover - nonlocal y # not really needed here, except for exercising the analyzer. - global g + y = 21 + def myfunc(x, *args, kwonlyarg, **kwargs): + nonlocal y # noqa: F824, for Python 3.12+; just testing our scope analyzer; it's correct that there's no local `y`. Also, not really needed here, except for exercising the analyzer. + global g # noqa: F824, Python 3.12+ complain about this; just testing our scope analyzer; it's correct that there's no local `g`. 
def inner(blah): abc = 123 # noqa: F841 z = 2 * y # noqa: F841 @@ -196,21 +195,21 @@ def inner(blah): ["y", "g"])] with q as getlexvars_classdef: - class WorldClassy(Classy): # pragma: no cover + class WorldClassy(Classy): pass test[get_lexical_variables(getlexvars_classdef[0]) == (["WorldClassy", "Classy"], [])] with q as getlexvars_listcomp_simple: - [x for x in range(5)] # note this goes into an ast.Expr # pragma: no cover + [x for x in range(5)] # note this goes into an ast.Expr test[get_lexical_variables(getlexvars_listcomp_simple[0].value) == (["x"], [])] with q as getlexvars_listcomp_tuple_in_expr: - [(x, y) for x in range(5) for y in range(x)] # pragma: no cover + [(x, y) for x in range(5) for y in range(x)] test[get_lexical_variables(getlexvars_listcomp_tuple_in_expr[0].value) == (["x", "y"], [])] with q as getlexvars_listcomp_tuple_in_target: - [(x, y) for x, y in zip(range(5), range(5))] # pragma: no cover + [(x, y) for x, y in zip(range(5), range(5))] test[get_lexical_variables(getlexvars_listcomp_tuple_in_target[0].value) == (["x", "y"], [])] @@ -227,49 +226,125 @@ def check(tree, actual_names): return check with q as scoped_onefunc: - def f(x): # noqa: F811 # pragma: no cover + def f(x): # noqa: F811 n["_apply_test_here_"] scoped_transform(scoped_onefunc, callback=make_checker(["f", "x"])) with q as scoped_nestedfunc1: - def f(x): # noqa: F811 # pragma: no cover + def f(x): # noqa: F811 n["_apply_test_here_"] def g(y): pass scoped_transform(scoped_nestedfunc1, callback=make_checker(["f", "x"])) with q as scoped_nestedfunc2: - def f(x): # noqa: F811 # pragma: no cover + def f(x): # noqa: F811 def g(y): n["_apply_test_here_"] scoped_transform(scoped_nestedfunc2, callback=make_checker(["f", "x", "g", "y"])) with q as scoped_classdef: - class WorldClassy(Classy): # noqa: F811 # pragma: no cover + class WorldClassy(Classy): # noqa: F811 n["_apply_test_here_"] scoped_transform(scoped_classdef, callback=make_checker(["WorldClassy", "Classy"])) with q as 
scoped_localvar1: - def f(): # noqa: F811 # pragma: no cover + def f(): # noqa: F811 x = 42 # noqa: F841 n["_apply_test_here_"] scoped_transform(scoped_localvar1, callback=make_checker(["f", "x"])) - # TODO: In 0.15.x, fully lexical scope analysis; update this test at that time. + # TODO: In 0.16.x, fully lexical scope analysis; update this test at that time. with q as scoped_localvar2: - def f(): # noqa: F811 # pragma: no cover + def f(): # noqa: F811 n["_apply_test_here_"] x = 42 # noqa: F841 scoped_transform(scoped_localvar2, callback=make_checker(["f"])) # x not yet created - # TODO: In 0.15.x, fully lexical scope analysis; update this test at that time. + # TODO: In 0.16.x, fully lexical scope analysis; update this test at that time. with q as scoped_localvar3: - def f(): # noqa: F811 # pragma: no cover + def f(): # noqa: F811 x = 42 # noqa: F841 del x n["_apply_test_here_"] scoped_transform(scoped_localvar3, callback=make_checker(["f"])) # x already deleted + # Python 3.10+: `match`/`case` + with testset("match/case: get_names_in_store_context"): + # Simple capture + with q as matchcase_simple: + match x: # noqa: F821, it's only quoted. + case y: # noqa: F841, it's only quoted. + pass + test[get_names_in_store_context(matchcase_simple) == ["y"]] + + # Wildcard `_` — does NOT capture + with q as matchcase_wildcard: + match x: # noqa: F821, it's only quoted. + case _: + pass + test[get_names_in_store_context(matchcase_wildcard) == []] + + # Sequence pattern with star capture + with q as matchcase_sequence: + match x: # noqa: F821, it's only quoted. + case [a, b, *rest]: # noqa: F841, it's only quoted. + pass + test[get_names_in_store_context(matchcase_sequence) == ["a", "b", "rest"]] + + # Class pattern — captures `x` and `y`, but NOT the class reference `Point` + with q as matchcase_class: + match x: # noqa: F821, it's only quoted. + case Point(x, y): # noqa: F821, F841, it's only quoted. 
+ pass + names = get_names_in_store_context(matchcase_class) + test["x" in names] + test["y" in names] + test["Point" not in names] # class reference, not a capture + + # Class pattern with keyword captures + with q as matchcase_class_kw: + match x: # noqa: F821, it's only quoted. + case Point(x=px, y=py): # noqa: F821, F841, it's only quoted. + pass + names = get_names_in_store_context(matchcase_class_kw) + test["px" in names] + test["py" in names] + test["Point" not in names] + + # Mapping pattern with `**rest` + with q as matchcase_mapping: + match x: # noqa: F821, it's only quoted. + case {"key": value, **rest}: # noqa: F841, it's only quoted. + pass + names = get_names_in_store_context(matchcase_mapping) + test["value" in names] + test["rest" in names] + + # Nested: mapping containing a class pattern + with q as matchcase_nested: + match x: # noqa: F821, it's only quoted. + case {"key": Point(px, py)}: # noqa: F821, F841, it's only quoted. + pass + names = get_names_in_store_context(matchcase_nested) + test["px" in names] + test["py" in names] + test["Point" not in names] # class reference, not a capture + + # OR pattern + with q as matchcase_or: + match x: # noqa: F821, it's only quoted. + case 1 | 2 | 3: + pass + test[get_names_in_store_context(matchcase_or) == []] + + # `as` pattern with guard + with q as matchcase_as: + match x: # noqa: F821, it's only quoted. + case (1 | 2) as num: # noqa: F841, it's only quoted. + pass + test[get_names_in_store_context(matchcase_as) == ["num"]] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_scopeanalyzer_3_11.py b/unpythonic/syntax/tests/test_scopeanalyzer_3_11.py new file mode 100644 index 00000000..bd436b58 --- /dev/null +++ b/unpythonic/syntax/tests/test_scopeanalyzer_3_11.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +"""Lexical scope analysis tools — try/except* tests. 
+ +These tests require Python 3.11+ because the ``except*`` syntax +won't parse on earlier versions. + +TODO: Merge into test_scopeanalyzer.py when floor bumps to Python 3.11+. +""" + +from ...syntax import macros, test, test_raises, the # noqa: F401 +from ...test.fixtures import session, testset + +from mcpyrate.quotes import macros, q # noqa: F401, F811 + +from ...syntax.scopeanalyzer import get_names_in_store_context + +def runtests(): + with testset("try/except*: get_names_in_store_context"): + # except* binds names just like except + with q as exceptstar_simple: + try: + pass + except* ValueError as eg: # noqa: F841, it's only quoted. + pass + test[get_names_in_store_context(exceptstar_simple) == ["eg"]] + + with q as exceptstar_multi: + try: + pass + except* ValueError as eg1: # noqa: F841, it's only quoted. + pass + except* TypeError as eg2: # noqa: F841, it's only quoted. + pass + test[get_names_in_store_context(exceptstar_multi) == ["eg1", "eg2"]] + + # Names bound inside the try body are also collected + with q as exceptstar_with_assign: + try: + x = 42 # noqa: F841, it's only quoted. + except* ValueError as eg: # noqa: F841, it's only quoted. + y = 1 # noqa: F841, it's only quoted. 
+ names = get_names_in_store_context(exceptstar_with_assign) + test["x" in the[names]] + test["y" in the[names]] + test["eg" in the[names]] + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/syntax/tests/test_tco.py b/unpythonic/syntax/tests/test_tco.py index 6e7a2518..87691a5e 100644 --- a/unpythonic/syntax/tests/test_tco.py +++ b/unpythonic/syntax/tests/test_tco.py @@ -5,11 +5,12 @@ from ...test.fixtures import session, testset, returns_normally from ...syntax import (macros, tco, autoreturn, autocurry, do, let, letseq, dletrec, # noqa: F401, F811 - quicklambda, f, continuations, call_cc) + quicklambda, fn, continuations, call_cc) from ...ec import call_ec from ...fploop import looped_over from ...fun import withself, curry +from ...funutil import Values def runtests(): # - any explicit return statement in a function body is TCO'd @@ -57,28 +58,28 @@ def lamtest(): # works with let constructs with testset("basic usage in let constructs"): - @dletrec((evenp, lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `dletrec` defines `evenp` here. - (oddp, lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 + @dletrec(evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821, `dletrec` defines `evenp` here. + oddp << (lambda x: (x != 0) and evenp(x - 1))) # noqa: F821 def g(x): return evenp(x) test[g(9001) is False] def g(x): - return let[(y, 3 * x)][y] # noqa: F821, `let` defines `y` here. + return let[y << 3 * x][y] # noqa: F821, `let` defines `y` here. test[g(10) == 30] def h(x): - return let[(y, 2 * x)][g(y)] # noqa: F821 + return let[y << 2 * x][g(y)] # noqa: F821 test[h(10) == 60] def h(x): - return letseq[(y, x), # noqa: F821, `letseq` defines `y` here. - (y, y + 1), # noqa: F821 - (y, y + 1)][g(y)] # noqa: F821 + return letseq[y << x, # noqa: F821, `letseq` defines `y` here. 
+ y << y + 1, # noqa: F821 + y << y + 1][g(y)] # noqa: F821 test[h(10) == 36] with testset("integration with autoreturn"): - # note: apply autoreturn first (first pass, so must be on the outside to run first) + # note: apply autoreturn first (outside-in, so must be on the outside to run first) with autoreturn: with tco: def evenp(x): @@ -142,8 +143,8 @@ def result(loop, x, acc): test[looped_over(range(10), acc=0)(lambda loop, x, acc: loop(acc + x)) == 45] with testset("integration with quicklambda"): - # f[] must expand first so that tco sees it as a lambda. - # `quicklambda` is a first-pass macro, so placed on the outside, it expands first. + # Use `quicklambda` to force `fn[]` to expand first, so that tco sees it as a lambda. + # `quicklambda` is an outside-in macro, so placed on the outside, it expands first. with quicklambda: with tco: def g(x): @@ -151,10 +152,10 @@ def g(x): # TODO: Improve test to actually detect the tail call. # TODO: Now we just test this runs without errors. - func1 = f[g(3 * _)] # tail call # noqa: F821, _ is magic. + func1 = fn[g(3 * _)] # tail call # noqa: F821, _ is magic. test[func1(10) == 60] - func2 = f[3 * g(_)] # no tail call # noqa: F821, _ is magic. + func2 = fn[3 * g(_)] # no tail call # noqa: F821, _ is magic. test[func2(10) == 60] with testset("integration with continuations"): @@ -168,7 +169,7 @@ def g(x): def setk(*args, cc): nonlocal k k = cc # current continuation, i.e. 
where to go after setk() finishes - return args # tuple means multiple-return-values + return Values(*args) # multiple-return-values def doit(): lst = ['the call returned'] *more, = call_cc[setk('A')] diff --git a/unpythonic/syntax/tests/test_util.py b/unpythonic/syntax/tests/test_util.py index 48277c51..1ef3f7c8 100644 --- a/unpythonic/syntax/tests/test_util.py +++ b/unpythonic/syntax/tests/test_util.py @@ -7,17 +7,15 @@ from mcpyrate.quotes import macros, q, n, h # noqa: F401, F811 from mcpyrate.metatools import macros, expandrq # noqa: F401, F811 -from ...syntax.astcompat import getconstant, Num, Str from ...syntax.util import (isec, detect_callec, detect_lambda, is_decorator, has_tco, has_curry, has_deco, suggest_decorator_index, is_lambda_decorator, is_decorated_lambda, destructure_decorated_lambda, sort_lambda_decorators, - transform_statements, eliminate_ifones, - wrapwith, isexpandedmacromarker) + transform_statements, eliminate_ifones) -from ast import Call, Name, Constant, Expr, With, withitem +from ast import Call, Name, Constant, Expr from ...ec import call_ec, throw # just so hq[] captures them, like in real code @@ -36,17 +34,17 @@ def runtests(): test["my_fancy_ec" in the[detect_callec(q[call_ec(lambda my_fancy_ec: None)])]] with q as call_ec_testdata: - @call_ec # pragma: no cover + @call_ec def f(my_fancy_ec): - pass # pragma: no cover + pass test["my_fancy_ec" in the[detect_callec(call_ec_testdata)]] with testset("detect_lambda"): # We expand the `do[]` to generate an implicit lambda that should be ignored by the detector # (it specifically checks for expanded `do[]` forms). 
with expandrq as detect_lambda_testdata: - a = lambda: None # noqa: F841 # pragma: no cover - b = do[local[x << 21], # noqa: F821, F841 # pragma: no cover + a = lambda: None # noqa: F841 + b = do[local[x << 21], # noqa: F821, F841 lambda y: x * y] # noqa: F821 test[len(detect_lambda(detect_lambda_testdata)) == 2] @@ -55,12 +53,12 @@ def f(my_fancy_ec): test[is_decorator(q[decorate_with("flowers")], "decorate_with")] # noqa: F821 with q as has_tco_testdata1: - @trampolined # noqa: F821, just quoted. # pragma: no cover + @trampolined # noqa: F821, just quoted. def ihavetco(): - pass # pragma: no cover + pass with q as has_tco_testdata2: - def idonthavetco(): # pragma: no cover - pass # pragma: no cover + def idonthavetco(): + pass test[has_tco(has_tco_testdata1[0])] test[not has_tco(has_tco_testdata2[0])] test[not has_tco(q[lambda: None])] @@ -70,12 +68,12 @@ def idonthavetco(): # pragma: no cover test[has_tco(q[trampolined(decorate(lambda: None))])] # noqa: F821 with q as has_curry_testdata1: - @curry # noqa: F821, just quoted. # pragma: no cover + @curry # noqa: F821, just quoted. def ihavecurry(): - pass # pragma: no cover + pass with q as has_curry_testdata2: - def idonthavecurry(): # pragma: no cover - pass # pragma: no cover + def idonthavecurry(): + pass test[has_curry(has_curry_testdata1[0])] test[not has_curry(has_curry_testdata2[0])] test[not has_curry(q[lambda: None])] @@ -87,12 +85,12 @@ def idonthavecurry(): # pragma: no cover test[has_deco(["decorate"], q["surprise!"]) is None] # wrong AST type, test not applicable with q as has_deco_testdata1: - @artdeco # noqa: F821, just quoted. # pragma: no cover + @artdeco # noqa: F821, just quoted. 
def ihaveartdeco(): - pass # pragma: no cover + pass with q as has_deco_testdata2: - def idonthaveartdeco(): # pragma: no cover - pass # pragma: no cover + def idonthaveartdeco(): + pass test[has_deco(["artdeco"], has_deco_testdata1[0])] test[not has_deco(["artdeco"], has_deco_testdata2[0])] test[not has_deco(["artdeco"], q[lambda: None])] @@ -114,9 +112,9 @@ def idonthaveartdeco(): # pragma: no cover with q as sdi_testdata1: # This set of decorators makes no sense, but we want to exercise # the different branches of the analysis code. - @curry # noqa: F821 # pragma: no cover - @memoize # noqa: F821 # pragma: no cover - @call # noqa: F821 # pragma: no cover + @curry # noqa: F821 + @memoize # noqa: F821 + @call # noqa: F821 def purespicy(a, b, c): pass # pragma: no cover test[suggest_decorator_index("artdeco", sdi_testdata1[0].decorator_list) is None] # unknown decorator @@ -125,10 +123,10 @@ def purespicy(a, b, c): test[suggest_decorator_index("passthrough_lazy_args", sdi_testdata1[0].decorator_list) == 3] # after all of those already specified with q as sdi_testdata2: - @artdeco # noqa: F821 # pragma: no cover - @neoclassical # noqa: F821 # pragma: no cover + @artdeco # noqa: F821 + @neoclassical # noqa: F821 def architectural(): - pass # pragma: no cover + pass test[suggest_decorator_index("trampolined", sdi_testdata2[0].decorator_list) is None] # known decorator, but only unknown decorators in the decorator_list with testset("decorated lambda machinery"): @@ -157,8 +155,8 @@ def architectural(): test[len(decos) == 3] test[all(type(node) is Call and type(node.func) is Name for node in decos)] test[[node.func.id for node in decos] == ["memoize", "trampolined", "curry"]] - test[type(lam.body) in (Constant, Num)] # Python 3.8+: ast.Constant - test[getconstant(lam.body) == 42] # Python 3.8+: ast.Constant + test[type(lam.body) is Constant] + test[lam.body.value == 42] def test_sort_lambda_decorators(testdata): sort_lambda_decorators(testdata) @@ -172,29 +170,32 @@ def 
test_sort_lambda_decorators(testdata): with testset("statement utilities"): with q as transform_statements_testdata: - def myfunction(x): # pragma: no cover + def myfunction(x): "function body" - try: # pragma: no cover + try: "try" - if x: # pragma: no cover + if x: "if body" else: "if else" - except ValueError: # pragma: no cover + except ValueError: "except" finally: "finally" collected = [] def collectstrings(tree): - if type(tree) is Expr and type(tree.value) in (Constant, Str): # Python 3.8+: ast.Constant - collected.append(getconstant(tree.value)) + if type(tree) is Expr and type(tree.value) is Constant: + constant_node = tree.value + collected.append(constant_node.value) return [tree] transform_statements(collectstrings, transform_statements_testdata) test[set(collected) == {"function body", "try", "if body", "if else", "finally", "except"}] def ishello(tree): - # Python 3.8+: ast.Constant - return type(tree) is Expr and type(tree.value) in (Constant, Str) and getconstant(tree.value) == "hello" + if type(tree) is Expr and type(tree.value) is Constant: + constant_node = tree.value + return constant_node.value == "hello" + return False # numeric with q as eliminate_ifones_testdata1: @@ -204,7 +205,7 @@ def ishello(tree): test[len(result) == 1 and ishello(result[0])] with q as eliminate_ifones_testdata2: - if 0: # pragma: no cover + if 0: "hello" result = eliminate_ifones(eliminate_ifones_testdata2) test[len(result) == 0] @@ -217,7 +218,7 @@ def ishello(tree): test[len(result) == 1 and ishello(result[0])] with q as eliminate_ifones_testdata4: - if False: # pragma: no cover + if False: "hello" result = eliminate_ifones(eliminate_ifones_testdata4) test[len(result) == 0] @@ -256,39 +257,6 @@ def ishello(tree): result = eliminate_ifones(eliminate_ifones_testdata8) test[len(result) == 1 and ishello(result[0])] - with testset("wrapwith"): - with q as wrapwith_testdata: - 42 # pragma: no cover - # known fake location information so we can check it copies correctly - 
wrapwith_testdata[0].lineno = 9001 - wrapwith_testdata[0].col_offset = 9 - wrapped = wrapwith(q[n["ExampleContextManager"]], wrapwith_testdata) - test[type(wrapped) is list] - thewith = wrapped[0] - test[type(thewith) is With] - test[thewith.lineno == 9001] - test[thewith.col_offset == 9] - test[type(thewith.items[0]) is withitem] - ctxmanager = thewith.items[0].context_expr - test[type(ctxmanager) is Name] - test[ctxmanager.id == "ExampleContextManager"] - firststmt = thewith.body[0] - test[type(firststmt) is Expr] - test[type(firststmt.value) in (Constant, Num)] # Python 3.8+: ast.Constant - test[getconstant(firststmt.value) == 42] # Python 3.8+: ast.Constant - - with testset("isexpandedmacromarker"): - with q as ismarker_testdata1: - with ExampleMarker: # noqa: F821 # pragma: no cover - ... - with q as ismarker_testdata2: - with NotAMarker1, NotAMarker2: # noqa: F821 # pragma: no cover - ... - test[isexpandedmacromarker("ExampleMarker", ismarker_testdata1[0])] - test[not isexpandedmacromarker("AnotherMarker", ismarker_testdata1[0])] # right AST node type, different marker - test[not isexpandedmacromarker("NotAMarker1", ismarker_testdata2[0])] # a marker must be the only ctxmanager in the `with` - test[not isexpandedmacromarker("ExampleMarker", q["surprise!"])] # wrong AST node type - if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/testing_testingtools.py b/unpythonic/syntax/tests/testing_testingtools.py index 431f5d77..78c799f7 100644 --- a/unpythonic/syntax/tests/testing_testingtools.py +++ b/unpythonic/syntax/tests/testing_testingtools.py @@ -23,7 +23,7 @@ TestFailure, TestError) from ...conditions import invoke, handlers, restarts, cerror # noqa: F401 -from ...misc import raisef +from ...excutil import raisef def runtests(): # Low-level machinery. 
@@ -214,17 +214,17 @@ def counter(): # return a + a == 4 # # # A test block can have a failure message: - # with test("should be three, no?"): + # with test["should be three, no?"]: # a = 2 # return a + a == 3 # # # Similarly, there are also `with test_raises` and `with test_signals` blocks, # # though they don't support `return` - they always assert that the block # # raises or signals, respectively. - # with test_raises(RuntimeError): + # with test_raises[RuntimeError]: # raise RuntimeError() # - # with test_raises(RuntimeError, "should have raised"): + # with test_raises[RuntimeError, "should have raised"]: # raise RuntimeError() # # # By default, for test failure reporting, `test[]` captures as "result": @@ -272,7 +272,7 @@ def counter(): # with testset("normal return, don't care about value"): # # There's also a block variant that asserts the block completes normally # # (no exception or signal). - # with test("block variant"): + # with test["block variant"]: # print("hello world") # # # To get that effect in the expression variant, call `returns_normally`: diff --git a/unpythonic/syntax/util.py b/unpythonic/syntax/util.py index 4abe2ed1..15cfa8ea 100644 --- a/unpythonic/syntax/util.py +++ b/unpythonic/syntax/util.py @@ -1,18 +1,30 @@ # -*- coding: utf-8 -*- -"""Utilities for working with syntax.""" +"""Utilities for working with syntax. + +This module also contains the definitions for working with "decorated lambdas". 
+""" + +__all__ = ["isec", "detect_callec", + "detect_lambda", + "is_decorator", + "is_lambda_decorator", "is_decorated_lambda", "destructure_decorated_lambda", + "has_tco", "has_curry", "has_deco", + "sort_lambda_decorators", "suggest_decorator_index", + "eliminate_ifones", "transform_statements", + "UnpythonicASTMarker", "UnpythonicExpandedMacroMarker", + "ExpandedContinuationsMarker", "ExpandedAutorefMarker"] from functools import partial -from ast import (Call, Lambda, FunctionDef, AsyncFunctionDef, - If, With, withitem, stmt) +from ast import Call, Constant, Lambda, FunctionDef, AsyncFunctionDef, If, stmt -from mcpyrate.markers import ASTMarker +from mcpyrate.core import add_postprocessor +from mcpyrate.markers import ASTMarker, delete_markers from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer, ASTVisitor -from .astcompat import getconstant from .letdoutil import isdo, ExpandedDoView -from .nameutil import isx, make_isxpred, getname +from .nameutil import isx, getname from ..regutil import all_decorators, tco_decorators, decorator_registry @@ -53,7 +65,7 @@ def g(ec): # <-- should grab from here and `throw` covers the use of `unpythonic.ec.throw`.) """ fallbacks = ["ec", "brk", "throw"] - iscallec = partial(isx, x=make_isxpred("call_ec")) + iscallec = partial(isx, x="call_ec") def detect(tree): class Detector(ASTVisitor): def examine(self, tree): @@ -74,11 +86,15 @@ def examine(self, tree): return fallbacks + detect(tree) def detect_lambda(tree): - """Find lambdas in tree. Helper for block macros. + """Find lambdas in tree. Helper for two-pass block macros. + + A two-pass block macro first performs some processing outside-in, then calls + `expander.visit_recursively(tree)` to make any nested macro invocations expand, + and then performs some processing inside-out. - Run ``detect_lambda(tree)`` in the first pass, before allowing any - nested macros to expand. 
(Those may generate more lambdas that your block - macro is not interested in.) + Run ``detect_lambda(tree)`` in the outside-in pass, before calling + `expander.visit_recursively(tree)`, because nested macro invocations + may generate more lambdas that your block macro is not interested in. The return value is a ``list``of ``id(lam)``, where ``lam`` is a Lambda node that appears in ``tree``. This list is suitable as ``userlambdas`` for the @@ -90,7 +106,7 @@ def detect_lambda(tree): """ class LambdaDetector(ASTVisitor): def examine(self, tree): - if isdo(tree): + if isdo(tree, expanded=True): thebody = ExpandedDoView(tree).body for thelambda in thebody: # lambda e: ... self.visit(thelambda.body) @@ -111,8 +127,8 @@ def is_decorator(tree, fname): We detect: - - ``Name``, ``Attribute`` or a `mcpyrate` hygienic capture matching - the given ``fname`` (non-parametric decorator), and + - ``Name``, ``Attribute``, a `mcpyrate.core.Done`, or a `mcpyrate` + hygienic capture matching the given ``fname`` (non-parametric decorator), and - ``Call`` whose ``.func`` matches the above rule (parametric decorator). """ @@ -267,7 +283,7 @@ def transform(self, tree): return FixIt().visit(tree) # TODO: should we just sort the decorators here, like we do for lambdas? -# (The current solution is less magic, but less uniform.) +# (The current solution is less magic, but also less uniform.) def suggest_decorator_index(deco_name, decorator_list): """Suggest insertion index for decorator deco_name in given decorator_list. @@ -336,16 +352,12 @@ def eliminate_ifones(body): include a ``call_cc`` (see the example in test_conts_gen.py)... 
""" def isifone(tree): - if type(tree) is If: - try: - value = getconstant(tree.test) - except TypeError: - pass - else: - if value in (1, True): - return "then" - elif value in (0, False, None): - return "else" + if type(tree) is If and type(tree.test) is Constant: + value = tree.test.value + if value in (1, True): + return "then" + elif value in (0, False, None): + return "else" return False def optimize(tree): # stmt -> list of stmts @@ -391,96 +403,36 @@ def transform(self, tree): return tree return StatementTransformer().visit(body) -def wrapwith(item, body, locref=None): - """Wrap ``body`` with a single-item ``with`` block, using ``item``. - - ``item`` must be an expr, used as ``context_expr`` of the ``withitem`` node. - - ``body`` must be a ``list`` of AST nodes. - - ``locref`` is an optional AST node to copy source location info from. - If not supplied, ``body[0]`` is used. - - Syntax transformer. Returns the wrapped body. - """ - if isinstance(locref, ASTMarker): # unwrap contents of Done() et al. - locref = locref.body - locref = locref or body[0] - wrapped = With(items=[withitem(context_expr=item, optional_vars=None)], - body=body, - lineno=locref.lineno, col_offset=locref.col_offset) - return [wrapped] - -def isexpandedmacromarker(typename, tree): - """Return whether tree is a specific expanded macro AST marker. Used by block macros. - - That is, whether ``tree`` is a ``with`` block with a single context manager, - which is represented by a ``Name`` whose ``id`` matches the given ``typename``. +# -------------------------------------------------------------------------------- +# AST markers. - Example. If ``tree`` is the AST for the following code:: +class UnpythonicASTMarker(ASTMarker): + """Base class for all AST markers used by `unpythonic`.""" +class UnpythonicExpandedMacroMarker(UnpythonicASTMarker): + """AST marker base class for expanded `unpythonic.syntax` macros.""" - with ContinuationsMarker: - ... 
- - then ``isexpandedmacromarker("ContinuationsMarker", tree)`` returns ``True``. - - **NOTE**: The markers this function detects remain in the AST at run time; - they inherit from `unpythonic.syntax.util.UnpythonicExpandedMacroMarker`. - They are semantically different from `mcpyrate.markers.ASTMarker`, which - are compiled away (and must all be deleted before handing the AST over to - Python's `compile`). - """ - if type(tree) is not With or len(tree.items) != 1: - return False - ctxmanager = tree.items[0].context_expr - return isx(ctxmanager, typename) - -# We use a custom metaclass to make __enter__ and __exit__ callable on the class -# instead of requiring an instance. -# -# Note ``thing.dostuff(...)`` means ``Thing.dostuff(thing, ...)``; the method -# is looked up *on the class* of the instance ``thing``, not on the instance -# itself. Hence, to make method lookup succeed when we have no instance, the -# method should be defined on the class of the class, i.e. *on the metaclass*. -# https://stackoverflow.com/questions/20247841/using-delitem-with-a-class-object-rather-than-an-instance-in-python -class UnpythonicExpandedMacroMarker(type): - """Metaclass for AST markers used by block macros. - - This can be used by block macros to tell other block macros that a section - of the AST is an already-expanded block of a given kind (so that others can - tune their processing or skip it, as appropriate). At run time a marker - does nothing. - - The difference to `mcpyrate.markers.ASTMarker` is that `mcpyrate`'s is a - compile-time thing only (and must be deleted from the AST before the AST - is handed over to Python's `compile`), whereas this one remains in the - AST at run time. - - Usage:: - - with SomeMarker: - ... # expanded code goes here - - We provide a custom metaclass so that there is no need to instantiate - ``SomeMarker``; suitable no-op ``__enter__`` and ``__exit__`` methods - are defined on the metaclass, so e.g. ``SomeMarker.__enter__`` is valid. 
- """ - def __enter__(cls): - pass # pragma: no cover - def __exit__(cls, exctype, excvalue, traceback): - pass # pragma: no cover - -class ContinuationsMarker(metaclass=UnpythonicExpandedMacroMarker): +class ExpandedContinuationsMarker(UnpythonicExpandedMacroMarker): """AST marker for an expanded "with continuations" block.""" - pass # pragma: no cover -# This one must be "instantiated", because we need to pass information at -# macro expansion time using the ctor call syntax, e.g. `AutorefMarker("o")`. -class AutorefMarker(metaclass=UnpythonicExpandedMacroMarker): +class ExpandedAutorefMarker(UnpythonicExpandedMacroMarker): """AST marker for an expanded "with autoref[o]" block.""" - def __init__(self, varname): - self.varname = varname # not needed, but doesn't hurt either. - def __enter__(cls): - pass # pragma: no cover - def __exit__(cls, exctype, excvalue, traceback): - pass # pragma: no cover + def __init__(self, body, varname): + super().__init__(body) + self.varname = varname + self._fields += ["varname"] + +# The point of having these two functions is: +# - `__init__` must explicitly enable the hook, thus making its existence +# obvious, since the entry-point source file for the macro layer has an +# obvious function call, instead of having the hook secretly registered +# by an innocuous-looking utility module. +# +# - We could register `partial(delete_markers, cls=UnpythonicASTMarker)`, +# but then we would be unable to `remove_postprocessor` it later, because +# the function object itself is the key used for unregistering. Currently +# we don't need to do that, but it's nice to have the possibility. 
+def register_postprocessor_hook(): + """Set up global postprocessor hook for `mcpyrate` to nuke `unpythonic`'s AST markers from the final tree.""" + add_postprocessor(_delete_unpythonic_ast_markers) +def _delete_unpythonic_ast_markers(tree): + return delete_markers(tree, cls=UnpythonicASTMarker) diff --git a/unpythonic/test/ansicolor.py b/unpythonic/test/ansicolor.py deleted file mode 100644 index f7312975..00000000 --- a/unpythonic/test/ansicolor.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8; -*- -"""ANSI color support for *nix terminals. - -For a serious library that does this sort of thing in a cross-platform way, -see Colorama: - https://github.com/tartley/colorama -""" - -# TODO: We could also use Colorama (which also works on Windows), but that's one more dependency. -# TODO: Maybe this module should live in unpythonic.net, though we don't currently use it there. - -from enum import Enum - -__all__ = ["TC", "colorize"] - -class TC(Enum): - """Terminal colors, via ANSI escape sequences. - - This uses the terminal app palette (16 colors), so e.g. LIGHTGREEN may actually - be blue, depending on the user's color scheme. - - The colors are listed in palette order. - - See: - https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters - https://stackoverflow.com/questions/287871/print-in-terminal-with-colors - https://github.com/tartley/colorama - """ - # For grepping: \33 octal is \x1b hex. - RESET = '\33[0m' # return to normal state, ending colorization - RESETSTYLE = '\33[22m' # return to normal brightness - RESETFG = '\33[39m' - RESETBG = '\33[49m' - - # styles - BRIGHT = '\33[1m' # a.k.a. bold - DIM = '\33[2m' - ITALIC = '\33[3m' - URL = '\33[4m' # underline plus possibly a special color (depends on terminal app) - BLINK = '\33[5m' - BLINK2 = '\33[6m' # same effect as BLINK? 
- SELECTED = '\33[7m' - - # foreground colors - BLACK = '\33[30m' - RED = '\33[31m' - GREEN = '\33[32m' - YELLOW = '\33[33m' - BLUE = '\33[34m' - MAGENTA = '\33[35m' - CYAN = '\33[36m' - WHITE = '\33[37m' - LIGHTBLACK = '\33[90m' - LIGHTRED = '\33[91m' - LIGHTGREEN = '\33[92m' - LIGHTYELLOW = '\33[93m' - LIGHTBLUE = '\33[94m' - LIGHTMAGENTA = '\33[95m' - LIGHTCYAN = '\33[96m' - LIGHTWHITE = '\33[97m' - - # background colors - BLACKBG = '\33[40m' - REDBG = '\33[41m' - GREENBG = '\33[42m' - YELLOWBG = '\33[43m' - BLUEBG = '\33[44m' - MAGENTABG = '\33[45m' - CYANBG = '\33[46m' - WHITEBG = '\33[47m' - -def colorize(s, *colors): - """Colorize string `s` for ANSI terminal display. Reset color at end of `s`. - - For available `colors`, see the `TC` enum. - - Usage:: - - colorize("I'm new here", TC.GREEN) - colorize("I'm bold and bluetiful", TC.BRIGHT, TC.BLUE) - - Each entry can also be a `tuple` (arbitrarily nested), which is useful - for defining compound styles:: - - BRIGHT_BLUE = (TC.BRIGHT, TC.BLUE) - ... - colorize("I'm bold and bluetiful, too", BRIGHT_BLUE) - """ - def get_ansi_color_sequence(c): # recursive, so each entry can be a tuple. - if isinstance(c, tuple): - return "".join(get_ansi_color_sequence(elt) for elt in c) - if not isinstance(c, TC): - raise TypeError(f"Expected a TC instance, got {type(c)} with value {repr(c)}") # pragma: no cover - return c.value - return f"{get_ansi_color_sequence(colors)}{s}{get_ansi_color_sequence(TC.RESET)}" diff --git a/unpythonic/test/fixtures.py b/unpythonic/test/fixtures.py index b47d24c2..6bce7a67 100644 --- a/unpythonic/test/fixtures.py +++ b/unpythonic/test/fixtures.py @@ -47,7 +47,7 @@ test[2 + 2 == 5] # Testsets can be named. The name is printed in the output. 
- from unpythonic.misc import raisef + from unpythonic.excutil import raisef from unpythonic.conditions import cerror with testset("my fancy tests"): test[2 + 2 == 4] @@ -74,17 +74,18 @@ with testset("inner 2"): test[2 + 2 == 4] - # Unconditional errors can be emitted with `error[]`. + # Warnings can be emitted with `warn[]`. # Useful e.g. if an optional dependency is missing: with testset("integration"): try: import blargly except ImportError: - error["blargly not installed, cannot test integration with it."] + warn["blargly not installed, skipping integration tests."] else: ... # blargly integration tests go here - # Similarly, unconditional errors can be emitted with `fail[]`. + # Unconditional errors can be emitted with `error[]`. + # Unconditional failures can be emitted with `fail[]`. # Useful for marking a testing TODO, or for marking a line # that should be unreachable in a code example. with testset("really fancy tests"): @@ -116,15 +117,26 @@ import threading import sys -from ..conditions import handlers, find_restart, invoke +# The testing framework depends on `mcpyrate` anyway, because the test +# constructs are macros. +# +# This regular-code module depends on `mcpyrate`'s colorizer, but since +# `unpythonic.test` is not auto-loaded, it's fine. +# +# Using `Bunch` is debatable, since we have `env`, and `Bunch` is essentially +# just a stripped-down version of that. But `mcpyrate` uses `Bunch` for storing +# config constants, so meh - let's just use the same approach here for consistency. 
+from mcpyrate.bunch import Bunch +from mcpyrate.colorizer import Fore, Style, colorize + +from ..conditions import cerror, handlers, find_restart, invoke from ..collections import box, unbox from ..symbol import sym -from .ansicolor import TC, colorize - __all__ = ["session", "testset", "terminate", "returns_normally", "catch_signals", + "emit_warning", "TestConfig", "tests_run", "tests_failed", "tests_errored", "tests_warned", "TestingException", "TestFailure", "TestError", "TestWarning", @@ -172,6 +184,21 @@ def _reset(counter): with _counter_update_lock: counter << 0 +def emit_warning(msg): + """Emit a test warning from infrastructure code (outside a ``test[]`` expression). + + If you are writing tests, use the `warn[]` macro instead. + + Use this function in test runners and other infrastructure that needs to + signal a warning through the test framework without being inside a ``test[]`` + or ``warn[]`` macro. The warning will be displayed and counted by the + nearest enclosing ``testset``. + """ + # Unlike the ``warn[]`` macro, this does not adjust ``tests_run``, + # because no test has been counted for this warning to "replace". + _update(tests_warned, +1) + cerror(TestWarning(msg)) + completed = sym("completed") completed.__doc__ = """TestingException `mode`: the test ran to completion normally. @@ -297,13 +324,46 @@ def maybe_colorize(s, *colors): If color is disabled (`TestConfig.use_color` is falsey), then no-op, i.e. return the original `s` as-is. - See `unpythonic.test.ansicolor.colorize` for details. + See `mcpyrate.colorizer.colorize` for details. """ if not TestConfig.use_color: return s return colorize(s, *colors) -class TestConfig: +# We instantiate this later, since the instance lives inside `TestConfig` anyway. +class ColorScheme(Bunch): + """The color scheme for terminal output in `unpythonic`'s testing framework. + + This is just a bunch of constants. To change the colors, simply assign new + values to them. 
Changes take effect immediately for any new output. + + To replace the whole color scheme at once, fill in a suitable `Bunch`, and + then use the `replace` method. If you need to get the names of all settings + programmatically, call the `keys` method. + + Don't replace the color scheme object itself. + + See `Fore`, `Back` and `Style` in `mcpyrate.colorizer` for valid values. + To make a compound style, place the values into a tuple. + + The defaults are designed to fit the "Solarized" (Zenburn-like) theme + of `gnome-terminal`, with "Show bold text in bright colors" set to OFF. + But they work also with "Tango", and indeed with most themes. + """ + def __init__(self): + super().__init__() + + self.HEADING = Fore.LIGHTBLUE_EX + self.PASS = Fore.GREEN + self.FAIL = Fore.LIGHTRED_EX + self.ERROR = Fore.YELLOW + self.WARNING = Fore.YELLOW + self.GREYED_OUT = (Style.DIM, self.HEADING) + # These colors are used for the pass percentage. + self.SUMMARY_OK = Fore.GREEN + self.SUMMARY_NOTOK = Fore.YELLOW # more readable than red on a dark background, yet stands out. + +class TestConfig(Bunch): """Global settings for the testing utilities. This is just a bunch of constants. @@ -318,7 +378,7 @@ class TestConfig: Default is `True`. `postproc`: Exception -> None; optional. Default None (no postproc). `indent_per_level`: How many indent to indent per nesting level of `testset`. - `CS`: The color scheme. + `ColorScheme`: The color scheme. The optional `postproc` is a custom callback for examining failures and errors. `TestConfig.postproc` sets the default that is used when no other @@ -334,45 +394,30 @@ class TestConfig: If you want a failure in a particular testset to abort the whole unit, you can use `terminate` as your `postproc`. """ - # It is overwhelmingly common that tests are invoked from a single thread, - # so by default, all threads share the same printer. (It is not worth - # complicating the common use case here to cater for the rare use case.) 
- # - # However, if you want different printers in different threads, that can - # be done. As `printer`, use a `Shim` that contains a `ThreadLocalBox`. - # In each thread, place in that box a custom object that has a `__call__` - # method that takes the same args `print` does. Because `Shim` redirects - # all attribute accesses, it will redirect the lookup of `__call__` - # (it doesn't have its own `__call__`, so it assumes the client wants to - # call the thing that is inside the box), and hence that method will then - # be used for printing. - # - # TODO: This is subject to change later if I figure out a better design - # TODO: that conveniently caters for *both* the common and rare use cases. - printer = partial(print, file=sys.stderr) - use_color = True - postproc = None - indent_per_level = 2 - - class CS: - """The color scheme. - - See the `unpythonic.test.ansicolor.TC` enum for valid values. To make a - compound style, place the values into a tuple. - - The defaults are designed to fit the "Solarized" (Zenburn-like) theme - of `gnome-terminal`, with "Show bold text in bright colors" set to OFF. - But they should work with most color schemes. - """ - HEADING = TC.LIGHTBLUE - PASS = TC.GREEN - FAIL = TC.LIGHTRED - ERROR = TC.YELLOW - WARNING = TC.YELLOW - GREYED_OUT = (TC.DIM, HEADING) - # These colors are used for the pass percentage. - SUMMARY_OK = TC.GREEN - SUMMARY_NOTOK = TC.YELLOW # more readable than red on a dark background, yet stands out. + def __init__(self): + super().__init__() + + # It is overwhelmingly common that tests are invoked from a single thread, + # so by default, all threads share the same printer. (It is not worth + # complicating the common use case here to cater for the rare use case.) + # + # However, if you want different printers in different threads, that can + # be done. As `printer`, use a `Shim` that contains a `ThreadLocalBox`. 
+ # In each thread, place in that box a custom object that has a `__call__` + # method that takes the same args `print` does. Because `Shim` redirects + # all attribute accesses, it will redirect the lookup of `__call__` + # (it doesn't have its own `__call__`, so it assumes the client wants to + # call the thing that is inside the box), and hence that method will then + # be used for printing. + # + # TODO: This is subject to change later if I figure out a better design + # TODO: that conveniently caters for *both* the common and rare use cases. + self.printer = partial(print, file=sys.stderr) + self.use_color = True + self.postproc = None + self.indent_per_level = 2 + self.ColorScheme = ColorScheme() +TestConfig = TestConfig() # type: ignore[assignment, misc] def describe_exception(exc): """Return a human-readable (possibly multi-line) description of exception `exc`. @@ -390,7 +435,7 @@ def describe_instance(instance): if instance.__traceback__ is not None: snippets.append(maybe_colorize("\nTraceback (most recent call last):\n" + - "".join(format_tb(instance.__traceback__)), TC.DIM)) + "".join(format_tb(instance.__traceback__)), Style.DIM)) msg = str(instance) if msg: @@ -445,32 +490,32 @@ def summarize(runs, fails, errors, warns): # In techni... 
ANSI color: snippets = [] - color = TestConfig.CS.PASS if passes else TestConfig.CS.GREYED_OUT - snippets.extend([maybe_colorize("Pass", TC.BRIGHT, color), + color = TestConfig.ColorScheme.PASS if passes else TestConfig.ColorScheme.GREYED_OUT + snippets.extend([maybe_colorize("Pass", Style.BRIGHT, color), " ", maybe_colorize(f"{passes}", color), - maybe_colorize(", ", TestConfig.CS.HEADING)]) - color = TestConfig.CS.FAIL if fails else TestConfig.CS.GREYED_OUT - snippets.extend([maybe_colorize("Fail", TC.BRIGHT, color), + maybe_colorize(", ", TestConfig.ColorScheme.HEADING)]) + color = TestConfig.ColorScheme.FAIL if fails else TestConfig.ColorScheme.GREYED_OUT + snippets.extend([maybe_colorize("Fail", Style.BRIGHT, color), " ", maybe_colorize(f"{fails}", color), - maybe_colorize(", ", TestConfig.CS.HEADING)]) - color = TestConfig.CS.ERROR if errors else TestConfig.CS.GREYED_OUT - snippets.extend([maybe_colorize("Error", TC.BRIGHT, color), + maybe_colorize(", ", TestConfig.ColorScheme.HEADING)]) + color = TestConfig.ColorScheme.ERROR if errors else TestConfig.ColorScheme.GREYED_OUT + snippets.extend([maybe_colorize("Error", Style.BRIGHT, color), " ", maybe_colorize(f"{errors}", color), - maybe_colorize(", ", TestConfig.CS.HEADING)]) - color = TestConfig.CS.HEADING if runs else TestConfig.CS.GREYED_OUT - snippets.extend([maybe_colorize("Total", TC.BRIGHT, color), + maybe_colorize(", ", TestConfig.ColorScheme.HEADING)]) + color = TestConfig.ColorScheme.HEADING if runs else TestConfig.ColorScheme.GREYED_OUT + snippets.extend([maybe_colorize("Total", Style.BRIGHT, color), " ", maybe_colorize(f"{runs}", color)]) - color = TestConfig.CS.SUMMARY_OK if passes == runs else TestConfig.CS.SUMMARY_NOTOK + color = TestConfig.ColorScheme.SUMMARY_OK if passes == runs else TestConfig.ColorScheme.SUMMARY_NOTOK snippets.extend([" ", - maybe_colorize(f"({int(pass_percentage)}% pass)", TC.BRIGHT, color)]) + maybe_colorize(f"({int(pass_percentage)}% pass)", Style.BRIGHT, color)]) if 
warns > 0: - color = TestConfig.CS.WARNING + color = TestConfig.ColorScheme.WARNING snippets.extend([" ", - maybe_colorize(f"+ {warns} Warn", TC.BRIGHT, color)]) + maybe_colorize(f"+ {warns} Warn", Style.BRIGHT, color)]) return "".join(snippets) class TestSessionExit(Exception): @@ -482,7 +527,7 @@ def terminate(exc=None): # the parameter is ignored this can be used as a `postproc`, if you want a failure in a particular testset to abort the session. """ - TestConfig.printer(maybe_colorize("** TERMINATING SESSION", TC.BRIGHT, TestConfig.CS.HEADING)) + TestConfig.printer(maybe_colorize("** TERMINATING SESSION", Style.BRIGHT, TestConfig.ColorScheme.HEADING)) raise TestSessionExit def returns_normally(expr): @@ -550,11 +595,11 @@ def session(name=None): if _threadlocals.nesting_level > 0: raise RuntimeError("A test `session` cannot be nested inside a `testset`.") - title = maybe_colorize("SESSION", TC.BRIGHT, TestConfig.CS.HEADING) + title = maybe_colorize("SESSION", Style.BRIGHT, TestConfig.ColorScheme.HEADING) if name is not None: - title += maybe_colorize(f" '{name}'", TC.ITALIC, TestConfig.CS.HEADING) - TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.CS.HEADING) + - maybe_colorize("BEGIN", TC.BRIGHT, TestConfig.CS.HEADING)) + title += maybe_colorize(f" '{name}'", Style.ITALIC, TestConfig.ColorScheme.HEADING) + TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.ColorScheme.HEADING) + + maybe_colorize("BEGIN", Style.BRIGHT, TestConfig.ColorScheme.HEADING)) # We are paused when the user triggers the exception; `contextlib` detects the # exception and re-raises it into us. 
@@ -569,8 +614,8 @@ def session(name=None): except TestSessionExit: pass - TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.CS.HEADING) + - maybe_colorize("END", TC.BRIGHT, TestConfig.CS.HEADING)) + TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.ColorScheme.HEADING) + + maybe_colorize("END", Style.BRIGHT, TestConfig.ColorScheme.HEADING)) # We use a stack for postprocs so that the local overrides can be nested. _threadlocals.postproc_stack = deque() @@ -605,9 +650,9 @@ def makeindent(level): title = f"{indent}Testset" if name is not None: - title += maybe_colorize(f" '{name}'", TC.ITALIC) - TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.CS.HEADING) + - maybe_colorize("BEGIN", TC.BRIGHT, TestConfig.CS.HEADING)) + title += maybe_colorize(f" '{name}'", Style.ITALIC, TestConfig.ColorScheme.HEADING) + TestConfig.printer(maybe_colorize(f"{title} ", TestConfig.ColorScheme.HEADING) + + maybe_colorize("BEGIN", Style.BRIGHT, TestConfig.ColorScheme.HEADING)) def print_and_proceed(condition): # The assert helpers in `unpythonic.syntax.testingtools` signal only @@ -615,13 +660,13 @@ def print_and_proceed(condition): # inside the test expression. if isinstance(condition, TestFailure): msg = maybe_colorize(f"{errmsg_indent}FAIL: ", - TC.BRIGHT, TestConfig.CS.FAIL) + str(condition) + Style.BRIGHT, TestConfig.ColorScheme.FAIL) + str(condition) elif isinstance(condition, TestError): msg = maybe_colorize(f"{errmsg_indent}ERROR: ", - TC.BRIGHT, TestConfig.CS.ERROR) + str(condition) + Style.BRIGHT, TestConfig.ColorScheme.ERROR) + str(condition) elif isinstance(condition, TestWarning): msg = maybe_colorize(f"{errmsg_indent}WARNING: ", - TC.BRIGHT, TestConfig.CS.WARNING) + str(condition) + Style.BRIGHT, TestConfig.ColorScheme.WARNING) + str(condition) # So any other signal must come from another source. 
else: if not _threadlocals.catch_uncaught_signals[0]: @@ -630,7 +675,7 @@ def print_and_proceed(condition): _update(tests_run, +1) _update(tests_errored, +1) msg = maybe_colorize(f"{errmsg_indent}Testset received signal outside test[]: ", - TC.BRIGHT, TestConfig.CS.ERROR) + describe_exception(condition) + Style.BRIGHT, TestConfig.ColorScheme.ERROR) + describe_exception(condition) TestConfig.printer(msg) # the custom callback @@ -670,7 +715,7 @@ def print_and_proceed(condition): _update(tests_run, +1) _update(tests_errored, +1) msg = maybe_colorize(f"{errmsg_indent}Testset terminated by exception outside test[]: ", - TC.BRIGHT, TestConfig.CS.ERROR) + Style.BRIGHT, TestConfig.ColorScheme.ERROR) msg += describe_exception(err) TestConfig.printer(msg) finally: @@ -685,8 +730,8 @@ def print_and_proceed(condition): errors = e2 - e1 warns = w2 - w1 - msg = (maybe_colorize(f"{title} ", TestConfig.CS.HEADING) + - maybe_colorize("END", TC.BRIGHT, TestConfig.CS.HEADING) + - maybe_colorize(": ", TestConfig.CS.HEADING) + + msg = (maybe_colorize(f"{title} ", TestConfig.ColorScheme.HEADING) + + maybe_colorize("END", Style.BRIGHT, TestConfig.ColorScheme.HEADING) + + maybe_colorize(": ", TestConfig.ColorScheme.HEADING) + summarize(runs, fails, errors, warns)) TestConfig.printer(msg) diff --git a/unpythonic/test/runner.py b/unpythonic/test/runner.py new file mode 100644 index 00000000..afcac5df --- /dev/null +++ b/unpythonic/test/runner.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +"""Generic test runner for projects using ``unpythonic.test.fixtures``. + +Provides test module discovery, version-suffix gating, and a ``run`` +function that wraps the standard session/testset/import_module pattern. 
+ +Usage from a project's top-level ``runtests.py``:: + + import os + from unpythonic.test.runner import discover_testmodules, run + + import mcpyrate.activate # noqa: F401 + + testsets = [("my tests", discover_testmodules(os.path.join("mypackage", "tests")))] + if not run(testsets): + raise SystemExit(1) + +Version-suffixed test modules (e.g. ``test_foo_3_11.py``) are automatically +skipped with a warning on older Pythons. +""" + +import os +import re +import sys +from importlib import import_module + +from .fixtures import session, testset, emit_warning, tests_errored, tests_failed +from ..collections import unbox + +__all__ = ["discover_testmodules", "run"] + +def discover_testmodules(path, prefix="test_", suffix=".py"): + """Discover test modules in a directory. + + Returns a sorted list of dotted module names (e.g. + ``["mypackage.tests.test_foo", "mypackage.tests.test_bar"]``). + + Modules are discovered by filename convention: files matching + ``{prefix}*{suffix}`` in the given directory. + """ + filenames = [fn for fn in os.listdir(path) if fn.startswith(prefix) and fn.endswith(suffix)] + modnames = [_filename_to_modulename(path, fn) for fn in filenames] + return list(sorted(modnames)) + +def _filename_to_modulename(path, filename): + """Convert a path and filename to a dotted module name. + + ``("some/dir", "mod.py")`` → ``"some.dir.mod"`` + """ + modpath = re.sub(os.path.sep, r".", path) + themod = re.sub(r"\.py$", r"", filename) + return ".".join([modpath, themod]) + +def _version_suffix(modulename): + """Parse version suffix from module name. + + E.g. ``"mypackage.tests.test_foo_3_11"`` → ``(3, 11)``, or ``None``. + """ + m = re.search(r"_(\d+)_(\d+)$", modulename) + if m: + return (int(m.group(1)), int(m.group(2))) + return None + +def run(testsets): + """Run test modules, reporting results through ``unpythonic.test.fixtures``. 
+ + ``testsets``: iterable of ``(name, modulenames)`` pairs, where ``name`` + is a human-readable label and ``modulenames`` is a list of dotted module + names. Each module must export a ``runtests()`` function. + + Version-suffixed modules (e.g. ``test_foo_3_11``) are automatically + skipped with a warning on Pythons older than the indicated version. + + Returns ``True`` if all tests passed (no failures or errors). + """ + with session(): + for tsname, modnames in testsets: + with testset(tsname): + for m in modnames: + with testset(m): + ver = _version_suffix(m) + if ver is not None and sys.version_info < ver: + msg = (f"Skipping '{m}' (requires Python {ver[0]}.{ver[1]}+, " + f"running {sys.version_info.major}.{sys.version_info.minor})") + emit_warning(msg) + continue + mod = import_module(m) + mod.runtests() + return (unbox(tests_failed) + unbox(tests_errored)) == 0 diff --git a/unpythonic/tests/test_arity.py b/unpythonic/tests/test_arity.py index ee1aa9a0..46977c25 100644 --- a/unpythonic/tests/test_arity.py +++ b/unpythonic/tests/test_arity.py @@ -3,10 +3,12 @@ from ..syntax import macros, test, test_raises, the # noqa: F401 from ..test.fixtures import session, testset +import sys + from ..arity import (arities, arity_includes, required_kwargs, optional_kwargs, kwargs, resolve_bindings, tuplify_bindings, - getfunc) + getfunc, UnknownArity) def runtests(): def barefunction(x): @@ -119,84 +121,33 @@ def f(a): def f(a=42): pass # pragma: no cover - test[r(f) == (("args", (("a", 42),)), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, 17) == (("args", (("a", 17),)), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, a=23) == (("args", (("a", 23),)), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] + test[r(f) == (("a", 42),)] + test[r(f, 17) == (("a", 17),)] + test[r(f, a=23) == (("a", 23),)] def f(a, b, c): pass # pragma: no cover - 
test[r(f, 1, 2, 3) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, a=1, b=2, c=3) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, 1, 2, c=3) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, 1, c=3, b=2) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] - test[r(f, c=3, b=2, a=1) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", None), ("kwarg_name", None))] + test[r(f, 1, 2, 3) == (("a", 1), ("b", 2), ("c", 3))] + test[r(f, a=1, b=2, c=3) == (("a", 1), ("b", 2), ("c", 3))] + test[r(f, 1, 2, c=3) == (("a", 1), ("b", 2), ("c", 3))] + test[r(f, 1, c=3, b=2) == (("a", 1), ("b", 2), ("c", 3))] + test[r(f, c=3, b=2, a=1) == (("a", 1), ("b", 2), ("c", 3))] def f(a, b, c, *args): pass # pragma: no cover - test[r(f, 1, 2, 3, 4, 5) == (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", (4, 5)), ("vararg_name", "args"), - ("kwarg", None), ("kwarg_name", None))] - - # On Pythons < 3.6, there's no guarantee about the ordering of the kwargs. - # Our analysis machinery preserves the order it gets, but the *input* - # may already differ from how the invocation of `r` is written in the - # source code here. - # - # So we must allow for arbitrary ordering of the kwargs when checking - # the result. 
- # - def checkpre36(result, truth): - args_r, vararg_r, vararg_name_r, kwarg_r, kwarg_name_r = result - args_t, vararg_t, vararg_name_t, kwarg_t, kwarg_name_t = truth - couldbe = (args_r == args_t and vararg_r == vararg_t and - vararg_name_r == vararg_name_t and kwarg_name_r == kwarg_name_t) - if not couldbe: - return False # pragma: no cover, should only happen if the tests fail. - name_r, contents_r = kwarg_r - name_t, contents_t = kwarg_t - return name_r == name_t and set(contents_r) == set(contents_t) + test[r(f, 1, 2, 3, 4, 5) == (('a', 1), ('b', 2), ('c', 3), + ('args', (4, 5)))] def f(a, b, c, **kw): - pass # pragma: no cover - test[checkpre36(the[r(f, 1, 2, 3, d=4, e=5)], (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", None), ("vararg_name", None), - ("kwarg", (("d", 4), ("e", 5))), ("kwarg_name", "kw")))] + pass + test[r(f, 1, 2, 3, d=4, e=5) == (('a', 1), ('b', 2), ('c', 3), + ('kw', (('d', 4), ('e', 5))))] def f(a, b, c, *args, **kw): - pass # pragma: no cover - test[checkpre36(the[r(f, 1, 2, 3, 4, 5, d=6, e=7)], (("args", (("a", 1), ("b", 2), ("c", 3))), - ("vararg", (4, 5)), ("vararg_name", "args"), - ("kwarg", (("d", 6), ("e", 7))), ("kwarg_name", "kw")))] - - # TODO: On Python 3.6+, this becomes just: - # - # def f(a, b, c, **kw): - # pass - # test[r(f, 1, 2, 3, d=4, e=5) == (("args", (("a", 1), ("b", 2), ("c", 3))), - # ("vararg", None), ("vararg_name", None), - # ("kwarg", (("d", 4), ("e", 5))), ("kwarg_name", "kw"))] - # - # def f(a, b, c, *args, **kw): - # pass - # test[r(f, 1, 2, 3, 4, 5, d=6, e=7) == (("args", (("a", 1), ("b", 2), ("c", 3))), - # ("vararg", (4, 5)), ("vararg_name", "args"), - # ("kwarg", (("d", 6), ("e", 7))), ("kwarg_name", "kw"))] + pass + test[r(f, 1, 2, 3, 4, 5, d=6, e=7) == (('a', 1), ('b', 2), ('c', 3), + ('args', (4, 5)), + ('kw', (('d', 6), ('e', 7))))] with testset("resolve_bindings error cases"): def f(a): @@ -205,16 +156,6 @@ def f(a): test_raises[TypeError, resolve_bindings(f, 1, a=2)] # same arg assigned 
twice test_raises[TypeError, resolve_bindings(f, 1, b=2)] # unexpected kwarg - # The number of missing required positional args affects the error message - # à la Python 3.6, so let's exercise that part of the code, too. - test_raises[TypeError, resolve_bindings(f)] # missing 1 required positional arg - def g(a, b): - pass # pragma: no cover - test_raises[TypeError, resolve_bindings(g)] # missing 2 required positional args - def h(a, b, c): - pass # pragma: no cover - test_raises[TypeError, resolve_bindings(h)] # missing 3 required positional args - if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_assignonce.py b/unpythonic/tests/test_assignonce.py index 5968e6c3..ccdf9e6f 100644 --- a/unpythonic/tests/test_assignonce.py +++ b/unpythonic/tests/test_assignonce.py @@ -7,17 +7,17 @@ def runtests(): with assignonce() as e: - with test("basic usage"): + with test["basic usage"]: e.a = 2 e.b = 3 - with test_raises(AttributeError, "should not be able to redefine an already defined name"): + with test_raises[AttributeError, "should not be able to redefine an already defined name"]: e.a = 5 - with test("rebind"): + with test["rebind"]: e.set("a", 42) # rebind - with test_raises(AttributeError, "should not be able to rebind an unbound name"): + with test_raises[AttributeError, "should not be able to rebind an unbound name"]: e.set("c", 3) if __name__ == '__main__': # pragma: no cover diff --git a/unpythonic/tests/test_collections.py b/unpythonic/tests/test_collections.py index 1015fab8..3d6ce120 100644 --- a/unpythonic/tests/test_collections.py +++ b/unpythonic/tests/test_collections.py @@ -4,6 +4,7 @@ from ..test.fixtures import session, testset from collections.abc import Mapping, MutableMapping, Hashable, Container, Iterable, Sized +from itertools import count, repeat from pickle import dumps, loads import threading @@ -11,6 +12,7 @@ frozendict, view, roview, ShadowedSequence, mogrify, in_slice, 
index_in_slice) from ..fold import foldr +from ..gmemo import imemoize from ..llist import cons, ll def runtests(): @@ -104,7 +106,7 @@ def f(b): b4 << cat # same as b4.set(cat) test[unbox(b4) is cat] - with test_raises(TypeError, "box is mutable, should not be hashable"): + with test_raises[TypeError, "box is mutable, should not be hashable"]: d = {} d[b] = "foo" @@ -253,7 +255,7 @@ class Zee: with testset("frozendict"): d3 = frozendict({'a': 1, 'b': 2}) test[d3['a'] == 1] - with test_raises(TypeError, "frozendict is immutable, should not be writable"): + with test_raises[TypeError, "frozendict is immutable, should not be writable"]: d3['c'] = 42 d4 = frozendict(d3, a=42) # functional update @@ -393,11 +395,11 @@ class Zee: lst = list(range(5)) v = view(lst)[2:] - with test_raises(TypeError): + with test_raises[TypeError]: v[2, 3] = 42 # multidimensional indexing not supported - with test_raises(IndexError): + with test_raises[IndexError]: v[9001] = 42 - with test_raises(IndexError): + with test_raises[IndexError]: v[-9001] = 42 # read-only live view for sequences @@ -411,7 +413,7 @@ class Zee: test[type(v[1:]) is roview] # slicing a read-only view gives another read-only view test[v[1:] == [3, 4, 5]] test_raises[TypeError, view(v[1:])] # cannot create a writable view into a read-only view - with test_raises(TypeError, "read-only view should not support item assignment"): + with test_raises[TypeError, "read-only view should not support item assignment"]: v[2] = 3 test_raises[AttributeError, v.reverse()] # read-only view does not support in-place reverse @@ -459,7 +461,7 @@ class Zee: test[s2 == (1, 2, 23, 42, 5)] test[tpl == (1, 2, 3, 4, 5)] - with test_raises(TypeError): + with test_raises[TypeError]: ShadowedSequence(s4, "la la la", "new value") # not a valid index specification # no-op ShadowedSequence is allowed @@ -469,6 +471,16 @@ class Zee: s6 = ShadowedSequence(tpl, slice(2, 4), (23,)) # replacement too short... 
test_raises[IndexError, s6[3]] # ...which is detected here + # infinite replacements + # Here we must `tuple()` the LHS so that the replacement *iterable*, + # which is not a sequence, is iterated over only once. + test[tuple(ShadowedSequence(tpl, slice(None, None, None), repeat(42))) == (42, 42, 42, 42, 42)] + test[tuple(ShadowedSequence(tpl, slice(None, None, None), count(start=10))) == (10, 11, 12, 13, 14)] + + # reading the start of a memoized infinite replacement backwards + test[tuple(ShadowedSequence(tpl, slice(None, None, -1), imemoize(repeat(42))())) == (42, 42, 42, 42, 42)] + test[tuple(ShadowedSequence(tpl, slice(None, None, -1), imemoize(count(start=10))())) == (14, 13, 12, 11, 10)] + # mogrify: in-place map for various data structures (see docstring for details) with testset("mogrify"): double = lambda x: 2 * x diff --git a/unpythonic/tests/test_conditions.py b/unpythonic/tests/test_conditions.py index 999c890a..d7587832 100644 --- a/unpythonic/tests/test_conditions.py +++ b/unpythonic/tests/test_conditions.py @@ -22,8 +22,10 @@ available_restarts, available_handlers, error, cerror, proceed, warn, muffle, - ControlError) -from ..misc import raisef, slurp + ControlError, + resignal_in, resignal) +from ..excutil import raisef +from ..misc import slurp from ..collections import box, unbox from ..it import subset @@ -48,7 +50,7 @@ def lowlevel(): with restarts(use_value=(lambda x: x), double=(lambda x: 2 * x), drop=(lambda x: _drop), - bail=(lambda x: raisef(ValueError, x))) as result: + bail=(lambda x: raisef(ValueError(x)))) as result: # Let's pretend we only want to deal with even numbers. # Realistic errors would be something like nonexistent file, disk full, network down, ... if k % 2 == 1: @@ -66,7 +68,7 @@ def lowlevel(): return out # High-level logic. Choose here which action the low-level logic should take - # for each named signal. Here we only have one signal, named "odd_number". + # for each condition type. 
Here we only have one signal, `OddNumberError`. def highlevel(): # When using error() or cerror() to signal, not handling the condition # is a fatal error (like an uncaught exception). The `error` function @@ -91,7 +93,7 @@ def highlevel(): # When the "proceed" restart is invoked, it causes the `cerror()` call in # the low-level code to return normally. So execution resumes from where it # left off, never mind that a condition occurred. - with test("basic usage proceed"): # barrier against stray exceptions/signals + with test["basic usage proceed"]: # barrier against stray exceptions/signals with handlers((OddNumberError, proceed)): # We would like to: # `test[lowlevel() == list(range(10))]` @@ -116,22 +118,22 @@ def highlevel(): # The restart name "use_value" is commonly used for the use case "resume with this value", # so the library has a eponymous function to invoke it. - with test("basic usage use_value"): + with test["basic usage use_value"]: with handlers((OddNumberError, lambda c: use_value(c.x))): result = lowlevel() test[result == list(range(10))] - with test("basic usage double"): + with test["basic usage double"]: with handlers((OddNumberError, lambda c: invoke("double", c.x))): result = lowlevel() test[result == [0, 2 * 1, 2, 2 * 3, 4, 2 * 5, 6, 2 * 7, 8, 2 * 9]] - with test("basic usage drop"): + with test["basic usage drop"]: with handlers((OddNumberError, lambda c: invoke("drop", c.x))): result = lowlevel() test[result == [0, 2, 4, 6, 8]] - with test("basic usage bail"): + with test["basic usage bail"]: try: with handlers((OddNumberError, lambda c: invoke("bail", c.x))): lowlevel() @@ -161,7 +163,7 @@ def lowlevel(): out.append(k) return out def highlevel(): - with test("basic usage use_value 2"): + with test["basic usage use_value 2"]: with handlers((OddNumberError, lambda c: use_value(42))): result = lowlevel() test[result == [0, 42, 2, 42, 4, 42, 6, 42, 8, 42]] @@ -222,7 +224,7 @@ def highlevel1(): # Use case where we want to resume at the low 
level (in a real-world application, repairing the error). # Note we need new code only at the high level; the mid and low levels remain as-is. def highlevel2(): - with test("resume at low level"): + with test["resume at low level"]: with handlers((TellMeHowToRecover, lambda c: invoke("resume_low", "resumed at low level"))): result = midlevel() test[result == "resumed at low level > normal exit from low level > normal exit from mid level"] @@ -230,7 +232,7 @@ def highlevel2(): # Use case where we want to resume at the mid level (in a real-world application, skipping the failed part). def highlevel3(): - with test("resume at mid level"): + with test["resume at mid level"]: with handlers((TellMeHowToRecover, lambda c: invoke("resume_mid", "resumed at mid level"))): result = midlevel() test[result == "resumed at mid level > normal exit from mid level"] @@ -276,8 +278,11 @@ def test_usevalue(): fail["This line should not be reached in the tests."] # pragma: no cover test[unbox(result) == 42] - # can be shortened using the predefined `use_value` function, which immediately + # This can be shortened using the predefined `use_value` function, which immediately # invokes the eponymous restart with the args and kwargs given. + # + # If you need to do the same for your own restart, use `functools.partial(invoke, restart_name)`. + # That will give you a function that you can use in a handler, and pass in args at that time. with handlers((JustTesting, lambda c: use_value(42))): with restarts(use_value=(lambda x: x)) as result: signal(JustTesting()) @@ -403,7 +408,6 @@ def warn_protocol(): # An unhandled `error` or `cerror`, when it **raises** `ControlError`, # sets the cause of that `ControlError` to the original unhandled signal. # In Python 3.7+, this will also produce nice stack traces. - # In up to Python 3.6, it will at least show the chain of causes. 
with catch_signals(False): try: exc1 = JustTesting("Hullo") @@ -449,7 +453,7 @@ def invoke_if_exists(restart_name): # # Note we place the `test_raises` construct on the outside, to avoid intercepting # the `signal(JustACondition)`. - with test_raises(NoItDidntExist, "nonexistent restart"): + with test_raises[NoItDidntExist, "nonexistent restart"]: with handlers((JustACondition, lambda: invoke_if_exists("myrestart"))): signal(JustACondition()) finding() @@ -470,7 +474,7 @@ def errorcases(): test_raises[ControlError, invoke("woo")] # error case: invoke an undefined restart - with test_signals(ControlError, "should yell when trying to invoke a nonexistent restart"): + with test_signals[ControlError, "should yell when trying to invoke a nonexistent restart"]: with restarts(foo=(lambda x: x)): invoke("bar") @@ -482,10 +486,10 @@ def errorcases(): test_signals[TypeError, invoke(42)] # invalid bindings - with test_signals(TypeError): + with test_signals[TypeError]: with restarts(myrestart=42): # name=callable, ... pass # pragma: no cover - with test_signals(TypeError): + with test_signals[TypeError]: with handlers(("ha ha ha", 42)): # (excspec, callable), ... pass # pragma: no cover errorcases() @@ -552,7 +556,7 @@ def lowlevel3(): cancel_and_delegate() # Multithreading. Threads behave independently. 
- with testset("multithreading"): + with testset("thread-safety"): def multithreading(): comm = Queue() def lowlevel4(tag): @@ -576,6 +580,42 @@ def worker(comm, tid): test[the[tuple(sorted(tag for tag, x in results)) == tuple(range(n))]] # de-spam: don't capture LHS multithreading() + with testset("resignal_in, resignal"): + def resignal_tests(): + class LibraryException(Exception): + pass + class MoreSophisticatedLibraryException(LibraryException): + pass + class UnrelatedException(Exception): + pass + class ApplicationException(Exception): + pass + test_signals[ApplicationException, resignal_in(lambda: signal(LibraryException), + {LibraryException: ApplicationException})] + # subclasses + test_signals[ApplicationException, resignal_in(lambda: signal(MoreSophisticatedLibraryException), + {LibraryException: ApplicationException})] + # tuple of types as input + test_signals[ApplicationException, resignal_in(lambda: signal(UnrelatedException), + {(LibraryException, UnrelatedException): + ApplicationException})] + test[returns_normally(resignal_in(lambda: 42, + {LibraryException: ApplicationException}))] + + with test_signals[ApplicationException]: + with resignal({LibraryException: ApplicationException}): + signal(LibraryException) + with test_signals[ApplicationException]: + with resignal({LibraryException: ApplicationException}): + signal(MoreSophisticatedLibraryException) + with test_signals[ApplicationException]: + with resignal({(LibraryException, UnrelatedException): ApplicationException}): + signal(LibraryException) + with test["should return normally"]: + with resignal({LibraryException: ApplicationException}): + 42 + resignal_tests() + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_dispatch.py b/unpythonic/tests/test_dispatch.py index fa222725..e75e513b 100644 --- a/unpythonic/tests/test_dispatch.py +++ b/unpythonic/tests/test_dispatch.py @@ -3,22 +3,28 @@ from ..syntax import macros, 
test, test_raises, fail, the # noqa: F401 from ..test.fixtures import session, testset, returns_normally +import collections +import contextlib +import io +import re import typing + from ..fun import curry -from ..dispatch import generic, generic_for, typed +from ..dispatch import generic, augment, typed, format_methods @generic def zorblify(x: int, y: int): return 2 * x + y @generic -def zorblify(x: str, y: int): # noqa: F811, registered as a method of the same generic function. +def zorblify(x: str, y: int): # noqa: F811, registered as a multimethod of the same generic function. # Because dispatching occurs on both arguments, this method is not reached by the tests. fail["this method should not be reached by the tests"] # pragma: no cover @generic def zorblify(x: str, y: float): # noqa: F811 return f"{x[::-1]} {y}" - -# TODO: def zorblify(x: int, *args: typing.Sequence[str]): +@generic +def zorblify(x: int, *args: typing.Sequence[str]): # noqa: F811 + return f"{x}, {', '.join(args)}" # @generic can also be used to simplify argument handling code in functions # where the role of an argument in a particular position changes depending on @@ -41,8 +47,11 @@ def _example_impl(start, step, stop): # no @generic! 
# shorter, same effect @generic -def example2(start: int, stop: int): - return example2(start, 1, stop) # just call the method that has the implementation +def example2(stop: int): + return example2(0, 1, stop) # just call the multimethod that has the implementation +@generic +def example2(start: int, stop: int): # noqa: F811 + return example2(start, 1, stop) @generic def example2(start: int, step: int, stop: int): # noqa: F811 return start, step, stop @@ -58,6 +67,14 @@ def gargle(*args: typing.Tuple[float, ...]): # any number of floats # noqa: F8 def gargle(*args: typing.Tuple[int, float, str]): # three args, matching the given types # noqa: F811 return "int, float, str" +# v0.15.0: dispatching on a homogeneous type inside **kwargs is also supported, via `typing.Dict` +@generic +def kittify(**kwargs: typing.Dict[str, int]): # all kwargs are ints + return "int" +@generic +def kittify(**kwargs: typing.Dict[str, float]): # all kwargs are floats # noqa: F811 + return "float" + # One-method pony, which automatically enforces argument types. # The type specification may use features from the `typing` stdlib module. 
@typed @@ -75,6 +92,7 @@ def runtests(): test[zorblify(y=8, x=17) == 42] test[zorblify("tac", 1.0) == "cat 1.0"] test[zorblify(y=1.0, x="tac") == "cat 1.0"] + test[zorblify(23, "cat", "meow") == "23, cat, meow"] test_raises[TypeError, zorblify(1.0, 2.0)] # there's no zorblify(float, float) @@ -82,6 +100,7 @@ def runtests(): test[example(2, 10) == (2, 1, 10)] test[example(2, 3, 10) == (2, 3, 10)] + test[example2(5) == (0, 1, 5)] test[example2(1, 5) == (1, 1, 5)] test[example2(1, 1, 5) == (1, 1, 5)] test[example2(1, 2, 5) == (1, 2, 5)] @@ -91,11 +110,44 @@ def runtests(): test[gargle(42, 6.022e23, "hello") == "int, float, str"] test[gargle(1, 2, 3) == "int"] # as many as in the [int, float, str] case - with testset("@generic_for"): + test[kittify(x=1, y=2) == "int"] + test[kittify(x=1.0, y=2.0) == "float"] + test_raises[TypeError, kittify(x=1, y=2.0)] + + with testset("@generic integration with curry"): + @generic + def curryable(x: int, y: int): + return "int" + @generic + def curryable(x: float, y: float): # noqa: F811 + return "float" + f = curry(curryable, 1) + test[callable(the[f])] + test[f(2) == "int"] + + # When the final set of arguments does not match any multimethod, it is a type error. + test_raises[TypeError, f(2.0)] + + # CAUTION: Partially applying by name starts keyword-only processing in `inspect.signature`, + # which is used by `unpythonic.arity.arities`, which in turn is used by `unpythonic.fun.curry`. + # Hence, if we pass `x=1` by name here, the remaining positional arity becomes 0... + f = curry(curryable, x=1) + test[callable(the[f])] + # ...so, we must pass `y` by name here. + test[f(y=2) == "int"] + + f = curry(curryable, 1) + test[callable(the[f])] + test[f(y=2) == "int"] + + # When no multimethod can match the given partial signature, it is a type error. 
+ test_raises[TypeError, curry(curryable, "abc")] + + with testset("@augment"): @generic def f1(x: typing.Any): return False - @generic_for(f1) + @augment(f1) def f2(x: int): return x test[f1("hello") is False] @@ -103,8 +155,8 @@ def f2(x: int): def f3(x: typing.Any): # not @generic! return False - with test_raises(TypeError, "should not be able to @generic_for a non-generic function"): - @generic_for(f3) + with test_raises[TypeError, "should not be able to @augment a non-generic function"]: + @augment(f3) def f4(x: int): return x @@ -183,9 +235,9 @@ def instmeth(self, x: float): return f"floating with {self.a * x}" tt2 = BabyTestTarget(3) - # the new generic-function methods become available, installed on the OOP method + # the new multimethods become available, installed on the OOP method test[tt2.instmeth(3.14) == "floating with 9.42"] - # old generic-function methods registered by the ancestor remain available + # old multimethods registered by the ancestor remain available test[tt2.instmeth("hi") == "hi hi hi"] test[tt2.instmeth(21) == 63] test[tt2.clsmeth(3.14) == "Test target floats: 6.28"] @@ -201,7 +253,7 @@ def instmeth(self, x: float): # # See discussions on interaction between `@staticmethod` and `super` in Python: # https://bugs.python.org/issue31118 - # https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879 + # https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879 test[tt2.staticmeth(3.14) == "float 6.28"] # this is available on `tt2` test_raises[TypeError, tt2.staticmeth("hi")] # but this is not (no MRO) test_raises[TypeError, tt2.staticmeth(21)] @@ -214,14 +266,38 @@ def instmeth(self, x: float): with testset("@typed"): test[blubnify(2, 21.0) == 42] test_raises[TypeError, blubnify(2, 3)] # blubnify only accepts (int, float) - test[not hasattr(blubnify, "register")] # and no more methods can be registered on it + with test_raises[TypeError, "should not be able to add more 
multimethods to a @typed function"]: + @augment(blubnify) + def blubnify2(x: float, y: float): + pass test[jack(42) == 42] test[jack("foo") == "foo"] test_raises[TypeError, jack(3.14)] # jack only accepts int or str + with testset("list_methods"): + def check_formatted_multimethods(result, expected): + def _remove_space_before_typehint(string): # Python 3.6 didn't print a space there, later versions do + return string.replace(": ", ":") + result_list = result.split("\n") + human_readable_header, *multimethod_descriptions = result_list + multimethod_descriptions = [x.strip() for x in multimethod_descriptions] + test[the[len(multimethod_descriptions)] == the[len(expected)]] + for r, e in zip(multimethod_descriptions, expected): + r = _remove_space_before_typehint(r) + e = _remove_space_before_typehint(e) + test[the[r].startswith(the[e])] + # @generic + check_formatted_multimethods(format_methods(example2), + ["example2(start: int, step: int, stop: int)", + "example2(start: int, stop: int)", + "example2(stop: int)"]) + # @typed + check_formatted_multimethods(format_methods(blubnify), + ["blubnify(x: int, y: float)"]) + with testset("error cases"): - with test_raises(TypeError, "@typed should only accept a single method"): + with test_raises[TypeError, "@typed should only accept a single method"]: @typed def errorcase1(x: int): pass # pragma: no cover @@ -229,7 +305,7 @@ def errorcase1(x: int): def errorcase1(x: str): # noqa: F811 pass # pragma: no cover - with test_raises(TypeError, "@generic should complain about missing type annotations"): + with test_raises[TypeError, "@generic should complain about missing type annotations"]: @generic def errorcase2(x): pass # pragma: no cover @@ -239,10 +315,8 @@ def errorcase2(x): test[callable(the[f])] test[f(21.0) == 42] - # But be careful: - f = curry(blubnify, 2.0) # wrong argument type; error not triggered yet - test[callable(the[f])] - test_raises[TypeError, f(21.0) == 42] # error will occur now, when the call is triggered 
+ # Wrong argument type during partial application of @typed function - error reported immediately. + test_raises[TypeError, curry(blubnify, 2.0)] with testset("holy traits in Python with @generic"): # Note we won't get the performance benefits of Julia, because this is a @@ -269,12 +343,12 @@ def flippable(x: typing.Any): # default # Since these are in the same lexical scope as the original definition of the # generic function `flippable`, we could do this using `@generic`, but # later extensions (which are the whole point of traits) will need to specify - # on which function the new methods are to be registered, using `@generic_for`. + # on which function the new methods are to be registered, using `@augment`. # So let's do that to show how it's done. - @generic_for(flippable) + @augment(flippable) def flippable(x: str): # noqa: F811 return IsFlippable() - @generic_for(flippable) + @augment(flippable) def flippable(x: int): # noqa: F811 return IsNotFlippable() @@ -289,7 +363,7 @@ def flippable(x: int): # noqa: F811 def flip(x: typing.Any): return flip(flippable(x), x) - # Implementation of `flip`. Same comment about `@generic_for` as above. + # Implementation of `flip`. Same comment about `@augment` as above. # # Here we provide one implementation for "flippable" objects and another one # for "nonflippable" objects. Note this dispatches regardless of the actual @@ -299,10 +373,10 @@ def flip(x: typing.Any): # We could also add methods for specific types if needed. Note this is not # Julia, so the first matching definition wins, instead of the most specific # one. 
- @generic_for(flip) + @augment(flip) def flip(traitvalue: IsFlippable, x: typing.Any): # noqa: F811 return x[::-1] - @generic_for(flip) + @augment(flip) def flip(traitvalue: IsNotFlippable, x: typing.Any): # noqa: F811 raise TypeError(f"{repr(x)} is IsNotFlippable") @@ -310,6 +384,113 @@ def flip(traitvalue: IsNotFlippable, x: typing.Any): # noqa: F811 test_raises[TypeError, flip(42), "int should not be flippable"] test_raises[NotImplementedError, flip(2.0), "float should not be registered for the flippable trait"] + # Exercise new typing features (D4 sets 1 and 2) through the dispatch machinery. + # Most-recently-registered multimethod is tried first, so register the + # general case first and the specific ones after (to override). + with testset("@generic with Literal dispatch"): + @generic + def handle_code(code: int): + return "other" + @generic + def handle_code(code: typing.Literal[200, 201]): # noqa: F811 + return "success" + @generic + def handle_code(code: typing.Literal[404]): # noqa: F811 + return "not found" + test[handle_code(200) == "success"] + test[handle_code(201) == "success"] + test[handle_code(404) == "not found"] + test[handle_code(500) == "other"] + + with testset("@generic with Type dispatch"): + @generic + def describe_type(cls: typing.Type[int]): + return "integer type" + @generic + def describe_type(cls: typing.Type[str]): # noqa: F811 + return "string type" + test[describe_type(int) == "integer type"] + test[describe_type(bool) == "integer type"] # bool is a subclass of int + test[describe_type(str) == "string type"] + test_raises[TypeError, describe_type(float)] + + with testset("@generic with mapping variants"): + @generic + def process_mapping(d: typing.Dict[str, int]): + return "dict" + @generic + def process_mapping(d: typing.DefaultDict[str, int]): # noqa: F811 + return "defaultdict" + @generic + def process_mapping(d: typing.Counter[str]): # noqa: F811 + return "counter" + @generic + def process_mapping(d: typing.OrderedDict[str, 
int]): # noqa: F811 + return "ordereddict" + test[process_mapping(collections.defaultdict(int, a=1)) == "defaultdict"] + test[process_mapping(collections.Counter("abc")) == "counter"] + test[process_mapping(collections.OrderedDict(a=1)) == "ordereddict"] + test[process_mapping({"a": 1}) == "dict"] + + with testset("@generic with IO dispatch"): + @generic + def read_stream(s: typing.TextIO): + return "text" + @generic + def read_stream(s: typing.BinaryIO): # noqa: F811 + return "binary" + test[read_stream(io.StringIO("hello")) == "text"] + test[read_stream(io.BytesIO(b"hello")) == "binary"] + + with testset("@generic with Pattern dispatch"): + @generic + def describe_pattern(p: typing.Pattern[str]): + return "str pattern" + @generic + def describe_pattern(p: typing.Pattern[bytes]): # noqa: F811 + return "bytes pattern" + test[describe_pattern(re.compile(r"\d+")) == "str pattern"] + test[describe_pattern(re.compile(rb"\d+")) == "bytes pattern"] + + with testset("@generic with Generator and ContextManager"): + @generic + def classify(x: typing.Generator): + return "generator" + @generic + def classify(x: typing.ContextManager): # noqa: F811 + return "context manager" + @generic + def classify(x: int): # noqa: F811 + return "int" + def mygen(): + yield 1 + test[classify(mygen()) == "generator"] + test[classify(contextlib.nullcontext()) == "context manager"] + test[classify(42) == "int"] + + with testset("@generic with Iterable dispatch"): + # Best-effort element checking: concrete collections dispatch correctly. 
+ @generic + def process_items(x: typing.Iterable[int]): + return "ints" + @generic + def process_items(x: typing.Iterable[str]): # noqa: F811 + return "strs" + test[process_items([1, 2, 3]) == "ints"] + test[process_items(["a", "b"]) == "strs"] + test[process_items((1, 2)) == "ints"] + test[process_items({"hello", "world"}) == "strs"] + + with testset("@generic with Collection dispatch"): + @generic + def summarize(x: typing.Collection[int]): + return f"collection of {len(list(x))} ints" + @generic + def summarize(x: typing.Collection[str]): # noqa: F811 + return f"collection of {len(list(x))} strs" + test[summarize([1, 2, 3]) == "collection of 3 ints"] + test[summarize(["a", "b"]) == "collection of 2 strs"] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_dynassign.py b/unpythonic/tests/test_dynassign.py index c6846334..d19d8ec9 100644 --- a/unpythonic/tests/test_dynassign.py +++ b/unpythonic/tests/test_dynassign.py @@ -38,14 +38,14 @@ def basictests(): test_raises[AttributeError, dyn.b] # no longer exists - with testset("multithreading"): + with testset("thread-safety"): comm = Queue() - def threadtest(q): + def threadtest(que): try: dyn.c # just access dyn.c except AttributeError as err: - q.put(err) - q.put(None) + que.put(err) + que.put(None) with dyn.let(c=42): t1 = threading.Thread(target=threadtest, args=(comm,), kwargs={}) @@ -81,7 +81,7 @@ def threadtest(q): test[noimplicits(dyn.items()) == (("a", 1), ("b", 2), ("c", 23), ("d", 4))] dyn.a = 42 # update occurs in the nearest enclosing dynamic scope that has the name bound test[noimplicits(dyn.items()) == (("a", 42), ("b", 2), ("c", 23), ("d", 4))] - with test_raises(AttributeError, "should not be able to update unbound dynamic variable"): + with test_raises[AttributeError, "should not be able to update unbound dynamic variable"]: dyn.e = 5 # subscript notation also works for updating @@ -112,7 +112,7 @@ def threadtest(q): 
test[noimplicits(dyn.items()) == (("a", 10), ("b", 20))] test[noimplicits(dyn.items()) == ()] - with testset("mass update with multithreading"): + with testset("mass update, thread-safety"): comm = Queue() def worker(): # test[] itself is thread-safe, but the worker threads don't have a diff --git a/unpythonic/tests/test_ec.py b/unpythonic/tests/test_ec.py index febde782..eb63de72 100644 --- a/unpythonic/tests/test_ec.py +++ b/unpythonic/tests/test_ec.py @@ -81,7 +81,7 @@ def inner(): test[result == 42] with testset("error case"): - with test_raises(RuntimeError, "should not be able to call an ec instance outside its dynamic extent"): + with test_raises[RuntimeError, "should not be able to call an ec instance outside its dynamic extent"]: @call_ec def erroneous(ec): return ec diff --git a/unpythonic/tests/test_env.py b/unpythonic/tests/test_env.py index 5eeb45fc..ec0efbd8 100644 --- a/unpythonic/tests/test_env.py +++ b/unpythonic/tests/test_env.py @@ -105,7 +105,7 @@ def runtests(): with testset("error cases"): with env(x=1) as e: e.finalize() - with test_raises(AttributeError, "should not be able to add new bindings to a finalized environment"): + with test_raises[AttributeError, "should not be able to add new bindings to a finalized environment"]: e.y = 42 # undefined name @@ -116,28 +116,28 @@ def runtests(): test_raises[AttributeError, e.set("foo", 42)] # invalid, set() only modifies existing bindings with env() as e: - with test_raises(ValueError, "should detect invalid identifier in __setitem__"): + with test_raises[ValueError, "should detect invalid identifier in __setitem__"]: e["∞"] = 1 # invalid identifier in store context (__setitem__) with env() as e: - with test_raises(ValueError, "should detect invalid identifier in __getitem__"): + with test_raises[ValueError, "should detect invalid identifier in __getitem__"]: e["∞"] # invalid identifier in load context (__getitem__) with env() as e: - with test_raises(ValueError, "should detect invalid identifier 
in __delitem__"): + with test_raises[ValueError, "should detect invalid identifier in __delitem__"]: del e["∞"] # invalid identifier in del context (__delitem__) with env() as e: - with test_raises(AttributeError, "overwriting a reserved name should not be allowed"): + with test_raises[AttributeError, "overwriting a reserved name should not be allowed"]: e.set = {1, 2, 3} with env(x=1) as e: e.finalize() - with test_raises(TypeError, "deleting binding from finalized environment should not be allowed"): + with test_raises[TypeError, "deleting binding from finalized environment should not be allowed"]: del e.x with env() as e: - with test_raises(AttributeError, "deleting nonexistent binding should not be allowed"): + with test_raises[AttributeError, "deleting nonexistent binding should not be allowed"]: del e.x with env(x=1, y=2) as e: diff --git a/unpythonic/tests/test_excutil.py b/unpythonic/tests/test_excutil.py new file mode 100644 index 00000000..926bfb53 --- /dev/null +++ b/unpythonic/tests/test_excutil.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +from ..syntax import macros, test, test_raises, error, warn, the # noqa: F401 +from ..test.fixtures import session, testset, returns_normally + +import threading +from time import sleep +import sys + +from ..excutil import (raisef, tryf, + equip_with_traceback, + reraise_in, reraise, + async_raise) +from ..env import env + +def runtests(): + # raisef: raise an exception from an expression position + with testset("raisef (raise exception from an expression)"): + raise_instance = lambda: raisef(ValueError("all ok")) # the argument works the same as in `raise ...` + test_raises[ValueError, raise_instance()] + try: + raise_instance() + except ValueError as err: + test[err.__cause__ is None] # like plain `raise ...`, no cause set (default behavior) + + # using the `cause` parameter, raisef can also perform a `raise ... 
from ...` + exc = TypeError("oof") + raise_instance = lambda: raisef(ValueError("all ok"), cause=exc) + test_raises[ValueError, raise_instance()] + try: + raise_instance() + except ValueError as err: + test[err.__cause__ is exc] # cause specified, like `raise ... from ...` + + # can also raise an exception class (no instance) + test_raises[StopIteration, raisef(StopIteration)] + + # tryf: handle an exception in an expression position + with testset("tryf (try/except/finally in an expression)"): + raise_instance = lambda: raisef(ValueError("all ok")) + raise_class = lambda: raisef(ValueError) + + test[tryf(lambda: "hello") == "hello"] + test[tryf(lambda: "hello", + elsef=lambda: "there") == "there"] + test[tryf(lambda: raise_instance(), + (ValueError, lambda: "got a ValueError")) == "got a ValueError"] + test[tryf(lambda: raise_instance(), + (ValueError, lambda err: f"got a ValueError: '{err.args[0]}'")) == "got a ValueError: 'all ok'"] + test[tryf(lambda: raise_instance(), + ((RuntimeError, ValueError), lambda err: f"got a RuntimeError or ValueError: '{err.args[0]}'")) == "got a RuntimeError or ValueError: 'all ok'"] + test[tryf(lambda: "hello", + (ValueError, lambda: "got a ValueError"), + elsef=lambda: "there") == "there"] + test[tryf(lambda: raisef(ValueError("oof")), + (TypeError, lambda: "got a TypeError"), + ((TypeError, ValueError), lambda: "got a TypeError or a ValueError"), + (ValueError, lambda: "got a ValueError")) == "got a TypeError or a ValueError"] + + e = env(finally_ran=False) + test[e.finally_ran is False] + test[tryf(lambda: "hello", + elsef=lambda: "there", + finallyf=lambda: e << ("finally_ran", True)) == "there"] + test[e.finally_ran is True] + + test[tryf(lambda: raise_class(), + (ValueError, lambda: "ok")) == "ok"] + test[tryf(lambda: raise_class(), + ((RuntimeError, ValueError), lambda: "ok")) == "ok"] + + test_raises[TypeError, tryf(lambda: "hello", + (str, lambda: "got a string"))] # str is not an exception type + test_raises[TypeError, 
tryf(lambda: "hello", + ((ValueError, str), lambda: "got a string"))] # same, in the tuple case + test_raises[TypeError, tryf(lambda: "hello", + ("not a type at all!", lambda: "got a string"))] + + with testset("equip_with_traceback"): + e = Exception("just testing") + e = equip_with_traceback(e) + test[e.__traceback__ is not None] # Can't do meaningful testing on the result, so just check it's there. + + test_raises[TypeError, equip_with_traceback("not an exception")] + + with testset("reraise_in, reraise"): + class LibraryException(Exception): + pass + class MoreSophisticatedLibraryException(LibraryException): + pass + class UnrelatedException(Exception): + pass + class ApplicationException(Exception): + pass + + test_raises[ApplicationException, reraise_in(lambda: raisef(LibraryException), + {LibraryException: ApplicationException})] + # subclasses + test_raises[ApplicationException, reraise_in(lambda: raisef(MoreSophisticatedLibraryException), + {LibraryException: ApplicationException})] + # tuple of types as input + test_raises[ApplicationException, reraise_in(lambda: raisef(UnrelatedException), + {(LibraryException, UnrelatedException): + ApplicationException})] + test[returns_normally(reraise_in(lambda: 42, + {LibraryException: ApplicationException}))] + + with test_raises[ApplicationException]: + with reraise({LibraryException: ApplicationException}): + raise LibraryException + with test_raises[ApplicationException]: + with reraise({LibraryException: ApplicationException}): + raise MoreSophisticatedLibraryException + with test_raises[ApplicationException]: + with reraise({(LibraryException, UnrelatedException): ApplicationException}): + raise LibraryException + with test["should return normally"]: + with reraise({LibraryException: ApplicationException}): + 42 + + # async_raise - evil ctypes hack to inject an asynchronous exception into another running thread + if sys.implementation.name != "cpython": + warn["async_raise only supported on CPython, skipping 
test."] # pragma: no cover + else: + with testset("async_raise (inject KeyboardInterrupt)"): + try: + # Test whether the Python we're running on provides ctypes. At least CPython and PyPy3 do. + # For PyPy3, the general rule is "if it imports, it should work", so let's go along with that. + import ctypes # noqa: F401 + out = [] # box, really, but let's not depend on unpythonic.collections in this unrelated unit test module + def test_async_raise_worker(): + try: + for j in range(10): + sleep(0.1) + except KeyboardInterrupt: # normally, KeyboardInterrupt is only raised in the main thread + pass + out.append(j) + t = threading.Thread(target=test_async_raise_worker) + t.start() + sleep(0.1) # make sure we're in the while loop + async_raise(t, KeyboardInterrupt) + t.join() + test[out[0] < 9] # terminated early due to the injected KeyboardInterrupt + except NotImplementedError: # pragma: no cover + error["async_raise not supported on this Python interpreter."] + + test_raises[TypeError, async_raise(42, KeyboardInterrupt)] # not a thread + + t = threading.Thread(target=lambda: None) + t.start() + t.join() + test_raises[ValueError, async_raise(t, KeyboardInterrupt)] # thread no longer running + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/tests/test_fix.py b/unpythonic/tests/test_fix.py index 0a93b18d..bc17d873 100644 --- a/unpythonic/tests/test_fix.py +++ b/unpythonic/tests/test_fix.py @@ -105,7 +105,7 @@ def iterate1_rec(f, x): f, c = cosser2(1) # f ends up in the return value because it's in the args of iterate1_rec. 
test[the[c] == the[cos(c)]] - with testset("multithreading"): + with testset("thread-safety"): def threadtest(): a_calls = [] @fix() @@ -119,9 +119,9 @@ def b(tid, k): return a(tid, (k + 1) % 3) comm = Queue() - def worker(q): + def worker(que): r = a(id(threading.current_thread()), 0) - q.put(r is NoReturn) + que.put(r is NoReturn) n = 1000 threads = [threading.Thread(target=worker, args=(comm,), kwargs={}) for _ in range(n)] diff --git a/unpythonic/tests/test_fold.py b/unpythonic/tests/test_fold.py index af2103ef..d12ce6ea 100644 --- a/unpythonic/tests/test_fold.py +++ b/unpythonic/tests/test_fold.py @@ -10,6 +10,7 @@ foldl, foldr, reducel, reducer, rreducel, rfoldl, unfold, unfold1, prod, running_minmax, minmax) from ..fun import curry, composer, composerc, composel, to1st, rotate +from ..funutil import Values from ..llist import cons, nil, ll, lreverse from ..it import take, tail @@ -93,9 +94,11 @@ def mymap_one2(f, iterable): doubler = mymap_one4(double) test[doubler(ll(1, 2, 3)) == ll(2, 4, 6)] - # curry supports passing through on the right any args over the max arity. - # If an intermediate result is a callable, it is invoked on the remaining - # positional args: + # curry supports passthrough for any args/kwargs that can't be accepted by + # the function's call signature (too many positionals or unknown named args). + # Positionals are passed through on the right. + # If the first positional return value of an intermediate result is a callable, + # it is curried, and invoked on the remaining args/kwargs: test[curry(mymap_one4, double, ll(1, 2, 3)) == ll(2, 4, 6)] # But having any args remaining when the top-level curry context exits @@ -118,7 +121,7 @@ def mymap_one2(f, iterable): # # The iterables are taken by the processing function. acc, being the last # argument, is passed through on the right. 
The output from the processing - # function - one new item - and acc then become a two-tuple, which gets + # function - one new item - and acc then become two arguments, which get # passed into cons. myadd = lambda x, y: x + y # can't inspect signature of builtin add test[curry(mymap, myadd, ll(1, 2, 3), ll(2, 4, 6)) == ll(3, 6, 9)] @@ -180,15 +183,18 @@ def step2(k): # x0, x0 + 2, x0 + 4, ... return (k, k + 2) # (value, newstate) def fibo(a, b): - return (a, b, a + b) # (value, *newstates) + # First positional return value is the value to yield. + # Everything else is newstate, to be unpacked to `fibo`'s + # args/kwargs at the next iteration. + return Values(a, a=b, b=a + b) def myiterate(f, x): # x0, f(x0), f(f(x0)), ... - return (x, f, f(x)) + return Values(x, f=f, x=f(x)) def zip_two(As, Bs): if len(As) and len(Bs): (A0, *moreAs), (B0, *moreBs) = As, Bs - return ((A0, B0), moreAs, moreBs) + return Values((A0, B0), As=moreAs, Bs=moreBs) test[tuple(take(10, unfold1(step2, 10))) == (10, 12, 14, 16, 18, 20, 22, 24, 26, 28)] test[tuple(take(10, unfold(fibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)] diff --git a/unpythonic/tests/test_fploop.py b/unpythonic/tests/test_fploop.py index 43dc1de4..ac1ca062 100644 --- a/unpythonic/tests/test_fploop.py +++ b/unpythonic/tests/test_fploop.py @@ -6,10 +6,11 @@ from ..fploop import looped, looped_over, breakably_looped, breakably_looped_over from ..tco import trampolined, jump +from ..ec import catch, throw +from ..funutil import call from ..let import let +from ..misc import timer from ..seq import begin -from ..misc import call, timer -from ..ec import catch, throw def runtests(): with testset("basic usage"): @@ -61,27 +62,27 @@ def s(loop, acc=0, i=0): test[s == 35] with testset("error cases"): - with test_raises(ValueError, "@looped: should detect invalid definition, no loop parameter"): + with test_raises[ValueError, "@looped: should detect invalid definition, no loop parameter"]: @looped def s(): fail["Should not be 
reached because the definition is faulty."] # pragma: no cover - with test_raises(ValueError, "@looped: should detect invalid definition, extra parameter not initialized"): + with test_raises[ValueError, "@looped: should detect invalid definition, extra parameter not initialized"]: @looped def s(loop, myextra): fail["Should not be reached because the definition is faulty."] # pragma: no cover - with test_raises(ValueError, "@looped_over: should detect invalid definition, no (loop, x, acc) parameters for loop body"): + with test_raises[ValueError, "@looped_over: should detect invalid definition, no (loop, x, acc) parameters for loop body"]: @looped_over(range(10), acc=()) def s(): fail["Should not be reached because the definition is faulty."] # pragma: no cover - with test_raises(ValueError, "@looped_over: should detect invalid definition, no acc parameter for loop body"): + with test_raises[ValueError, "@looped_over: should detect invalid definition, no acc parameter for loop body"]: @looped_over(range(10), acc=()) def s(loop, x): fail["Should not be reached because the definition is faulty."] # pragma: no cover - with test_raises(ValueError, "@looped_over: should detect invalid definition, extra parameter not initialized"): + with test_raises[ValueError, "@looped_over: should detect invalid definition, extra parameter not initialized"]: @looped_over(range(10), acc=()) def s(loop, x, acc, myextra): fail["Should not be reached because the definition is faulty."] # pragma: no cover @@ -327,7 +328,7 @@ def result(loop, brk, acc=0, i=0): return loop(acc + i, i + 1) # provide the additional parameters test[result == 45] - with test_raises(ValueError): + with test_raises[ValueError]: @breakably_looped def result(loop): # missing `brk` parameter pass # pragma: no cover @@ -363,12 +364,12 @@ def s(loop, x, acc, cnt, brk): return loop(acc + x) # pragma: no cover test[s == 0] - with test_raises(ValueError): + with test_raises[ValueError]: @breakably_looped_over(range(10), 
acc=0) def s(loop, x, acc): # missing `cnt` and `brk` parameters return loop(acc + x) # pragma: no cover - with test_raises(ValueError): + with test_raises[ValueError]: @breakably_looped_over(range(10), acc=0) def s(loop, x, acc, cnt): # missing `brk` parameter return loop(acc + x) # pragma: no cover diff --git a/unpythonic/tests/test_fpnumerics.py b/unpythonic/tests/test_fpnumerics.py index 4c530a07..e7f8eea3 100644 --- a/unpythonic/tests/test_fpnumerics.py +++ b/unpythonic/tests/test_fpnumerics.py @@ -5,14 +5,16 @@ Based on various sources; links provided in the source code comments. """ -from ..syntax import macros, test # noqa: F401 +from ..syntax import macros, test, warn # noqa: F401 from ..test.fixtures import session, testset, returns_normally from operator import add, mul from itertools import repeat -from math import sin, pi, log2 +from math import sin, cos, pi, log2 +from cmath import sin as complex_sin from ..fun import curry +from ..funutil import Values from ..it import unpack, drop, take, tail, first, second, last, iterate1, within from ..fold import scanl, scanl1, unfold from ..mathseq import gmathify, imathify @@ -132,7 +134,7 @@ def nats(start=0): @gmathify def fibos(): def nextfibo(a, b): - return a, b, a + b + return Values(a, a=b, b=a + b) return unfold(nextfibo, 1, 1) @gmathify def pows(): @@ -192,6 +194,46 @@ def best_differentiate_with_tol(h0, f, x, eps): # Thanks to super_improve, this actually requires taking only three terms. test[abs(best_differentiate_with_tol(0.1, sin, pi / 2, 1e-8)) < 1e-11] + # This is strictly speaking not FP, but it is worth noting that + # numerical derivatives of real-valued functions can also be estimated + # using a not very well known trick based on complex numbers. + # + # Let f be a complex analytic function (or a complex analytic piece of a piecewise defined + # function) that takes on real values for inputs on the real line. 
Consider the Taylor series + # f(x + iε) = f(x) + i ε f'(x) + O(ε²) + # where x is a real number, i = √-1, and ε is a small real number. We have + # real(f(x + iε)) = f(x) + O(ε²) + # imag(f(x + iε) / ε) = f'(x) + # This gives us both f(x) and f'(x) with one complex-valued computation. + # No cancellation, so we can take a really small ε (e.g. ε = 1e-150). + # + # This comes from + # Goodfellow, Bengio and Courville (2016): Deep Learning, MIT press, p. 434: + # https://www.deeplearningbook.org/contents/guidelines.html + # who cite it to originate from + # William Squire and George Trapp (1998). Using Complex Variables to Estimate Derivatives + # of Real Functions. SIAM Review, 40(1), 110-112. http://doi.org/10.1137/S003614459631241X + # who, in turn, cite it to originate from + # J. N. Lyness and C. B. Moler. 1967. Numerical differentiation of analytic functions, + # SIAM J. Numer. Anal., 4, pp. 202–210. + # and + # J. N. Lyness. 1967. Numerical algorithms based on the theory of complex variables, + # Proc. ACM 22nd Nat. Conf., Thompson Book Co., Washington, DC, pp. 124–134. + # + # See also + # Joaquim J Martins, Peter Sturdza, Juan J Alonso. The complex-step derivative approximation. + # ACM Transactions on Mathematical Software, Association for Computing Machinery, 2003, 29, + # pp.245-262. 10.1145/838250.838251. hal-01483287. + # https://hal.archives-ouvertes.fr/hal-01483287/document + # + # So this technique has been known since the late 1960s, but even as of this writing, + # 55 years later (2022), it has not seen much use. + eps = 1e-150 + def complex_diff(f, x): + return (f(x + eps * 1j) / eps).imag + # This is so accurate in this simple case that we can test for floating point equality. + test[complex_diff(complex_sin, 0.1) == cos(0.1)] + # pi approximation with Euler series acceleration # # See SICP, 2nd ed., sec. 3.5.3. 
diff --git a/unpythonic/tests/test_fun.py b/unpythonic/tests/test_fun.py index e743cc2b..cfcd7551 100644 --- a/unpythonic/tests/test_fun.py +++ b/unpythonic/tests/test_fun.py @@ -1,12 +1,16 @@ # -*- coding: utf-8 -*- -from ..syntax import macros, test, test_raises, fail # noqa: F401 -from ..test.fixtures import session, testset +from ..syntax import macros, test, test_raises, fail, the # noqa: F401 +from ..test.fixtures import session, testset, returns_normally from collections import Counter import sys +from queue import Queue +import threading +from time import sleep -from ..fun import (memoize, curry, apply, +from ..dispatch import generic +from ..fun import (memoize, partial, curry, apply, identity, const, andf, orf, notf, flip, rotate, @@ -14,18 +18,20 @@ composelc, composerc, to1st, to2nd, tokth, tolast, to, withself) +from ..funutil import Values +from ..it import allsame +from ..misc import slurp from ..dynassign import dyn -from ..arity import UnknownArity def runtests(): with testset("identity function"): - test[identity(1, 2, 3) == (1, 2, 3)] + test[identity(1, 2, 3) == Values(1, 2, 3)] test[identity(42) == 42] test[identity() is None] # no args, default value with testset("constant function"): - test[const(1, 2, 3)(42, "foo") == (1, 2, 3)] + test[const(1, 2, 3)(42, "foo") == Values(1, 2, 3)] test[const(42)("anything") == 42] test[const()("anything") is None] @@ -63,7 +69,8 @@ def memotuple(*args): test[memotuple((1, 2, 3)) is memotuple((1, 2, 3))] test[memotuple((1, 2, 3)) is not memotuple((1, 2))] - # "memoize lambda": classic evaluate-at-most-once thunk + # "memoize lambda": classic evaluate-at-most-once thunk. + # See also the `lazy[]` macro. 
thunk = memoize(lambda: print("hi from thunk")) thunk() thunk() @@ -133,6 +140,47 @@ def t(): fail["memoize should not prevent exception propagation."] # pragma: no cover test[evaluations == 1] + with testset("@memoize thread-safety"): + def threadtest(): + @memoize + def f(x): + # Sleep a "long" time to make actual concurrent operation more likely. + sleep(0.001) + + # The trick here is that because only one thread will acquire the lock + # for the memo, then for the same `x`, all the results should be the same. + return (id(threading.current_thread()), x) + + comm = Queue() + def worker(que): + # The value of `x` doesn't matter, as long as it's the same in all workers. + r = f(42) + que.put(r) + + n = 1000 + threads = [threading.Thread(target=worker, args=(comm,), kwargs={}) for _ in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + + # Test that all threads finished, and that the results from each thread are the same. + results = slurp(comm) + test[the[len(results)] == the[n]] + test[allsame(results)] + threadtest() + + with testset("partial (type-checking wrapper)"): + def nottypedfunc(x): + return "ok" + test[returns_normally(partial(nottypedfunc, 42))] + test[returns_normally(partial(nottypedfunc, "abc"))] + + def typedfunc(x: int): + return "ok" + test[returns_normally(partial(typedfunc, 42))] + test_raises[TypeError, partial(typedfunc, "abc")] + with testset("@curry"): @curry def add3(a, b, c): @@ -143,6 +191,15 @@ def add3(a, b, c): test[add3(1)(2, 3) == 6] test[add3(1, 2, 3) == 6] + # curry uses the type-checking `partial` + @curry + def add3ints(a: int, b: int, c: int): + return a + b + c + test[add3ints(1)(2)(3) == 6] + test[callable(the[add3ints(1)])] + test_raises[TypeError, add3ints(1.0)] + test_raises[TypeError, add3ints(1)(2.0)] + @curry def lispyadd(*args): return sum(args) @@ -166,49 +223,106 @@ def t(): a = curry(add) test[curry(a) is a] # curry wrappers should not stack - # Curry passes through extra args on the right, 
like in Haskell. Each - # call consumes args up to the maximum arity of the function being - # called. If the return value is callable, it is the next function - # to be (implicitly curried and then) called. + # curry supports passthrough for any args/kwargs that can't be accepted by + # the function's call signature (too many positionals or unknown named args). + # Positionals are passed through on the right. + # If the first positional return value of an intermediate result is a callable, + # it is curried, and invoked on the remaining args/kwargs: @curry def f(x): # note f takes only one arg return lambda y: x * y test[f(2, 21) == 42] - # Curry raises by default when the top-level curry context exits with - # args remaining. This is so that providing too many args will still - # raise `TypeError`. + # By default, `curry` raises `TypeError` when the top-level curry context exits + # with args/kwargs remaining. This is a safety feature: providing args/kwargs + # not consumed during the curry chain will raise an error, rather than silently + # produce results that are likely not what was intended. def double(x): return 2 * x - with test_raises(TypeError, "leftover args should not be allowed by default"): + with test_raises[TypeError, "leftover positional args should not be allowed by default"]: curry(double, 2, "foo") + with test_raises[TypeError, "leftover named args should not be allowed by default"]: + curry(double, 2, nosucharg="foo") - # To disable the error, use this trick to explicitly state you want to do so: - with test("leftover args should be allowed with manually created surrounding context"): + # The check can be disabled, by stating explicitly that you want to do so: + with test["leftover positional args should be allowed with manually created surrounding context"]: with dyn.let(curry_context=["whatever"]): # any human-readable label is fine. # a `with test` can optionally return a value, which becomes the asserted expr. 
- return curry(double, 2, "foo") == (4, "foo") - - # Methods of builtin types have uninspectable arity up to Python 3.6. - # Python 3.7 seems to fix this at least for `list`, and PyPy3 (7.3.0; Python 3.6.9) - # doesn't have this error either. - if sys.version_info < (3, 7, 0) and sys.implementation.name == "cpython": - with testset("uninspectable builtins"): - lst = [] - test_raises[UnknownArity, curry(lst.append)] # uninspectable method of builtin type - - # Internal feature, used by curry macro. If uninspectables are said to be ok, - # then attempting to curry an uninspectable simply returns the original function. - # - # Due to Python's method binding machinery re-triggering the descriptor on each lookup, - # each lookup of `lst.append` will produce a *new* instance of the object that - # represents the bound method (builtin method, in this case). They print the same, - # they look the same... but they `is not` the same. - # - # To avoid this pitfall, we do the lookup exactly once - and then reuse the result. - m1 = lst.append - m2 = curry(m1, _curry_allow_uninspectable=True) - test[m2 is m1] + return the[curry(double, 2, "foo")] == Values(4, "foo") + + with test["leftover named args should be allowed with manually created surrounding context"]: + with dyn.let(curry_context=["whatever"]): + return the[curry(double, 2, nosucharg="foo")] == Values(4, nosucharg="foo") + + # This doesn't occur on PyPy3, or on CPython 3.11+. + if sys.implementation.name == "cpython": # pragma: no cover + if sys.version_info < (3, 11, 0): + with testset("uninspectable builtin functions"): + test_raises[ValueError, curry(print)] # builtin function that fails `inspect.signature` + + # Internal feature, used by curry macro. If uninspectables are said to be ok, + # then attempting to curry an uninspectable simply returns the original function. 
+ m1 = print + m2 = curry(print, _curry_allow_uninspectable=True) + test[the[m2] is the[m1]] + + with testset("curry kwargs support"): + @curry + def testing12(x, y): + return x, y + test[testing12(1)(2) == (1, 2)] + test[testing12(1)(y=2) == (1, 2)] + test[testing12(x=1)(y=2) == (1, 2)] + test[testing12(y=2)(x=1) == (1, 2)] + + @curry + def makemul(x): + def mymul(y): + return x * y + return mymul + test[callable(makemul())] # not enough args/kwargs yet + test[makemul(2)(3) == 6] # just enough args + test[makemul(2, 3) == 6] # extra args + test[makemul(2, y=3) == 6] # extra kwargs, fine if callable intermediate result can accept them + test[makemul(x=2, y=3) == 6] + test[makemul(y=3, x=2) == 6] + + with testset("curry integration with @generic"): # v0.15.0+ + @generic + def f(x: int): + return "int" + @generic + def f(x: float, y: str): # noqa: F811, new multimethod for the same generic function. + return "float, str" + test[callable(curry(f))] + test[curry(f, 42) == "int"] + # Although `f` has a multimethod that takes one argument, if that argument is a float, + # the call signature does not match fully. But it does match partially, so in that case + # `curry` waits for more arguments (because there is at least one multimethod that matches + # the partial arguments given so far). + test[callable(curry(f, 3.14))] # partial match + test[curry(f, 3.14, "cat") == "float, str"] # exact match + + # Partial match, but let's use the return value of `curry` (does it chain correctly?). 
+ tmp = curry(f, 3.14) + test[tmp("cat") == "float, str"] + + @curry + @generic + def makemul_typed(x: int): + @generic + def mymul_typed(y: int): + return x * y + return mymul_typed + test[callable(makemul_typed())] # not enough args/kwargs yet + test[makemul_typed(2)(3) == 6] # just enough args + test[makemul_typed(2, 3) == 6] # extra args + test[makemul_typed(2, y=3) == 6] # extra kwargs, fine if callable intermediate result can accept them + test[makemul_typed(x=2, y=3) == 6] + test[makemul_typed(y=3, x=2) == 6] + test_raises[TypeError, makemul_typed(2.0)] # only defined for int + test_raises[TypeError, makemul_typed(2.0, 3)] # should notice it even with extra args + test_raises[TypeError, makemul_typed(2, 3.0)] with testset("compose"): double = lambda x: 2 * x @@ -233,32 +347,43 @@ def double(x): test[inc2_then_double(3) == 10] test[double_then_inc2(3) == 8] + with testset("compose with multiple-return-values, named return values"): + f = lambda x, y: Values(2 * x, 3 * y) + g = lambda x, y: Values(x + 2, y + 3) + f_then_g = composel(f, g) + test[f_then_g(1, 2) == Values(4, 9)] + + f = lambda x, y: Values(x=2 * x, y=3 * y) + g = lambda x, y: Values(x=x + 2, y=y + 3) + f_then_g = composel(f, g) + test[f_then_g(1, 2) == Values(x=4, y=9)] + with testset("curry in compose chain"): def f1(a, b): - return 2 * a, 3 * b + return Values(2 * a, 3 * b) def f2(a, b): return a + b f1_then_f2_a = composelc(f1, f2) f1_then_f2_b = composerc(f2, f1) - test[f1_then_f2_a(2, 3) == f1_then_f2_b(2, 3) == 13] + test[the[f1_then_f2_a(2, 3)] == the[f1_then_f2_b(2, 3)] == 13] def f3(a, b): - return a, b + return Values(a, b) def f4(a, b, c): return a + b + c f1_then_f3_then_f4 = composelc(f1, f3, f4) test[f1_then_f3_then_f4(2, 3, 5) == 18] # extra arg passed through on the right with testset("to1st, to2nd, tolast, to (argument shunting)"): - test[to1st(double)(1, 2, 3) == (2, 2, 3)] - test[to2nd(double)(1, 2, 3) == (1, 4, 3)] - test[tolast(double)(1, 2, 3) == (1, 2, 6)] + 
test[to1st(double)(1, 2, 3) == Values(2, 2, 3)] + test[to2nd(double)(1, 2, 3) == Values(1, 4, 3)] + test[tolast(double)(1, 2, 3) == Values(1, 2, 6)] processor = to((0, double), (-1, inc), (1, composer(double, double)), (0, inc)) - test[processor(1, 2, 3) == (3, 8, 4)] + test[processor(1, 2, 3) == Values(3, 8, 4)] with testset("tokth error cases"): test_raises[TypeError, tokth(3, double)()] # expect at least one argument @@ -272,11 +397,11 @@ def f(a, b): test[(flip(f))(1, b=2) == (1, 2)] # b -> kwargs with testset("rotate arglist"): - test[(rotate(-1)(identity))(1, 2, 3) == (3, 1, 2)] - test[(rotate(1)(identity))(1, 2, 3) == (2, 3, 1)] + test[(rotate(-1)(identity))(1, 2, 3) == Values(3, 1, 2)] + test[(rotate(1)(identity))(1, 2, 3) == Values(2, 3, 1)] # inner to outer: (a, b, c) -> (b, c, a) -> (a, c, b) - test[flip(rotate(-1)(identity))(1, 2, 3) == (1, 3, 2)] + test[flip(rotate(-1)(identity))(1, 2, 3) == Values(1, 3, 2)] with testset("rotate error cases"): test_raises[TypeError, (rotate(1)(identity))()] # expect at least one argument diff --git a/unpythonic/tests/test_funutil.py b/unpythonic/tests/test_funutil.py new file mode 100644 index 00000000..16fe240f --- /dev/null +++ b/unpythonic/tests/test_funutil.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- + +from ..syntax import macros, test, the # noqa: F401 +from ..test.fixtures import session, testset + +from operator import add +from functools import partial + +# `Values` is also tested where function composition utilities that use it are. +from ..funutil import call, callwith, Values, valuify + +def runtests(): + with testset("@call (def as code block)"): + # def as a code block (function overwritten by return value) + @call + def result(): + return "hello" + test[result == "hello"] + + # use case 1: make temporaries fall out of scope + @call + def x(): + a = 2 # many temporaries that help readability... + b = 3 # ...of this calculation, but would just pollute locals... 
+ c = 5 # ...after the block exits + return a * b * c + test[x == 30] + + # use case 2: multi-break out of nested loops + @call + def result(): + for x in range(10): + for y in range(10): + if x * y == 42: + return (x, y) + ... # more code here # pragma: no cover + test[result == (6, 7)] + + # can also be used normally + test[the[call(add, 2, 3)] == the[add(2, 3)]] + + with testset("@callwith (argument freezer), and pythonic solutions to avoid it"): + # to pass arguments when used as decorator, use @callwith instead + @callwith(3) + def result(x): + return x**2 + test[result == 9] + + # specialize for given arguments, choose function later + apply23 = callwith(2, 3) + def myadd(a, b): + return a + b + def mymul(a, b): + return a * b + test[apply23(myadd) == 5] + test[apply23(mymul) == 6] + + # callwith is not essential; we can do the same pythonically like this: + a = [2, 3] + test[myadd(*a) == 5] + test[mymul(*a) == 6] + + # build up the argument list as we go + # - note curry does not help, must use partial; this is because curry + # will happily call "callwith" (and thus terminate the gathering step) + # as soon as it gets at least one argument. 
+ p1 = partial(callwith, 2) + p2 = partial(p1, 3) + p3 = partial(p2, 4) + apply234 = p3() # terminate gathering step by actually calling callwith + def add3(a, b, c): + return a + b + c + def mul3(a, b, c): + return a * b * c + test[apply234(add3) == 9] + test[apply234(mul3) == 24] + + # pythonic solution: + a = [2] + a += [3] + a += [4] + test[add3(*a) == 9] + test[mul3(*a) == 24] + + # callwith in map, if we want to vary the function instead of the data + m = map(callwith(3), [lambda x: 2 * x, + lambda x: x**2, + lambda x: x**(1 / 2)]) + test[tuple(m) == (6, 9, 3**(1 / 2))] + + # pythonic solution - use comprehension notation: + m = (f(3) for f in [lambda x: 2 * x, + lambda x: x**2, + lambda x: x**(1 / 2)]) + test[tuple(m) == (6, 9, 3**(1 / 2))] + + # The `Values` abstraction is used by various parts of `unpythonic` that + # deal with function composition; particularly `curry`, the `compose` and + # `pipe` families, and the `with continuations` macro. + with testset("Values (multiple-return-values, named return values)"): + def f(): + return Values(1, 2, 3) + result = f() + test[isinstance(result, Values)] + test[result.rets == (1, 2, 3)] + test[not result.kwrets] + test[result[0] == 1] + test[result[:-1] == (1, 2)] + a, b, c = result # if no kwrets, can be unpacked like a tuple + a, b, c = f() + + def g(): + return Values(x=3) # named return value + result = g() + test[isinstance(result, Values)] + test[not result.rets] + test[result.kwrets == {"x": 3}] # actually a `frozendict` + test["x" in result] # `in` looks in the named part + test[result["x"] == 3] + test[result.get("x", None) == 3] + test[result.get("y", None) is None] + test[tuple(result.keys()) == ("x",)] # also `values()`, `items()` + + def h(): + return Values(1, 2, x=3) + result = h() + test[isinstance(result, Values)] + test[result.rets == (1, 2)] + test[result.kwrets == {"x": 3}] + a, b = result.rets # positionals can always be unpacked explicitly + test[result[0] == 1] + test["x" in result] + 
test[result["x"] == 3] + + def silly_but_legal(): + return Values(42) + result = silly_but_legal() + test[result.rets[0] == 42] + test[result.ret == 42] # shorthand for single-value case + + with testset("valuify (convert tuple as multiple-return-values into Values)"): + @valuify + def f(x, y, z): + return x, y, z + test[isinstance(f(1, 2, 3), Values)] + test[f(1, 2, 3) == Values(1, 2, 3)] + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/tests/test_fup.py b/unpythonic/tests/test_fup.py index da9f64b6..ee77832f 100644 --- a/unpythonic/tests/test_fup.py +++ b/unpythonic/tests/test_fup.py @@ -3,11 +3,12 @@ from ..syntax import macros, test, test_raises, the # noqa: F401 from ..test.fixtures import session, testset -from itertools import repeat +from itertools import count, repeat from collections import namedtuple from ..fup import fupdate from ..collections import frozendict +from ..gmemo import imemoize def runtests(): with testset("mutable sequence"): @@ -77,6 +78,9 @@ def runtests(): test[tup == (1, 2, 3, 4, 5)] test[out == (4, 3, 2, 1, 0)] + out = fupdate(tup, slice(None, None, -1), range(5)) # no tuple() needed + test[out == (4, 3, 2, 1, 0)] + with testset("multiple individual items"): tup = (1, 2, 3, 4, 5) out = fupdate(tup, (1, 2, 3), (17, 23, 42)) @@ -90,6 +94,24 @@ def runtests(): test[tup == tuple(range(10))] test[out == (2, 3, 2, 3, 2, 3, 2, 3, 2, 3)] + with testset("infinite replacement"): + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, None), repeat(42)) + test[out == (42, 42, 42, 42, 42)] + + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, None), count(start=10)) + test[out == (10, 11, 12, 13, 14)] + + with testset("memoized infinite replacement, reading its start backwards"): + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, -1), imemoize(repeat(42))()) + test[out == (42, 42, 42, 42, 42)] + + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, 
None, -1), imemoize(count(start=10))()) + test[out == (14, 13, 12, 11, 10)] + with testset("mix and match"): tup = tuple(range(10)) out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2), 6), @@ -98,13 +120,17 @@ def runtests(): test[out == (2, 3, 2, 3, 2, 3, 42, 3, 2, 3)] with testset("error cases"): - with test_raises(IndexError, "should detect replacement sequence too short"): + with test_raises[IndexError, "should detect replacement sequence too short"]: tup = (1, 2, 3, 4, 5) out = fupdate(tup, slice(1, None, 2), (10,)) # need 2 items, have 1 # cannot specify both indices and bindings test_raises[ValueError, fupdate(tup, slice(1, None, 2), (10,), somename="some value")] + # not memoized, cannot read a general iterable backwards + tup = (1, 2, 3, 4, 5) + test_raises[IndexError, fupdate(tup, slice(None, None, -1), count(start=10))] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_gmemo.py b/unpythonic/tests/test_gmemo.py index 63c1a9ea..d3539044 100644 --- a/unpythonic/tests/test_gmemo.py +++ b/unpythonic/tests/test_gmemo.py @@ -10,7 +10,8 @@ from ..it import take, drop, last from ..fold import prod -from ..misc import call, timer +from ..funutil import call +from ..misc import timer def runtests(): with testset("multiple instances, interleaved"): @@ -91,6 +92,47 @@ def gen(): fail["Should have raised at the second next() call."] # pragma: no cover test[total_evaluations == 2] + with testset("subscripting to get already computed items"): + @gmemoize + def gen(): + yield from range(5) + g3 = gen() + + # Any item that has entered the memo can be retrieved by subscripting. + # len() is the current length of the memo. + test[len(g3) == 0] + next(g3) + test[len(g3) == 1] + next(g3) + test[len(g3) == 2] + next(g3) + test[len(g3) == 3] + test[g3[0] == 0] + test[g3[1] == 1] + test[g3[2] == 2] + + # Items not yet memoized cannot be retrieved from the memo. 
+ test_raises[IndexError, g3[3]] + + # Negative indices work too, counting from the current end of the memo. + test[g3[-1] == 2] + test[g3[-2] == 1] + test[g3[-3] == 0] + + # Counting back past the start is an error, just like in `list`. + test_raises[IndexError, g3[-4]] + + # Slicing is supported. + test[g3[0:3] == [0, 1, 2]] + test[g3[0:2] == [0, 1]] + test[g3[::-1] == [2, 1, 0]] + test[g3[0::2] == [0, 2]] + test[g3[2::-2] == [2, 0]] + + # Out-of-range slices produce the empty list, like in `list`. + test[g3[3:] == []] + test[g3[-4::-1] == []] + with testset("memoizing a sequence partially"): # To do this, build a chain of generators, then memoize only the last one: evaluations = Counter() diff --git a/unpythonic/tests/test_it.py b/unpythonic/tests/test_it.py index 788f81f1..50fe546a 100644 --- a/unpythonic/tests/test_it.py +++ b/unpythonic/tests/test_it.py @@ -7,7 +7,7 @@ from itertools import tee, count, takewhile from operator import add, itemgetter from collections import deque -from math import cos, sqrt +from math import cos from ..it import (map, mapr, rmap, zipr, rzip, map_longest, mapr_longest, rmap_longest, @@ -22,18 +22,18 @@ flatten, flatten1, flatten_in, iterate1, iterate, partition, - partition_int, inn, iindex, find, window, chunked, - within, fixpoint, + within, interleave, subset, powerset, allsame) from ..fun import composel, identity, curry +from ..funutil import Values from ..gmemo import imemoize, gmemoize from ..mathseq import s -from ..misc import Popper, ulp +from ..misc import Popper def runtests(): with testset("mapping and zipping"): @@ -72,11 +72,11 @@ def noneadd(a, b): # but actually requires 2. Solution: use partial instead of curry. 
lzip2 = partial(map, identity) rzip2 = lambda *iterables: map(identity, *(rev(s) for s in iterables)) - test[tuple(lzip2((1, 2, 3), (4, 5, 6), (7, 8))) == ((1, 4, 7), (2, 5, 8))] - test[tuple(rzip2((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7))] + test[tuple(lzip2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(1, 4, 7), Values(2, 5, 8))] + test[tuple(rzip2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(3, 6, 8), Values(2, 5, 7))] rzip3 = partial(rmap, identity) - test[tuple(rzip3((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7))] + test[tuple(rzip3((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(3, 6, 8), Values(2, 5, 7))] with testset("first, second, nth, last"): test[first(range(5)) == 0] @@ -274,26 +274,26 @@ def primes(): with testset("window"): lst = list(range(5)) out = [] - for a, b, c in window(lst, n=3): + for a, b, c in window(3, lst): out.append((a, b, c)) test[lst == list(range(5))] test[out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]] lst = range(5) out = [] - for a, b, c in window(lst, n=3): + for a, b, c in window(3, lst): out.append((a, b, c)) test[lst == range(5)] test[out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]] lst = (x for x in range(5)) out = [] - for a, b, c in window(lst, n=3): + for a, b, c in window(3, lst): out.append((a, b, c)) test[out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]] - test_raises[ValueError, window(range(5), n=1)] - test[tuple(window(range(5), n=10)) == ()] + test_raises[ValueError, window(1, range(5))] # n must be at least 2 + test[tuple(window(10, range(5))) == ()] # iterable shorter than window length with testset("window integration with Popper"): # This works because window() iter()s the Popper, but the Popper never @@ -304,7 +304,7 @@ def primes(): # because the window needs them to initialize itself.) 
inp = deque(range(3)) out = [] - for a, b in window(Popper(inp)): + for a, b in window(2, Popper(inp)): out.append((a, b)) if a < 10: inp.append(a + 10) @@ -341,7 +341,9 @@ def primes(): S = {"cat", "lynx", "lion", "tiger"} # unordered test[all(subset(tuple(s), S) for s in powerset(S))] - # repeated function application + # Repeated function application. + # If you want to compute arithmetic fixpoints (like we do here for testing), + # see `unpythonic.numutil.fixpoint`. with testset("iterate1, iterate"): test[last(take(100, iterate1(cos, 1.0))) == 0.7390851332151607] @@ -349,9 +351,9 @@ def primes(): # it doesn't matter where you start, the fixed point of cosine # remains the same. def cos3(a, b, c): - return cos(a), cos(b), cos(c) + return Values(cos(a), cos(b), cos(c)) fp = 0.7390851332151607 - test[the[last(take(100, iterate(cos3, 1.0, 2.0, 3.0)))] == (the[fp], fp, fp)] + test[the[last(take(100, iterate(cos3, 1.0, 2.0, 3.0)))] == Values(the[fp], fp, fp)] # within() - terminate a Cauchy sequence after a tolerance is reached. # The condition is `abs(a - b) <= tol` **for the last two yielded items**. @@ -371,47 +373,13 @@ def g2(): yield 4 test[tuple(within(0, g2())) == (1, 2, 3, 4, 4)] - # Arithmetic fixed points. - with testset("fixpoint (arithmetic fixed points)"): - c = fixpoint(cos, x0=1) - test[the[c] == the[cos(c)]] # 0.7390851332151607 - - # Actually "Newton's" algorithm for the square root was already known to the - # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) - def sqrt_newton(n): - def sqrt_iter(x): # has an attractive fixed point at sqrt(n) - return (x + n / x) / 2 - return fixpoint(sqrt_iter, x0=n / 2) - # different algorithm, so not necessarily equal down to the last bit - # (caused by the fixpoint update becoming smaller than the ulp, so it - # stops there, even if the limit is still one ulp away). 
- test[abs(the[sqrt_newton(2)] - the[sqrt(2)]) <= the[ulp(1.414)]] - # partition: split an iterable according to a predicate with testset("partition"): iseven = lambda item: item % 2 == 0 test[[tuple(it) for it in partition(iseven, range(10))] == [(1, 3, 5, 7, 9), (0, 2, 4, 6, 8)]] - # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it - with testset("partition_int"): - test[tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1))] - test[tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3))] - test[tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3))] - test[tuple(partition_int(10, lower=3, upper=5)) == ((5, 5), (4, 3, 3), (3, 4, 3), (3, 3, 4))] - test[all(sum(terms) == 10 for terms in partition_int(10))] - test[all(sum(terms) == 10 for terms in partition_int(10, lower=3))] - test[all(sum(terms) == 10 for terms in partition_int(10, lower=3, upper=5))] - - test_raises[TypeError, partition_int("not a number")] - test_raises[TypeError, partition_int(4, lower="not a number")] - test_raises[TypeError, partition_int(4, upper="not a number")] - test_raises[ValueError, partition_int(-3)] - test_raises[ValueError, partition_int(4, lower=-1)] - test_raises[ValueError, partition_int(4, lower=5)] - test_raises[ValueError, partition_int(4, upper=-1)] - test_raises[ValueError, partition_int(4, upper=5)] - test_raises[ValueError, partition_int(4, lower=3, upper=2)] - + # Test whether all items of an iterable are equal. + # (Short-circuits at the first item that is different.) 
with testset("allsame"): test[allsame(())] test[allsame((1,))] diff --git a/unpythonic/tests/test_let.py b/unpythonic/tests/test_let.py index fa8dd9a1..142c6d99 100644 --- a/unpythonic/tests/test_let.py +++ b/unpythonic/tests/test_let.py @@ -6,7 +6,7 @@ from ..let import let, letrec, dlet, dletrec, blet, bletrec from ..env import env as _envcls -from ..misc import call +from ..funutil import call from ..seq import begin def runtests(): @@ -151,7 +151,7 @@ def result(*, env): body=lambda e: e.set('y', 3)), "e.y should not be defined"] - with test_raises(AttributeError, "let environment should be final (should not be able to create new bindings in it inside the let body)"): + with test_raises[AttributeError, "let environment should be final (should not be able to create new bindings in it inside the let body)"]: @blet(x=1) def error1(*, env): env.y = 2 # error, cannot introduce new bindings into a let environment diff --git a/unpythonic/tests/test_lispylet.py b/unpythonic/tests/test_lispylet.py index cad4ed72..8d3ec2c6 100644 --- a/unpythonic/tests/test_lispylet.py +++ b/unpythonic/tests/test_lispylet.py @@ -142,7 +142,7 @@ def result(*, env): e.set('y', 3)), "e.y should not be defined"] - with test_raises(AttributeError, "let environment should be final (should not be able to create new bindings in it inside the let body)"): + with test_raises[AttributeError, "let environment should be final (should not be able to create new bindings in it inside the let body)"]: @blet((('x', 1),)) def error1(*, env): env.y = 2 # error, cannot introduce new bindings into a let environment diff --git a/unpythonic/tests/test_llist.py b/unpythonic/tests/test_llist.py index 053d3ac2..c6798cf1 100644 --- a/unpythonic/tests/test_llist.py +++ b/unpythonic/tests/test_llist.py @@ -22,7 +22,7 @@ def runtests(): test_raises[TypeError, car("sedan")] test_raises[TypeError, cdr("disc")] - with test_raises(TypeError, "cons cells should be immutable"): + with test_raises[TypeError, "cons cells 
should be immutable"]: c.car = 3 test[the[c == c]] diff --git a/unpythonic/tests/test_mathseq.py b/unpythonic/tests/test_mathseq.py index 8388c285..7e4ccd09 100644 --- a/unpythonic/tests/test_mathseq.py +++ b/unpythonic/tests/test_mathseq.py @@ -1,20 +1,20 @@ # -*- coding: utf-8 -*- -from ..syntax import macros, test, test_raises, error, the # noqa: F401 +from ..syntax import macros, test, test_raises, warn, the # noqa: F401 from ..test.fixtures import session, testset -from operator import mul +from operator import add, mul from math import exp, trunc, floor, ceil -from sys import float_info from ..mathseq import (s, imathify, gmathify, sadd, smul, spow, cauchyprod, - primes, fibonacci, - sign, log, almosteq) + primes, fibonacci, triangular, + sign, log) from ..it import take, last from ..fold import scanl from ..gmemo import imemoize -from ..misc import timer, ulp +from ..misc import timer +from ..numutil import ulp def runtests(): with testset("sign (adapter, numeric and symbolic)"): @@ -27,7 +27,7 @@ def runtests(): try: from sympy import symbols except ImportError: # pragma: no cover - error["SymPy not installed in this Python, cannot test symbolic input for mathseq."] + warn["SymPy not installed in this Python, skipping symbolic input tests for mathseq."] else: x = symbols("x", positive=True) test[sign(x) == +1] @@ -40,37 +40,12 @@ def runtests(): try: from sympy import symbols, exp as symbolicExp, E as NeperE except ImportError: # pragma: no cover - error["SymPy not installed in this Python, cannot test symbolic input for mathseq."] + warn["SymPy not installed in this Python, skipping symbolic input tests for mathseq."] else: test[log(NeperE**2) == 2] x = symbols("x", positive=True) test[log(symbolicExp(x)) == x] - with testset("almosteq"): - # For anything but floating-point inputs, it's exact equality. 
- test[almosteq("abc", "abc")] - test[not almosteq("ab", "abc")] - - test[almosteq(1.0, 1.0 + ulp(1.0))] - - # TODO: counterintuitively, need a large tolerance here, because when one operand is zero, - # TODO: the final tolerance is actually tol*min_normal. - min_normal = float_info.min - test[almosteq(min_normal / 2, 0, tol=1.0)] - - too_large = 2**int(1e6) - test_raises[OverflowError, float(too_large), "UPDATE THIS, need a float overflow here."] - test[almosteq(too_large, too_large + 1)] # works, because 1/too_large is very small. - - try: - from mpmath import mpf - except ImportError: # pragma: no cover - error["mpmath not installed in this Python, cannot test arbitrary precision input for mathseq."] - else: - test[almosteq(mpf(1.0), mpf(1.0 + ulp(1.0)))] - test[almosteq(1.0, mpf(1.0 + ulp(1.0)))] - test[almosteq(mpf(1.0), 1.0 + ulp(1.0))] - # explicitly listed elements, same as a genexpr using tuple input (but supports infix math) with testset("s, convenience"): test[tuple(s(1)) == (1,)] @@ -353,7 +328,7 @@ def runtests(): try: from sympy import symbols except ImportError: # pragma: no cover - error["SymPy not installed in this Python, cannot test symbolic input for mathseq."] + warn["SymPy not installed in this Python, skipping symbolic input tests for mathseq."] else: x0 = symbols("x0", real=True) k = symbols("k", positive=True) # important for geometric series @@ -384,10 +359,14 @@ def runtests(): with testset("some special sequences"): test[tuple(take(10, primes())) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] test[tuple(take(10, fibonacci())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)] + test[tuple(take(10, triangular())) == (1, 3, 6, 10, 15, 21, 28, 36, 45, 55)] test[tuple(take(10, primes(optimize="speed"))) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] test[tuple(take(10, primes(optimize="memory"))) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] - test_raises[ValueError, primes(optimize="fun")] # only "speed" and "memory" modes exist + test_raises[ValueError, 
primes(optimize="fun")] # unfortunately only "speed" and "memory" modes exist + + triangulars = imemoize(scanl(add, 1, s(2, 3, ...))) + test[tuple(take(10, triangulars())) == tuple(take(10, triangular()))] factorials = imemoize(scanl(mul, 1, s(1, 2, ...))) # 0!, 1!, 2!, ... test[last(take(6, factorials())) == 120] diff --git a/unpythonic/tests/test_misc.py b/unpythonic/tests/test_misc.py index edadb78f..3002c21c 100644 --- a/unpythonic/tests/test_misc.py +++ b/unpythonic/tests/test_misc.py @@ -1,183 +1,22 @@ # -*- coding: utf-8 -*- -from ..syntax import macros, test, test_raises, error, warn, the # noqa: F401 +from ..syntax import macros, test, test_raises, the # noqa: F401 from ..test.fixtures import session, testset -from operator import add -from functools import partial from collections import deque -from sys import float_info from queue import Queue -from time import sleep -import threading -import sys -from ..misc import (call, callwith, raisef, tryf, equip_with_traceback, - pack, namelambda, timer, - getattrrec, setattrrec, Popper, CountingIterator, ulp, slurp, - async_raise, callsite_filename, safeissubclass) +from ..misc import (pack, + namelambda, + timer, + getattrrec, setattrrec, + Popper, CountingIterator, + slurp, + callsite_filename, + safeissubclass) from ..fun import withself -from ..env import env def runtests(): - with testset("@call (def as code block)"): - # def as a code block (function overwritten by return value) - @call - def result(): - return "hello" - test[result == "hello"] - - # use case 1: make temporaries fall out of scope - @call - def x(): - a = 2 # many temporaries that help readability... - b = 3 # ...of this calculation, but would just pollute locals... - c = 5 # ...after the block exits - return a * b * c - test[x == 30] - - # use case 2: multi-break out of nested loops - @call - def result(): - for x in range(10): - for y in range(10): - if x * y == 42: - return (x, y) - ... 
# more code here # pragma: no cover - test[result == (6, 7)] - - # can also be used normally - test[the[call(add, 2, 3)] == the[add(2, 3)]] - - with testset("@callwith (argument freezer), and pythonic solutions to avoid it"): - # to pass arguments when used as decorator, use @callwith instead - @callwith(3) - def result(x): - return x**2 - test[result == 9] - - # specialize for given arguments, choose function later - apply23 = callwith(2, 3) - def myadd(a, b): - return a + b - def mymul(a, b): - return a * b - test[apply23(myadd) == 5] - test[apply23(mymul) == 6] - - # callwith is not essential; we can do the same pythonically like this: - a = [2, 3] - test[myadd(*a) == 5] - test[mymul(*a) == 6] - - # build up the argument list as we go - # - note curry does not help, must use partial; this is because curry - # will happily call "callwith" (and thus terminate the gathering step) - # as soon as it gets at least one argument. - p1 = partial(callwith, 2) - p2 = partial(p1, 3) - p3 = partial(p2, 4) - apply234 = p3() # terminate gathering step by actually calling callwith - def add3(a, b, c): - return a + b + c - def mul3(a, b, c): - return a * b * c - test[apply234(add3) == 9] - test[apply234(mul3) == 24] - - # pythonic solution: - a = [2] - a += [3] - a += [4] - test[add3(*a) == 9] - test[mul3(*a) == 24] - - # callwith in map, if we want to vary the function instead of the data - m = map(callwith(3), [lambda x: 2 * x, lambda x: x**2, lambda x: x**(1 / 2)]) - test[tuple(m) == (6, 9, 3**(1 / 2))] - - # pythonic solution - use comprehension notation: - m = (f(3) for f in [lambda x: 2 * x, lambda x: x**2, lambda x: x**(1 / 2)]) - test[tuple(m) == (6, 9, 3**(1 / 2))] - - # raisef: raise an exception from an expression position - with testset("raisef (raise exception from an expression)"): - raise_instance = lambda: raisef(ValueError("all ok")) # the argument works the same as in `raise ...` - test_raises[ValueError, raise_instance()] - try: - raise_instance() - except 
ValueError as err: - test[err.__cause__ is None] # like plain `raise ...`, no cause set (default behavior) - - # using the `cause` parameter, raisef can also perform a `raise ... from ...` - exc = TypeError("oof") - raise_instance = lambda: raisef(ValueError("all ok"), cause=exc) - test_raises[ValueError, raise_instance()] - try: - raise_instance() - except ValueError as err: - test[err.__cause__ is exc] # cause specified, like `raise ... from ...` - - # raisef with old-style parameters (as of v0.14.2, deprecated, will be dropped in v0.15.0) - raise_instance = lambda: raisef(ValueError, "all ok") - test_raises[ValueError, raise_instance()] - - # can also raise an exception class (no instance) - test_raises[StopIteration, raisef(StopIteration)] - - # tryf: handle an exception in an expression position - with testset("tryf (try/except/finally in an expression)"): - raise_instance = lambda: raisef(ValueError("all ok")) - raise_class = lambda: raisef(ValueError) - - test[tryf(lambda: "hello") == "hello"] - test[tryf(lambda: "hello", - elsef=lambda: "there") == "there"] - test[tryf(lambda: raise_instance(), - (ValueError, lambda: "got a ValueError")) == "got a ValueError"] - test[tryf(lambda: raise_instance(), - (ValueError, lambda err: f"got a ValueError: '{err.args[0]}'")) == "got a ValueError: 'all ok'"] - test[tryf(lambda: raise_instance(), - ((RuntimeError, ValueError), lambda err: f"got a RuntimeError or ValueError: '{err.args[0]}'")) == "got a RuntimeError or ValueError: 'all ok'"] - test[tryf(lambda: "hello", - (ValueError, lambda: "got a ValueError"), - elsef=lambda: "there") == "there"] - test[tryf(lambda: raisef(ValueError("oof")), - (TypeError, lambda: "got a TypeError"), - ((TypeError, ValueError), lambda: "got a TypeError or a ValueError"), - (ValueError, lambda: "got a ValueError")) == "got a TypeError or a ValueError"] - - e = env(finally_ran=False) - test[e.finally_ran is False] - test[tryf(lambda: "hello", - elsef=lambda: "there", - finallyf=lambda: e 
<< ("finally_ran", True)) == "there"] - test[e.finally_ran is True] - - test[tryf(lambda: raise_class(), - (ValueError, lambda: "ok")) == "ok"] - test[tryf(lambda: raise_class(), - ((RuntimeError, ValueError), lambda: "ok")) == "ok"] - - test_raises[TypeError, tryf(lambda: "hello", - (str, lambda: "got a string"))] # str is not an exception type - test_raises[TypeError, tryf(lambda: "hello", - ((ValueError, str), lambda: "got a string"))] # same, in the tuple case - test_raises[TypeError, tryf(lambda: "hello", - ("not a type at all!", lambda: "got a string"))] - - with testset("equip_with_traceback"): - e = Exception("just testing") - try: - e = equip_with_traceback(e) - except NotImplementedError: - warn["equip_with_traceback only supported on Python 3.7+, skipping test."] - else: - # Can't do meaningful testing on the result, so just check it's there. - test[e.__traceback__ is not None] - - test_raises[TypeError, equip_with_traceback("not an exception")] - with testset("pack"): myzip = lambda lol: map(pack, *lol) lol = ((1, 2), (3, 4), (5, 6)) @@ -270,53 +109,12 @@ def __init__(self, x): test[it.count == k] test[it.count == 5] - # Unit in the Last Place, float utility - # https://en.wikipedia.org/wiki/Unit_in_the_last_place - with testset("ulp (unit in the last place; float utility)"): - test[ulp(1.0) == float_info.epsilon] - # test also at some base-2 exponent switch points - test[ulp(2.0) == 2 * float_info.epsilon] - test[ulp(0.5) == 0.5 * float_info.epsilon] - with testset("slurp (drain a queue into a list)"): q = Queue() for k in range(10): q.put(k) test[slurp(q) == list(range(10))] - # async_raise - evil ctypes hack to inject an asynchronous exception into another running thread - if sys.implementation.name != "cpython": - warn["async_raise only supported on CPython, skipping test."] # pragma: no cover - else: - with testset("async_raise (inject KeyboardInterrupt)"): - try: - # Test whether the Python we're running on provides ctypes. 
At least CPython and PyPy3 do. - # For PyPy3, the general rule is "if it imports, it should work", so let's go along with that. - import ctypes # noqa: F401 - out = [] # box, really, but let's not depend on unpythonic.collections in this unrelated unit test module - def test_async_raise_worker(): - try: - for j in range(10): - sleep(0.1) - except KeyboardInterrupt: # normally, KeyboardInterrupt is only raised in the main thread - pass - out.append(j) - t = threading.Thread(target=test_async_raise_worker) - t.start() - sleep(0.1) # make sure we're in the while loop - async_raise(t, KeyboardInterrupt) - t.join() - test[out[0] < 9] # terminated early due to the injected KeyboardInterrupt - except NotImplementedError: # pragma: no cover - error["async_raise not supported on this Python interpreter."] - - test_raises[TypeError, async_raise(42, KeyboardInterrupt)] # not a thread - - t = threading.Thread(target=lambda: None) - t.start() - t.join() - test_raises[ValueError, async_raise(t, KeyboardInterrupt)] # thread no longer running - with testset("callsite_filename"): test["test_misc.py" in the[callsite_filename()]] diff --git a/unpythonic/tests/test_numutil.py b/unpythonic/tests/test_numutil.py new file mode 100644 index 00000000..0c7a09c7 --- /dev/null +++ b/unpythonic/tests/test_numutil.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- + +from ..syntax import macros, test, test_raises, warn, the # noqa: F401 +from ..test.fixtures import session, testset + +from itertools import count, takewhile +from math import cos, sqrt +import sys + +from ..numutil import (almosteq, fixpoint, ulp, + partition_int, partition_int_triangular, partition_int_custom) +from ..it import rev + +def runtests(): + with testset("ulp (unit in the last place; float utility)"): + test[ulp(1.0) == sys.float_info.epsilon] + # test also at some base-2 exponent switch points + test[ulp(2.0) == 2 * sys.float_info.epsilon] + test[ulp(0.5) == 0.5 * sys.float_info.epsilon] + + with testset("almosteq"): + # 
For anything but floating-point inputs, it's exact equality. + test[almosteq("abc", "abc")] + test[not almosteq("ab", "abc")] + + test[almosteq(1.0, 1.0 + ulp(1.0))] + + # TODO: counterintuitively, need a large tolerance here, because when one operand is zero, + # TODO: the final tolerance is actually tol*min_normal. + min_normal = sys.float_info.min + test[almosteq(min_normal / 2, 0, tol=1.0)] + + too_large = 2**int(1e6) + test_raises[OverflowError, float(too_large), "UPDATE THIS, need a float overflow here."] + test[almosteq(too_large, too_large + 1)] # works, because 1/too_large is very small. + + try: + from mpmath import mpf + except ImportError: # pragma: no cover + warn["mpmath not installed in this Python, skipping arbitrary precision input tests."] + else: + test[almosteq(mpf(1.0), mpf(1.0 + ulp(1.0)))] + test[almosteq(1.0, mpf(1.0 + ulp(1.0)))] + test[almosteq(mpf(1.0), 1.0 + ulp(1.0))] + + # Arithmetic fixed points. + with testset("fixpoint (arithmetic fixed points)"): + c = fixpoint(cos, x0=1) + test[the[c] == the[cos(c)]] # 0.7390851332151607 + + # Actually "Newton's" algorithm for the square root was already known to the + # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) + # Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy + def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) + # different algorithm, so not necessarily equal down to the last bit + # (caused by the fixpoint update becoming smaller than the ulp, so it + # stops there, even if the limit is still one ulp away). 
+ test[abs(the[sqrt_newton(2)] - the[sqrt(2)]) <= the[ulp(1.414)]] + + # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it + with testset("partition_int"): + test[tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1))] + test[tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3))] + test[tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3))] + test[tuple(partition_int(10, lower=3, upper=5)) == ((5, 5), (4, 3, 3), (3, 4, 3), (3, 3, 4))] + test[all(sum(terms) == 10 for terms in partition_int(10))] + test[all(sum(terms) == 10 for terms in partition_int(10, lower=3))] + test[all(sum(terms) == 10 for terms in partition_int(10, lower=3, upper=5))] + + test_raises[TypeError, partition_int("not a number")] + test_raises[TypeError, partition_int(4, lower="not a number")] + test_raises[TypeError, partition_int(4, upper="not a number")] + test_raises[ValueError, partition_int(-3)] + test_raises[ValueError, partition_int(4, lower=-1)] + test_raises[ValueError, partition_int(4, lower=5)] + test_raises[ValueError, partition_int(4, upper=-1)] + test_raises[ValueError, partition_int(4, upper=5)] + test_raises[ValueError, partition_int(4, lower=3, upper=2)] + + # partition_int_triangular: like partition_int, but in the output, allow triangular numbers only. + # Triangular numbers are 1, 3, 6, 10, ... + with testset("partition_int_triangular"): + test[frozenset(tuple(sorted(c)) for c in partition_int_triangular(78, lower=10)) == + frozenset({(10, 10, 10, 10, 10, 28), + (10, 10, 15, 15, 28), + (15, 21, 21, 21), + (21, 21, 36), + (78,)})] + + # partition_int_custom: like partition_int, but lets you specify allowed components manually. + # Can be used to build other functions like `partition_int` and `partition_int_triangular`. 
+ with testset("partition_int_custom"): + test[tuple(partition_int_custom(4, [1])) == ((1, 1, 1, 1),)] + test[tuple(partition_int_custom(4, [1, 3])) == ((1, 1, 1, 1), (1, 3), (3, 1))] + + evens_upto_n = lambda n: takewhile(lambda m: m <= n, count(start=2, step=2)) + test[tuple(partition_int_custom(4, rev(evens_upto_n(4)))) == ((4,), (2, 2))] + test[tuple(partition_int_custom(6, rev(evens_upto_n(6)))) == ((6,), (4, 2), (2, 4), (2, 2, 2))] + + test_raises[TypeError, partition_int_custom("not a number", evens_upto_n("blah"))] + test_raises[TypeError, tuple(partition_int_custom(4, [2.0]))] + test_raises[ValueError, partition_int_custom(-3, evens_upto_n(-3))] + test_raises[ValueError, tuple(partition_int_custom(4, [-1]))] + test_raises[ValueError, tuple(partition_int_custom(4, [1, -1]))] + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/tests/test_seq.py b/unpythonic/tests/test_seq.py index c556df63..4e127188 100644 --- a/unpythonic/tests/test_seq.py +++ b/unpythonic/tests/test_seq.py @@ -3,6 +3,7 @@ from ..syntax import macros, test, test_raises, fail # noqa: F401 from ..test.fixtures import session, testset +from ..funutil import Values from ..seq import (begin, begin0, lazy_begin, lazy_begin0, pipe1, pipe, pipec, piped1, piped, exitpipe, @@ -43,32 +44,45 @@ def runtests(): test[pipe(42, inc, double) == 86] # 2-in-2-out - a, b = pipe((2, 3), - lambda x, y: (x + 1, 2 * y), - lambda x, y: (x * 2, y + 1)) + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y), + lambda x, y: Values(x * 2, y + 1)) test[(a, b) == (6, 7)] + # 2-in-2-out, pass intermediate result by name + a, b = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(x * 2, y + 1)) + test[(a, b) == (6, 7)] + + # 2-in-2-out, also return final result by name + v = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(a=(x * 2), b=(y + 1))) + test[v == Values(a=6, b=7)] + 
test[v["a"] == 6 and v["b"] == 7] # can access them via subscripting too + # 2-in-eventually-3-out - a, b, c = pipe((2, 3), - lambda x, y: (x + 1, 2 * y, "foo"), - lambda x, y, z: (x * 2, y + 1, f"got {z}")) + a, b, c = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, z: Values(x * 2, y + 1, f"got {z}")) test[(a, b, c) == (6, 7, "got foo")] # 2-in-3-in-between-2-out - a, b = pipe((2, 3), - lambda x, y: (x + 1, 2 * y, "foo"), - lambda x, y, s: (x * 2, y + 1, f"got {s}"), - lambda x, y, s: (x + y, s)) + a, b = pipe(Values(2, 3), + lambda x, y: Values(x + 1, 2 * y, "foo"), + lambda x, y, s: Values(x * 2, y + 1, f"got {s}"), + lambda x, y, s: Values(x + y, s)) test[(a, b) == (13, "got foo")] # pipec: curry the functions before running the pipeline - a, b = pipec((1, 2), - lambda x: x + 1, # extra args passed through on the right - lambda x, y: (x * 2, y + 1)) + a, b = pipec(Values(1, 2), + lambda x: x + 1, # extra values passed through by curry (positionals on the right) + lambda x, y: Values(x * 2, y + 1)) test[(a, b) == (4, 3)] - with test_raises(TypeError, "should error when the curry context exits with args remaining"): - a, b = pipec((1, 2), + with test_raises[TypeError, "should error when the curry context exits with args remaining"]: + a, b = pipec(Values(1, 2), lambda x: x + 1, lambda x: x * 2) @@ -80,10 +94,10 @@ def runtests(): test[y | exitpipe == 84] # y is never modified by the pipe system # multi-arg version - f = lambda x, y: (2 * x, y + 1) - g = lambda x, y: (x + 1, 2 * y) + f = lambda x, y: Values(2 * x, y + 1) + g = lambda x, y: Values(x + 1, 2 * y) x = piped(2, 3) | f | g | exitpipe # --> (5, 8) - test[x == (5, 8)] + test[x == Values(5, 8)] # abuse multi-arg version for single-arg case test[piped(42) | double | inc | exitpipe == 85] @@ -91,9 +105,9 @@ def runtests(): with testset("lazy pipe (plan computations)"): # lazy pipe: compute later lst = [1] - def append_succ(l): - l.append(l[-1] + 1) - return l # important, 
handed to the next function in the pipe + def append_succ(lis): + lis.append(lis[-1] + 1) + return lis # important, handed to the next function in the pipe p = lazy_piped1(lst) | append_succ | append_succ # plan a computation test[lst == [1]] # nothing done yet p | exitpipe # run the computation @@ -104,7 +118,7 @@ def append_succ(l): def nextfibo(state): a, b = state fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + return (b, a + b) # new state, handed to the next function in the pipe p = lazy_piped1((1, 1)) # load initial state into a lazy pipe for _ in range(10): # set up pipeline p = p | nextfibo @@ -113,24 +127,24 @@ def nextfibo(state): # multi-arg lazy pipe p1 = lazy_piped(2, 3) - p2 = p1 | (lambda x, y: (x + 1, 2 * y, "foo")) - p3 = p2 | (lambda x, y, s: (x * 2, y + 1, f"got {s}")) - p4 = p3 | (lambda x, y, s: (x + y, s)) + p2 = p1 | (lambda x, y: Values(x + 1, 2 * y, "foo")) + p3 = p2 | (lambda x, y, s: Values(x * 2, y + 1, f"got {s}")) + p4 = p3 | (lambda x, y, s: Values(x + y, s)) # nothing done yet, and all computations purely functional: - test[(p1 | exitpipe) == (2, 3)] - test[(p2 | exitpipe) == (3, 6, "foo")] # runs the chain up to p2 - test[(p3 | exitpipe) == (6, 7, "got foo")] # runs the chain up to p3 - test[(p4 | exitpipe) == (13, "got foo")] + test[(p1 | exitpipe) == Values(2, 3)] + test[(p2 | exitpipe) == Values(3, 6, "foo")] # runs the chain up to p2 + test[(p3 | exitpipe) == Values(6, 7, "got foo")] # runs the chain up to p3 + test[(p4 | exitpipe) == Values(13, "got foo")] # multi-arg lazy pipe as an unfold fibos = [] def nextfibo(a, b): # now two arguments fibos.append(a) - return (b, a + b) # two return values, still expressed as a tuple + return Values(a=b, b=(a + b)) # can return by name too p = lazy_piped(1, 1) for _ in range(10): p = p | nextfibo - p | exitpipe + test[p | exitpipe == Values(a=89, b=144)] # final state test[fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]] # 
abuse multi-arg version for single-arg case diff --git a/unpythonic/tests/test_slicing.py b/unpythonic/tests/test_slicing.py index 4ae6bcd5..b810abf9 100644 --- a/unpythonic/tests/test_slicing.py +++ b/unpythonic/tests/test_slicing.py @@ -4,9 +4,10 @@ from ..syntax import macros, test, test_raises # noqa: F401 from ..test.fixtures import session, testset -from itertools import repeat +from itertools import count, repeat from ..slicing import fup, islice +from ..gmemo import imemoize from ..mathseq import primes, s def runtests(): @@ -20,6 +21,10 @@ def runtests(): test[fup(tup)[1::2] << tuple(repeat(10, 3)) == (1, 10, 3, 10, 5)] test[fup(tup)[::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10)] test[fup(tup)[::-1] << tuple(range(5)) == (4, 3, 2, 1, 0)] + test[fup(tup)[0::2] << repeat(10) == (10, 2, 10, 4, 10)] # infinite replacement + test[fup(tup)[0::2] << count(start=10) == (10, 2, 11, 4, 12)] + test[fup(tup)[::2] << imemoize(repeat(10))() == (10, 2, 10, 4, 10)] # memoized infinite replacement backwards + test[fup(tup)[::-2] << imemoize(count(start=10))() == (12, 2, 11, 4, 10)] test[tup == (1, 2, 3, 4, 5)] test_raises[TypeError, fup(tup)[2, 3]] # multidimensional indexing not supported diff --git a/unpythonic/tests/test_symbol.py b/unpythonic/tests/test_symbol.py index 476384b1..349271f2 100644 --- a/unpythonic/tests/test_symbol.py +++ b/unpythonic/tests/test_symbol.py @@ -34,7 +34,7 @@ def runtests(): # Symbol interning has nothing to do with string interning. many = 5000 test[the[sym("λ" * many) is sym("λ" * many)]] - # To defeat string interning, used to be that 80 exotic characters + # To defeat string interning, it used to be that 80 exotic characters # would be enough in Python 3.6 to make CPython decide not to intern it, # but Python 3.7 bumped that up. 
test[the["λ" * many is not "λ" * many]] diff --git a/unpythonic/tests/test_tco.py b/unpythonic/tests/test_tco.py index e82aa97d..647dcb1a 100644 --- a/unpythonic/tests/test_tco.py +++ b/unpythonic/tests/test_tco.py @@ -81,7 +81,7 @@ def withec(ec): print("*** These error cases SHOULD PRINT A WARNING:", file=stderr) print("** Attempted jump into an inert data value:", file=stderr) - with test_raises(RuntimeError): + with test_raises[RuntimeError]: @trampolined def errorcase1(): return jump(42) diff --git a/unpythonic/tests/test_timeutil.py b/unpythonic/tests/test_timeutil.py new file mode 100644 index 00000000..ba7574b0 --- /dev/null +++ b/unpythonic/tests/test_timeutil.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +from ..syntax import macros, test # noqa: F401 +from ..test.fixtures import session, testset, returns_normally + +from ..timeutil import seconds_to_human, format_human_time, ETAEstimator + +def runtests(): + with testset("seconds_to_human"): + test[seconds_to_human(30) == (0, 0, 0, 30)] + test[seconds_to_human(30.0) == (0, 0, 0, 30.0)] + test[seconds_to_human(90) == (0, 0, 1, 30)] + test[seconds_to_human(3690) == (0, 1, 1, 30)] + test[seconds_to_human(86400 + 3690) == (1, 1, 1, 30)] + test[seconds_to_human(2 * 86400 + 3690) == (2, 1, 1, 30)] + + with testset("format_human_time"): + test[format_human_time(30) == "30 seconds"] + test[format_human_time(90) == "01:30"] # mm:ss + test[format_human_time(3690) == "01:01:30"] # hh:mm:ss + test[format_human_time(86400 + 3690) == "1 day 01:01:30"] + test[format_human_time(2 * 86400 + 3690) == "2 days 01:01:30"] + + # This is a UI thing so we can't test functionality reliably. Let's just check it doesn't crash. 
+ with testset("ETAEstimator"): + e = ETAEstimator(total=5) + test[returns_normally(e.estimate)] # before the first tick + test[returns_normally(e.elapsed)] + test[returns_normally(e.formatted_eta)] + test[returns_normally(e.tick())] + test[returns_normally(e.estimate)] # after the first tick + test[returns_normally(e.elapsed)] + test[returns_normally(e.formatted_eta)] + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/tests/test_typecheck.py b/unpythonic/tests/test_typecheck.py index 28d62668..3b8af53f 100644 --- a/unpythonic/tests/test_typecheck.py +++ b/unpythonic/tests/test_typecheck.py @@ -3,7 +3,12 @@ from ..syntax import macros, test, test_raises, warn # noqa: F401 from ..test.fixtures import session, testset +import asyncio import collections +import contextlib +import io +import re +import sys import typing from ..collections import frozendict @@ -32,6 +37,17 @@ def runtests(): test[isoftype("something", typing.Any)] test[isoftype(lambda: ..., typing.Any)] + # NoReturn / Never — the bottom type; no value can match. + with testset("typing.NoReturn"): + test[not isoftype(None, typing.NoReturn)] + test[not isoftype(42, typing.NoReturn)] + test[not isoftype("anything", typing.NoReturn)] + + if sys.version_info >= (3, 11): + with testset("typing.Never"): + test[not isoftype(None, typing.Never)] + test[not isoftype(42, typing.Never)] + # TypeVar, bare; a named type, but behaves like Any. 
with testset("typing.TypeVar (bare; like a named Any)"): X = typing.TypeVar("X") @@ -67,6 +83,45 @@ def runtests(): test[isoftype(1337, typing.Optional[int])] test[not isoftype(3.14, typing.Optional[int])] + with testset("typing.Literal"): + test[isoftype(1, typing.Literal[1, 2, 3])] + test[isoftype(3, typing.Literal[1, 2, 3])] + test[not isoftype(4, typing.Literal[1, 2, 3])] + test[isoftype("red", typing.Literal["red", "green", "blue"])] + test[not isoftype("yellow", typing.Literal["red", "green", "blue"])] + # Literal values are compared by equality, not identity + test[isoftype(True, typing.Literal[True, False])] + test[not isoftype(None, typing.Literal[True, False])] + + with testset("typing.Type"): + test[isoftype(int, typing.Type[int])] + test[isoftype(bool, typing.Type[int])] # bool is a subclass of int + test[not isoftype(str, typing.Type[int])] + test[not isoftype(42, typing.Type[int])] # an instance, not a class + # bare Type: any class matches + test[isoftype(int, typing.Type)] + test[isoftype(str, typing.Type)] + test[not isoftype(42, typing.Type)] + + with testset("typing.ClassVar"): + test[isoftype(42, typing.ClassVar[int])] + test[not isoftype("hello", typing.ClassVar[int])] + # Compound: ClassVar wrapping a Union + test[isoftype(42, typing.ClassVar[typing.Union[int, str]])] + test[isoftype("hello", typing.ClassVar[typing.Union[int, str]])] + test[not isoftype(3.14, typing.ClassVar[typing.Union[int, str]])] + + with testset("typing.Final"): + test[isoftype(42, typing.Final[int])] + test[not isoftype("hello", typing.Final[int])] + test[isoftype("hello", typing.Final[str])] + + # Empty collections reject parametric type specs (e.g. `Tuple[int, ...]`, + # `List[int]`, `Dict[str, int]`). An empty collection has no elements to + # infer the type from, so matching it against a specific element type would + # be guesswork — which would make multiple dispatch unpredictable. + # Bare (unparametrized) specs like `Tuple` or `Dict` still accept empties. 
+ with testset("typing.Tuple"): test[isoftype((1, 2, 3), typing.Tuple)] test[isoftype((1, 2, 3), typing.Tuple[int, ...])] @@ -101,6 +156,34 @@ def runtests(): # no type arguments: any key/value types ok (consistent with Python 3.7+) test[isoftype({"cat": "animal", "pi": 3.14159, 2.71828: "e"}, typing.Dict)] + with testset("typing.DefaultDict"): + dd = collections.defaultdict(int, {"a": 1, "b": 2}) + test[isoftype(dd, typing.DefaultDict[str, int])] + test[not isoftype(dd, typing.DefaultDict[int, int])] + test[not isoftype({}, typing.DefaultDict[str, int])] # regular dict is not defaultdict + test[not isoftype(collections.defaultdict(int), typing.DefaultDict[str, int])] # empty + + with testset("typing.OrderedDict"): + od = collections.OrderedDict({"x": 1, "y": 2}) + test[isoftype(od, typing.OrderedDict[str, int])] + test[not isoftype(od, typing.OrderedDict[int, int])] + test[not isoftype({}, typing.OrderedDict[str, int])] # regular dict is not OrderedDict + test[not isoftype(collections.OrderedDict(), typing.OrderedDict[str, int])] # empty + + with testset("typing.Counter"): + c = collections.Counter("abracadabra") + test[isoftype(c, typing.Counter[str])] + test[not isoftype(c, typing.Counter[int])] + test[not isoftype({}, typing.Counter[str])] # regular dict is not Counter + test[not isoftype(collections.Counter(), typing.Counter[str])] # empty + + with testset("typing.ChainMap"): + cm = collections.ChainMap({"a": 1}, {"b": 2}) + test[isoftype(cm, typing.ChainMap[str, int])] + test[not isoftype(cm, typing.ChainMap[int, int])] + test[not isoftype({}, typing.ChainMap[str, int])] # regular dict is not ChainMap + test[not isoftype(collections.ChainMap(), typing.ChainMap[str, int])] # empty + # type alias (at run time, this is just an assignment) with testset("type alias"): U = typing.Union[int, str] @@ -179,8 +262,112 @@ def runtests(): test[isoftype([1, 2, 3], typing.Iterable)] test[isoftype([1, 2, 3], typing.Reversible)] test[isoftype([1, 2, 3], typing.Container)] - 
if hasattr(typing, "Collection"): # Python 3.6+ - test[isoftype([1, 2, 3], typing.Collection)] # Sized Iterable Container + test[isoftype([1, 2, 3], typing.Collection)] # Sized Iterable Container + + with testset("parametric ABCs — uncheckable (type arg ignored)"): + # Iterator: consumed by iteration, can't check elements. + test[isoftype(iter([1, 2, 3]), typing.Iterator[int])] + test[isoftype(iter([1, 2, 3]), typing.Iterator[str])] # type arg ignored + test[not isoftype(42, typing.Iterator[int])] + + # Container: only has __contains__, can't enumerate elements. + test[isoftype([1, 2, 3], typing.Container[int])] + test[isoftype([1, 2, 3], typing.Container[str])] # type arg ignored + test[not isoftype(42, typing.Container[int])] + + with testset("parametric ABCs — best-effort element checking"): + # Iterable[T]: elements checked when value is Sized (concrete collection). + test[isoftype([1, 2, 3], typing.Iterable[int])] + test[not isoftype([1, 2, 3], typing.Iterable[str])] + test[not isoftype([], typing.Iterable[int])] # empty rejects parametric + test[isoftype([], typing.Iterable)] # bare form still accepts empty + test[not isoftype(42, typing.Iterable[int])] + # Opaque iterator (not Sized) — accepts on ABC alone, can't check elements. + test[isoftype(iter([1, 2, 3]), typing.Iterable[int])] + test[isoftype(iter([1, 2, 3]), typing.Iterable[str])] # can't check, accepts + + # Collection[T]: Sized + Iterable + Container. 
+ test[isoftype([1, 2, 3], typing.Collection[int])] + test[not isoftype([1, 2, 3], typing.Collection[str])] + test[not isoftype([], typing.Collection[int])] # empty rejects parametric + test[isoftype([], typing.Collection)] # bare form accepts empty + test[not isoftype(42, typing.Collection[int])] + + # Reversible[T] + test[isoftype([1, 2, 3], typing.Reversible[int])] + test[not isoftype([1, 2, 3], typing.Reversible[str])] + test[not isoftype([], typing.Reversible[int])] # empty rejects parametric + test[isoftype([], typing.Reversible)] # bare form accepts empty + test[not isoftype(42, typing.Reversible[int])] + + # Compound type in element spec + test[isoftype([1, "two", 3], typing.Iterable[typing.Union[int, str]])] + test[not isoftype([1, "two", 3.0], typing.Iterable[typing.Union[int, str]])] + + with testset("typing.TypedDict"): + class Point(typing.TypedDict): + x: float + y: float + + test[isoftype({"x": 1.0, "y": 2.0}, Point)] + test[not isoftype({"x": 1.0}, Point)] # missing required key + test[not isoftype({"x": 1.0, "y": 2.0, "z": 3.0}, Point)] # extra key + test[not isoftype({"x": "hello", "y": 2.0}, Point)] # wrong value type + test[not isoftype(42, Point)] # not a dict + test[not isoftype([], Point)] # not a dict + + # total=False: all keys optional + class Config(typing.TypedDict, total=False): + debug: bool + verbose: bool + + test[isoftype({}, Config)] # all optional, empty is ok + test[isoftype({"debug": True}, Config)] + test[isoftype({"debug": True, "verbose": False}, Config)] + test[not isoftype({"debug": "yes"}, Config)] # wrong type + test[not isoftype({"unknown": True}, Config)] # extra key + + # Inheritance + class Base(typing.TypedDict): + name: str + + class Derived(Base): + age: int + + test[isoftype({"name": "alice", "age": 30}, Derived)] + test[not isoftype({"name": "alice"}, Derived)] # missing age + test[not isoftype({"age": 30}, Derived)] # missing name + + # Compound value types + class Nested(typing.TypedDict): + tags: 
typing.List[str] + count: typing.Optional[int] + + test[isoftype({"tags": ["a", "b"], "count": 42}, Nested)] + test[isoftype({"tags": ["a"], "count": None}, Nested)] + test[not isoftype({"tags": [1, 2], "count": 42}, Nested)] # wrong list element type + + with testset("typing.Protocol"): + @typing.runtime_checkable + class Drawable(typing.Protocol): + def draw(self) -> None: ... + + class Circle: + def draw(self): + pass + + class Square: + pass + + test[isoftype(Circle(), Drawable)] + test[not isoftype(Square(), Drawable)] + test[not isoftype(42, Drawable)] + + # Non-runtime-checkable Protocol raises TypeError + class NonCheckable(typing.Protocol): + def frobnicate(self) -> int: ... + + test_raises[TypeError, isoftype(Circle(), NonCheckable)] with testset("typing.KeysView, typing.ValuesView, typing.ItemsView"): d = {17: "cat", 23: "fox", 42: "python"} @@ -203,6 +390,94 @@ def runtests(): # https://docs.python.org/3/glossary.html#term-dictionary-view # https://docs.python.org/3/library/stdtypes.html#dict-views + with testset("typing.IO, typing.TextIO, typing.BinaryIO"): + sio = io.StringIO("hello") + bio = io.BytesIO(b"hello") + test[isoftype(sio, typing.IO)] + test[isoftype(bio, typing.IO)] + test[isoftype(sio, typing.TextIO)] + test[not isoftype(bio, typing.TextIO)] + test[isoftype(bio, typing.BinaryIO)] + test[not isoftype(sio, typing.BinaryIO)] + test[not isoftype(42, typing.IO)] + # Parametric IO: IO[str] matches text, IO[bytes] matches binary + test[isoftype(sio, typing.IO[str])] + test[not isoftype(bio, typing.IO[str])] + test[isoftype(bio, typing.IO[bytes])] + test[not isoftype(sio, typing.IO[bytes])] + + with testset("typing.Pattern, typing.Match"): + pstr = re.compile(r"\d+") + pbytes = re.compile(rb"\d+") + mstr = pstr.match("123") + mbytes = pbytes.match(b"123") + # Bare Pattern/Match — any string type + test[isoftype(pstr, typing.Pattern)] + test[isoftype(pbytes, typing.Pattern)] + test[isoftype(mstr, typing.Match)] + test[isoftype(mbytes, 
typing.Match)] + test[not isoftype("not a pattern", typing.Pattern)] + test[not isoftype(42, typing.Match)] + # Parametric — string type checked + test[isoftype(pstr, typing.Pattern[str])] + test[not isoftype(pstr, typing.Pattern[bytes])] + test[isoftype(pbytes, typing.Pattern[bytes])] + test[not isoftype(pbytes, typing.Pattern[str])] + test[isoftype(mstr, typing.Match[str])] + test[not isoftype(mstr, typing.Match[bytes])] + test[isoftype(mbytes, typing.Match[bytes])] + test[not isoftype(mbytes, typing.Match[str])] + + with testset("typing.ContextManager"): + # contextlib.nullcontext is a context manager + cm = contextlib.nullcontext() + test[isoftype(cm, typing.ContextManager)] + test[isoftype(cm, typing.ContextManager[None])] # type arg ignored (can't check) + test[not isoftype(42, typing.ContextManager)] + + with testset("typing.Generator"): + def mygen(): + yield 1 + yield 2 + g = mygen() + test[isoftype(g, typing.Generator)] + test[isoftype(g, typing.Generator[int, None, None])] # type args ignored + test[not isoftype(42, typing.Generator)] + test[not isoftype([1, 2, 3], typing.Generator)] # iterable, but not a generator + + with testset("typing.Awaitable, typing.Coroutine"): + async def mycoro(): + return 42 + c = mycoro() + test[isoftype(c, typing.Awaitable)] + test[isoftype(c, typing.Coroutine)] + test[isoftype(c, typing.Awaitable[int])] # type arg ignored + test[not isoftype(42, typing.Awaitable)] + test[not isoftype(42, typing.Coroutine)] + c.close() # prevent RuntimeWarning about unawaited coroutine + + with testset("typing.AsyncIterable, typing.AsyncIterator"): + class MyAsyncIter: + def __aiter__(self): + return self + async def __anext__(self): + raise StopAsyncIteration + ai = MyAsyncIter() + test[isoftype(ai, typing.AsyncIterable)] + test[isoftype(ai, typing.AsyncIterator)] + test[isoftype(ai, typing.AsyncIterable[int])] # type arg ignored + test[not isoftype(42, typing.AsyncIterable)] + test[not isoftype([1, 2], typing.AsyncIterator)] # sync 
iterable, not async
+
+    with testset("typing.AsyncGenerator"):
+        async def myasyncgen():
+            yield 1
+        ag = myasyncgen()
+        test[isoftype(ag, typing.AsyncGenerator)]
+        test[isoftype(ag, typing.AsyncGenerator[int, None])]  # type args ignored
+        test[not isoftype(42, typing.AsyncGenerator)]
+        asyncio.run(ag.aclose())  # prevent RuntimeWarning
+
 if __name__ == '__main__':  # pragma: no cover
     with session(__file__):
         runtests()
diff --git a/unpythonic/timeutil.py b/unpythonic/timeutil.py
new file mode 100644
index 00000000..b56924ae
--- /dev/null
+++ b/unpythonic/timeutil.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""Some additional batteries for time handling."""
+
+__all__ = ["seconds_to_human", "format_human_time",
+           "ETAEstimator"]
+
+from collections import deque
+import time
+import typing
+
+def seconds_to_human(s: typing.Union[float, int]) -> typing.Tuple[int, int, int, float]:
+    """Convert a number of seconds into (days, hours, minutes, seconds).
+
+    The days, hours and minutes are returned as `int`. The seconds component
+    is what remains of `s` after the whole days, hours and minutes have been
+    subtracted, so it keeps any fractional part of `s`.
+    """
+    d = int(s // 86400)
+    s -= d * 86400
+    h = int(s // 3600)
+    s -= h * 3600
+    m = int(s // 60)
+    s -= m * 60
+    return d, h, m, s
+
+
+def format_human_time(s: typing.Union[float, int]) -> str:
+    """Convert a number of seconds to a human-readable string.
+
+    The representation format switches automatically depending on
+    how large `s` is. Examples:
+
+        assert format_human_time(30) == "30 seconds"
+        assert format_human_time(90) == "01:30"  # mm:ss
+        assert format_human_time(3690) == "01:01:30"  # hh:mm:ss
+        assert format_human_time(86400 + 3690) == "1 day 01:01:30"
+        assert format_human_time(2 * 86400 + 3690) == "2 days 01:01:30"
+    """
+    d, h, m, s = seconds_to_human(s)
+
+    if all(x == 0 for x in (d, h, m)):  # under one minute
+        plural = "s" if int(s) != 1.0 else ""
+        return f"{int(s):d} second{plural}"
+
+    if d > 0:
+        plural = "s" if d > 1 else ""
+        days = f"{d:d} day{plural} "
+    else:
+        days = ""
+    hours = f"{h:02d}:" if (d > 0 or h > 0) else ""
+    minutes = f"{m:02d}:"
+    seconds = f"{int(s):02d}"
+    return f"{days}{hours}{minutes}{seconds}"
+
+
+class ETAEstimator:
+    """Estimate the time of completion.
+
+    `total`: number of tasks in the whole job, used for estimating
+             how much work is still needed.
+
+             Stored in `self.total`, which is writable; but note that
+             if you move the goalposts, the ETA cannot be accurate.
+             Changing `self.total` is mostly useful if you suddenly
+             discover that the workload is actually larger or smaller
+             than what was initially expected, and want the estimate
+             to reflect this sudden new information.
+
+    `keep_last`: use the timings from at most this many most recently
+                 completed tasks when computing the estimate.
+
+                 If not given, keep all.
+
+                 With `keep_last=0`, no timings are kept at all, so the
+                 time remaining cannot be estimated.
+
+    If you need it, the number of tasks that have been marked completed
+    is available in `self.completed`.
+    """
+    def __init__(self, total: int, keep_last: typing.Optional[int] = None):
+        self.t1 = time.monotonic()  # timestamp of the most recent tick (initially: creation time)
+        self.t0 = self.t1  # timestamp of the beginning
+        self.total = total  # total number of work items
+        self.completed = 0  # number of completed work items
+        self.que = deque([], maxlen=keep_last)  # durations of the most recently completed tasks
+
+    def tick(self) -> None:
+        """Mark one more task as completed, automatically updating the internal timings cache."""
+        self.completed += 1
+        t = time.monotonic()
+        dt = t - self.t1  # time elapsed since the previous tick (or since creation, for the first tick)
+        self.t1 = t
+        self.que.append(dt)
+
+    def _estimate(self) -> typing.Optional[float]:
+        if self.completed == 0:
+            return None
+        # TODO: Smoother ETA?
+        #
+        # Let us consider the ETA estimation process as downsampling the data
+        # vector (deque) into an extremely low-resolution version that has just
+        # one sample.
+        #
+        # As we know from signal processing, as a downsampling filter, the
+        # running average has an abysmal frequency response; so we should
+        # expect the ETA to fluctuate wildly depending on the smoothness of
+        # the input data (i.e. the time taken by each task)... which actually
+        # matches observation.
+        #
+        # Maybe we could use a Lanczos downsampling filter to make the ETA
+        # behave more smoothly?
+        remaining = self.total - self.completed
+        if remaining <= 0:
+            return 0.0
+        if not self.que:
+            # Only possible with `keep_last=0`: the deque then discards every
+            # timing, so there is nothing to average (avoid ZeroDivisionError).
+            return None
+        dt_avg = sum(self.que) / len(self.que)
+        return remaining * dt_avg
+    estimate = property(fget=_estimate, doc="Estimate of time remaining, in seconds. Computed when read; read-only. If it cannot be computed (no tasks marked completed yet, or no timings retained because `keep_last=0`), the estimate is `None`.")
+
+    def _elapsed(self) -> float:
+        return time.monotonic() - self.t0
+    elapsed = property(fget=_elapsed, doc="Total elapsed time, in seconds. Computed when read; read-only.")
+
+    def _formatted_eta(self) -> str:
+        elapsed = self.elapsed
+        estimate = self.estimate
+        if estimate is not None:
+            total = elapsed + estimate
+            formatted_estimate = format_human_time(estimate)
+            formatted_total = format_human_time(total)
+        else:
+            formatted_estimate = "unknown"
+            formatted_total = "unknown"
+        formatted_elapsed = format_human_time(elapsed)
+        return f"elapsed {formatted_elapsed}, ETA {formatted_estimate}, total {formatted_total}"
+    formatted_eta = property(fget=_formatted_eta, doc="Human-readable estimate, with elapsed time, ETA (estimated time remaining) and estimated total time. See `format_human_time` for details of the format used.")
diff --git a/unpythonic/typecheck.py b/unpythonic/typecheck.py
index 567527dc..09ec0014 100644
--- a/unpythonic/typecheck.py
+++ b/unpythonic/typecheck.py
@@ -1,33 +1,26 @@
 # -*- coding: utf-8; -*-
-"""Simplistic run-time type checker.
+"""Lightweight run-time type checker.
 
-This implements just a minimal feature set needed for checking function
-arguments in typical uses of multiple dispatch (see `unpythonic.dispatch`).
-That said, this DOES support many (but not all) features of the `typing` stdlib
-module.
+Originally built for the minimal feature set needed by multiple dispatch
+(see `unpythonic.dispatch`), but designed as a general-purpose utility.
+Supports many (but not all) features of the `typing` stdlib module.
 
 We currently provide `isoftype` (cf. `isinstance`), but no `issubtype` (cf. `issubclass`).
 
-If you need a run-time type checker for serious general use, consider `typeguard`: +If you need a run-time type checker, but not the other features of `unpythonic`, +see `typeguard`: https://github.com/agronholm/typeguard """ import collections +import contextlib +import io +import re +import sys +import types import typing -try: - _MyGenericAlias = typing._GenericAlias # Python 3.7+ -except AttributeError: # Python 3.6 and earlier # pragma: no cover - class _MyGenericAlias: # unused, but must be a class to support isinstance() check. - pass - -try: - _MySupportsIndex = typing.SupportsIndex -except AttributeError: # Python 3.7 and earlier # pragma: no cover - class _MySupportsIndex: # unused, but must be a class to support isinstance() check. - pass - from .misc import safeissubclass __all__ = ["isoftype"] @@ -50,14 +43,33 @@ def isoftype(value, T): - `TypeVar` - `NewType` (any instance of the underlying actual type will match) - `Union[T1, T2, ..., TN]` + - `NoReturn`, `Never` (no value matches; `Never` requires Python 3.11+) + - `Literal[v1, v2, ...]` + - `Type[X]` (value must be a class that is `X` or a subclass of `X`) + - `ClassVar[T]`, `Final[T]` (wrapper stripped, inner type checked) - `Tuple`, `Tuple[T, ...]`, `Tuple[T1, T2, ..., TN]`, `Sequence[T]` - `List[T]`, `MutableSequence[T]` - `FrozenSet[T]`, `AbstractSet[T]` - `Set[T]`, `MutableSet[T]` - - `Dict[K, V]`, `MutableMapping[K, V]`, `Mapping[K, V]` + - `Dict[K, V]`, `DefaultDict[K, V]`, `OrderedDict[K, V]` + - `Counter[T]` (element type checked; value type is always `int`) + - `ChainMap[K, V]` + - `MutableMapping[K, V]`, `Mapping[K, V]` - `ItemsView[K, V]`, `KeysView[K]`, `ValuesView[V]` - `Callable` (argument and return value types currently NOT checked) - - `Text` + - `IO`, `TextIO`, `BinaryIO` (mapped to ``io`` module ABCs) + - `Pattern[T]`, `Match[T]` (string type checked when parametric) + - `ContextManager[T]`, `AsyncContextManager[T]` + - `Awaitable[T]`, `Coroutine[T1, T2, T3]` + - 
`AsyncIterable[T]`, `AsyncIterator[T]` + - `Generator[Y, S, R]`, `AsyncGenerator[Y, S]` + - `Iterable[T]`, `Collection[T]`, `Reversible[T]` (best-effort element + checking: elements checked when value is ``Sized``; ABC-only when not) + - `Iterator[T]`, `Container[T]` (parametric form accepted; type arg ignored) + - `Hashable`, `Sized` (non-generic; bare form only) + - `TypedDict` (structural check: required/optional keys, value types) + - ``@runtime_checkable`` ``Protocol`` subclasses + - `Text` (deprecated since Python 3.11; will be removed at floor Python 3.12) Any checks on the type arguments of the meta-utilities are performed recursively using `isoftype`, in order to allow compound specifications. @@ -70,122 +82,45 @@ def isoftype(value, T): Returns `True` if `value` matches the type specification; `False` if not. """ - # TODO: This function is one big hack. - # - # As of Python 3.6, there seems to be no consistent way to identify a type - # specification at run time. So what we have is a mess. - # - # - Many `typing` meta-utilities explicitly `raise TypeError` when one - # attempts The One Obvious Way To Do It (`isinstance`, `issubclass`). + # Many `typing` meta-utilities explicitly raise TypeError from isinstance/issubclass, + # so we identify them via typing.get_origin, isinstance checks, or identity comparisons. + # We also access some internal fields (__args__, __constraints__, __supertype__) where + # Python provides no official public API for run-time type introspection. # - # - Their `type` can be something like `typing.TypeVar`, `typing.Union`, - # ``, ``... the - # format is case-dependent. A check like `type(T) is typing.TypeVar` - # doesn't work. - # - # So, we inspect `repr(T.__class__)` to match on the names of the prickly types, - # and call `issubclass` on those that don't hate us for doing so (catching - # `TypeError`, just in case `T` is an unsupported yet prickly type). 
- # - # Obviously, this won't work if someone subclasses one of the prickly types. - # `issubclass` would be The Right Thing, but since it's explicitly blocked, - # there's not much we can do. - - # TODO: Right now we're accessing internal fields to get what we need. - # TODO: Would be nice to update this if Python, at some point, adds an - # TODO: official API to access the static type information at run time. + # Unsupported typing features: + # NamedTuple (specific NamedTuple subclasses work via isinstance fallback), + # Generic, ForwardRef if T is typing.Any: return True + # NoReturn means a function never returns — no value has this type. + # Never (3.11+) is the bottom type; semantically the same for our purposes. + if T is typing.NoReturn: + return False + if sys.version_info >= (3, 11) and T is typing.Never: + return False + # AnyStr normalizes to TypeVar("AnyStr", str, bytes) - # Python 3.6 has "typing.TypeVar" as the repr, but Python 3.7+ adds the "" around it. - if repr(T.__class__) == "typing.TypeVar" or repr(T.__class__) == "": + if isinstance(T, typing.TypeVar): if not T.__constraints__: # just an abstract type name return True return any(isoftype(value, U) for U in T.__constraints__) - # TODO: Here is THE FULL LIST of `typing` features we **don't** currently support, - # TODO: as of Python 3.8 (March 2020). https://docs.python.org/3/library/typing.html - # TODO: If you add a feature to the type checker, please update this list. - # - # Python 3.6+: - # NamedTuple, DefaultDict, Counter, ChainMap, - # IO, TextIO, BinaryIO, - # Pattern, Match, (regular expressions) - # Generic, Type, - # Awaitable, Coroutine, AsyncIterable, AsyncIterator, - # ContextManager, AsyncContextManager, - # Generator, AsyncGenerator, - # NoReturn (callable return value only), - # ClassVar, Final - # - # Python 3.7+: OrderedDict - # Python 3.8+: Protocol, TypedDict, Literal - # - # TODO: Do we need to support `typing.ForwardRef`? 
- # No, if `get_type_hints` already resolves that. Consider our main use case, - # in `unpythonic.dispatch`. And see: - # https://docs.python.org/3/library/typing.html#typing.get_type_hints - - # TODO: Python 3.8 adds `typing.get_origin` and `typing.get_args`: - # https://docs.python.org/3/library/typing.html#typing.get_origin - # TODO: We replicate them here so that we can use them in 3.7. - # TODO: Delete the local copies once we start requiring Python 3.8. - # - # Used under the PSF license. Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, - # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation; All Rights Reserved - # https://github.com/python/cpython/blob/3.8/LICENSE - def get_origin(tp): - """Get the unsubscripted version of a type. - This supports generic types, Callable, Tuple, Union, Literal, Final and ClassVar. - Return None for unsupported types. Examples:: - get_origin(Literal[42]) is Literal - get_origin(int) is None - get_origin(ClassVar[int]) is ClassVar - get_origin(Generic) is Generic - get_origin(Generic[T]) is Generic - get_origin(Union[T, int]) is Union - get_origin(List[Tuple[T, T]][int]) == list - """ - if isinstance(tp, _MyGenericAlias): - return tp.__origin__ - if tp is typing.Generic: - return typing.Generic - return None - # def get_args(tp): - # """Get type arguments with all substitutions performed. - # For unions, basic simplifications used by Union constructor are performed. 
- # Examples:: - # get_args(Dict[str, int]) == (str, int) - # get_args(int) == () - # get_args(Union[int, Union[T, int], str][int]) == (int, str) - # get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) - # get_args(Callable[[], T][int]) == ([], int) - # """ - # if isinstance(tp, _MyGenericAlias) and not tp._special: - # res = tp.__args__ - # if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis: - # res = (list(res[:-1]), res[-1]) - # return res - # return () - # <--- end of local copies of get_origin and get_args. The rest is our code. - - # Optional normalizes to Union[argtype, NoneType]. - # Python 3.6 has the repr, 3.7+ use typing._GenericAlias. - if repr(T.__class__) == "typing.Union" or get_origin(T) is typing.Union: - if T.__args__ is None: # Python 3.6 bare `typing.Union`; empty, has no types in it, so no value can match. - return False + # typing.Union[X, Y] and the builtin X | Y syntax (types.UnionType, Python 3.10+). + # Optional[X] normalizes to Union[X, NoneType]. + if typing.get_origin(T) is typing.Union or isinstance(T, types.UnionType): if not any(isoftype(value, U) for U in T.__args__): return False return True - # Python 3.7+ bare typing.Union; empty, has no types in it, so no value can match. - if T is typing.Union: # isinstance(T, typing._SpecialForm) and T._name == "Union": - return False # pragma: no cover, Python 3.7+ only. + # Bare typing.Union; empty, has no types in it, so no value can match. + if T is typing.Union: + return False # pragma: no cover - # TODO: in Python 3.7+, what is the mysterious callable that doesn't have `__qualname__`? 
- if callable(T) and hasattr(T, "__qualname__") and T.__qualname__ == "NewType..new_type": + def isNewType(T): + return isinstance(T, typing.NewType) + if isNewType(T): # This is the best we can do, because the static types created by `typing.NewType` # have a constructor that discards the type information at runtime: # UserId = typing.NewType("UserId", int) @@ -193,44 +128,149 @@ def get_origin(tp): # print(type(i)) # int return isinstance(value, T.__supertype__) - # Some one-trick ponies. - for U in (typing.Iterator, # can't non-destructively check element type - typing.Iterable, # can't non-destructively check element type - typing.Container, # can't check element type - typing.Collection, # Sized Iterable Container; can't check element type - typing.Hashable, - typing.Sized): - if U is T: - return isinstance(value, U) + # Literal[v1, v2, ...] — value must be one of the listed constants. + if typing.get_origin(T) is typing.Literal: + return value in T.__args__ - if T is typing.Reversible: # can't non-destructively check element type - # We don't isinstance(), because in Python 3.5, typing.Reversible used to be just a protocol, - # and ": Protocols cannot be used with isinstance()." - # https://docs.python.org/3/library/collections.abc.html#module-collections.abc - return hasattr(value, "__reversed__") + # Type[X] — value must be a class that is X or a subclass of X. + if typing.get_origin(T) is type: + if not isinstance(value, type): + return False + args = getattr(T, "__args__", None) + if args is None: + return True # bare Type, any class matches + return issubclass(value, args[0]) + + # ClassVar[T] and Final[T] — these are declaration wrappers. At runtime, + # we just strip the wrapper and check the inner type. 
+ for wrapper_origin in (typing.ClassVar, typing.Final): + if typing.get_origin(T) is wrapper_origin: + args = getattr(T, "__args__", None) + if args is None: + return True # bare ClassVar or Final, no inner type constraint + return isoftype(value, args[0]) + + # Non-generic ABCs, and parametric ABCs where element type can't be checked. + # Iterator: consumed by iteration. Container: only has __contains__, can't enumerate. + # Hashable, Sized: not generic (can't be parameterized). + for abc in (collections.abc.Hashable, + collections.abc.Sized, + collections.abc.Iterator, + collections.abc.Container): + if typing.get_origin(T) is abc: + return isinstance(value, abc) + + # Parametric ABCs with best-effort element checking. + # If the value is Sized (a concrete collection), we can safely iterate + # and check elements. Otherwise (opaque iterator), accept on ABC alone. + for abc in (collections.abc.Iterable, + collections.abc.Collection, + collections.abc.Reversible): + if typing.get_origin(T) is abc: + if not isinstance(value, abc): + return False + args = getattr(T, "__args__", None) + if args is None: + return True # bare form, no element type constraint + assert len(args) == 1 + if not isinstance(value, collections.abc.Sized): + return True # opaque iterator — can't check elements non-destructively + if not value: # empty sized collection has no element type + return False + U = args[0] + return all(isoftype(elt, U) for elt in value) # "Protocols cannot be used with isinstance()", so: for U in (typing.SupportsInt, typing.SupportsFloat, typing.SupportsComplex, typing.SupportsBytes, - _MySupportsIndex, + typing.SupportsIndex, typing.SupportsAbs, typing.SupportsRound): if U is T: return safeissubclass(type(value), U) + # TypedDict — structural check on dict contents. + # isinstance doesn't work with TypedDict, so we check keys and value types. 
+ if typing.is_typeddict(T): + if not isinstance(value, dict): + return False + hints = typing.get_type_hints(T) + required = T.__required_keys__ + optional = T.__optional_keys__ + allowed = required | optional + if not required.issubset(value.keys()): + return False + if not set(value.keys()).issubset(allowed): + return False + for k, v in value.items(): + if not isoftype(v, hints[k]): + return False + return True + # We don't have a match yet, so T might still be one of those meta-utilities # that hate `issubclass` with a passion. - if safeissubclass(T, typing.Text): # https://docs.python.org/3/library/typing.html#typing.Text - return isinstance(value, str) # alias for str + # DEPRECATED: typing.Text is deprecated since Python 3.11 (it's just an alias for str). + # TODO: Remove this branch when the floor bumps to Python 3.12. + if safeissubclass(T, typing.Text): + return isinstance(value, str) + + # IO, TextIO, BinaryIO — typing module stubs that don't participate in the + # MRO of real IO classes. Map to the io module ABCs instead. + # IO[str] → TextIO, IO[bytes] → BinaryIO when parametric. + if T is typing.IO or typing.get_origin(T) is typing.IO: + args = getattr(T, "__args__", None) + if args is not None: + if args[0] is str: + return isinstance(value, io.TextIOBase) + if args[0] is bytes: + return isinstance(value, (io.RawIOBase, io.BufferedIOBase)) + return isinstance(value, io.IOBase) + if T is typing.TextIO: + return isinstance(value, io.TextIOBase) + if T is typing.BinaryIO: + return isinstance(value, (io.RawIOBase, io.BufferedIOBase)) + + # Pattern[T] and Match[T] — the type arg (str or bytes) can be checked. 
+ if typing.get_origin(T) is re.Pattern: + if not isinstance(value, re.Pattern): + return False + args = getattr(T, "__args__", None) + if args is not None: + return isinstance(value.pattern, args[0]) + return True + if typing.get_origin(T) is re.Match: + if not isinstance(value, re.Match): + return False + args = getattr(T, "__args__", None) + if args is not None: + return isinstance(value.string, args[0]) + return True - # Subclass test for Python 3.6 only. Python 3.7+ have typing._GenericAlias for the generics. - if safeissubclass(T, typing.Tuple) or get_origin(T) is tuple: + # ContextManager and AsyncContextManager — can't check the return type + # of __enter__ non-destructively, so just check the ABC. + if typing.get_origin(T) is contextlib.AbstractContextManager: + return isinstance(value, contextlib.AbstractContextManager) + if typing.get_origin(T) is contextlib.AbstractAsyncContextManager: + return isinstance(value, contextlib.AbstractAsyncContextManager) + + # Async ABCs and generator types — type parameters (yield, send, return) + # can't be checked non-destructively, so just check the ABC. + for runtimetype in (collections.abc.Awaitable, + collections.abc.Coroutine, + collections.abc.AsyncIterable, + collections.abc.AsyncIterator, + collections.abc.Generator, + collections.abc.AsyncGenerator): + if typing.get_origin(T) is runtimetype: + return isinstance(value, runtimetype) + + if typing.get_origin(T) is tuple: if not isinstance(value, tuple): return False # bare `typing.Tuple`, no restrictions on length or element type. - if not T.__args__: + if not getattr(T, "__args__", None): return True # homogeneous element type, arbitrary length if len(T.__args__) == 2 and T.__args__[1] is Ellipsis: @@ -246,33 +286,47 @@ def get_origin(tp): return all(isoftype(elt, U) for elt, U in zip(value, T.__args__)) # Check mapping types that allow non-destructive iteration. 
- def ismapping(statictype, runtimetype): + def ismapping(runtimetype): if not isinstance(value, runtimetype): return False - if T.__args__ is None: # Python 3.6: consistent behavior with 3.7+, which use unconstrained TypeVar KT, VT. + args = getattr(T, "__args__", None) + if args is None: args = (typing.TypeVar("KT"), typing.TypeVar("VT")) - else: - args = T.__args__ assert len(args) == 2 if not value: # An empty dict has no key and value types. return False K, V = args return all(isoftype(k, K) and isoftype(v, V) for k, v in value.items()) - for statictype, runtimetype in ((typing.Dict, dict), - (typing.MutableMapping, collections.abc.MutableMapping), - (typing.Mapping, collections.abc.Mapping)): - if safeissubclass(T, statictype) or get_origin(T) is runtimetype: - return ismapping(statictype, runtimetype) + # Counter[T] is a mapping (keys: T, values: int), but has only one type arg. + if typing.get_origin(T) is collections.Counter: + if not isinstance(value, collections.Counter): + return False + args = getattr(T, "__args__", None) + if args is None: + args = (typing.TypeVar("T"),) + assert len(args) == 1 + if not value: + return False + U = args[0] + return all(isoftype(k, U) and isinstance(v, int) for k, v in value.items()) + + for runtimetype in (dict, + collections.defaultdict, + collections.OrderedDict, + collections.ChainMap, + collections.abc.MutableMapping, + collections.abc.Mapping): + if typing.get_origin(T) is runtimetype: + return ismapping(runtimetype) # ItemsView is a special-case mapping in that we must not call # `.items()` on `value`. - if safeissubclass(T, typing.ItemsView) or get_origin(T) is collections.abc.ItemsView: + if typing.get_origin(T) is collections.abc.ItemsView: if not isinstance(value, collections.abc.ItemsView): return False - if T.__args__ is None: # Python 3.6: consistent behavior with 3.7+, which use unconstrained TypeVar KT, VT. 
+ args = getattr(T, "__args__", None) + if args is None: args = (typing.TypeVar("KT"), typing.TypeVar("VT")) - else: - args = T.__args__ assert len(args) == 2 if not value: # An empty dict has no key and value types. return False @@ -288,14 +342,17 @@ def ismapping(statictype, runtimetype): def iscollection(statictype, runtimetype): if not isinstance(value, runtimetype): return False - if safeissubclass(statictype, typing.ByteString) or get_origin(statictype) is collections.abc.ByteString: + if typing.get_origin(statictype) is collections.abc.ByteString: + # DEPRECATED: typing.ByteString is deprecated since Python 3.12. + # TODO: Remove this branch and the ByteString entry in the loop below + # when the floor bumps to Python 3.12. + # # WTF? A ByteString is a Sequence[int], but only statically. - # At run time, the `__args__` are actually empty - it looks + # At run time, the `__args__` are actually empty — it looks # like a bare Sequence, which is invalid. HACK the special case. typeargs = (int,) else: - typeargs = T.__args__ - # Python 3.6: consistent behavior with 3.7+, which use an unconstrained TypeVar T. + typeargs = getattr(T, "__args__", None) if typeargs is None: typeargs = (typing.TypeVar("T"),) # Judging by the docs, List takes one type argument. The rest are similar. @@ -309,7 +366,7 @@ def iscollection(statictype, runtimetype): (typing.FrozenSet, frozenset), (typing.Set, set), (typing.Deque, collections.deque), - (typing.ByteString, collections.abc.ByteString), # must check before Sequence + (typing.ByteString, collections.abc.ByteString), # DEPRECATED; must check before Sequence (typing.MutableSet, collections.abc.MutableSet), # must check mutable first # because a mutable value has *also* the interface of the immutable variant # (e.g. 
MutableSet is a subtype of AbstractSet) @@ -320,10 +377,10 @@ def iscollection(statictype, runtimetype): (typing.MutableSequence, collections.abc.MutableSequence), (typing.MappingView, collections.abc.MappingView), (typing.Sequence, collections.abc.Sequence)): - if safeissubclass(T, statictype) or get_origin(T) is runtimetype: + if typing.get_origin(T) is runtimetype: return iscollection(statictype, runtimetype) - if safeissubclass(T, typing.Callable) or get_origin(T) is collections.abc.Callable: + if typing.get_origin(T) is collections.abc.Callable: if not callable(value): return False return True @@ -353,6 +410,18 @@ def iscollection(statictype, runtimetype): # return False # return True + # Protocol — support @runtime_checkable Protocols; clear error for others. + # Specific Protocols (Supports* ABCs) are already handled above by identity check. + # We use _is_protocol (not issubclass) because issubclass(X, Protocol) returns + # True for some non-Protocol types (e.g. int) on Python 3.10. + if isinstance(T, type) and T is not typing.Protocol and getattr(T, '_is_protocol', False): + if getattr(T, '_is_runtime_protocol', False): + return isinstance(value, T) + raise TypeError( + f"isoftype: {T.__qualname__} is a Protocol but not @typing.runtime_checkable, " + f"so runtime structural checks are not possible. " + f"Add @typing.runtime_checkable to enable isinstance checks.") + # Catch any `typing` meta-utilities we don't currently support. if hasattr(T, "__module__") and T.__module__ == "typing": # pragma: no cover, only happens when something goes wrong. fullname = repr(T.__class__)