From c93c3f5d82c2c4a727da2c757230f4331bf5dfef Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Sat, 20 Jun 2026 16:04:13 -0400
Subject: [PATCH 1/9] ci(release): categorized GitHub-native release notes
 (install + download + What's Changed)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mikepenz changelog builder relies on GitHub's compare API, which 404s
("no common ancestor") for this repo's tag range after the history rewrites, so
it produced an empty changelog. Replace it with GitHub's native generated notes,
which list merged PRs/issues without needing the compare merge-base:

- Compose an install + download-table header (cargo-dist style, like the
  codeanalyzer-typescript release notes).
- softprops/action-gh-release@v2 with generate_release_notes: true appends a
  categorized "What's Changed" section.
- .github/release.yml groups PRs under emoji headings by label (Features, Fixes,
  Docs, Tests, Breaking, …), mirroring the old mikepenz categories.
- Drop the unused mindsers/mikepenz steps and release_config.json.
---
 .github/release.yml                   | 26 +++++++++++
 .github/workflows/release.yml         | 64 +++++++++++++++-----------
 .github/workflows/release_config.json | 65 ---------------------------
 3 files changed, 63 insertions(+), 92 deletions(-)
 create mode 100644 .github/release.yml
 delete mode 100644 .github/workflows/release_config.json

diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 0000000..1fc55ad
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,26 @@
+# GitHub auto-generated release notes config: the "What's Changed" section appended
+# by `generate_release_notes` in .github/workflows/release.yml. A PR is listed under
+# the first category whose labels match, so Breaking Changes leads and "*" stays last.
+changelog:
+  exclude:
+    labels: [ignore]   # keep PRs labelled "ignore" out of the notes entirely
+    authors:
+      - dependabot
+      - github-actions
+  categories:
+    - title: 🚨 Breaking Changes
+      labels: [breaking]
+    - title: 🚀 Features
+      labels: [enhancement, kind/feature]
+    - title: 🐛 Fixes
+      labels: [bug, fix]
+    - title: ♻️ Refactoring
+      labels: [refactoring]
+    - title: ⚡️ Performance
+      labels: [performance]
+    - title: 📚 Documentation
+      labels: [documentation, doc]
+    - title: 🚦 Tests
+      labels: [test]
+    - title: 🛠 Other Changes
+      labels: ["*"]
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 32fa884..5002cb9 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -87,40 +87,50 @@ jobs:
           echo "current_version=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
         shell: bash
 
-      - name: Read Changelog Entry
-        id: changelog_reader
-        uses: mindsers/changelog-reader-action@v2
-        with:
-          validation_level: warn
-          version: ${{ steps.tag_name.outputs.current_version }}
-          path: ./CHANGELOG.md
-
-      - name: Build changelog
-        id: gen_changelog
-        continue-on-error: true   # auto-PR-diff is best-effort; CHANGELOG.md is the source of truth
-        uses: mikepenz/release-changelog-builder-action@v5
-        with:
-          failOnError: "false"
-          configuration: .github/workflows/release_config.json
+      # cargo-dist-style notes: install one-liners + a download table. The categorized
+      # "What's Changed" (merged PRs/issues grouped under emoji headings via
+      # .github/release.yml) is appended by generate_release_notes below. Indented code
+      # blocks avoid backticks in the heredoc.
+      - name: Compose release notes header (install + download)
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VERSION: ${{ steps.tag_name.outputs.current_version }}
+        run: |
+          REPO="codellm-devkit/codeanalyzer-python"
+          BASE="https://github.com/$REPO/releases/download/v$VERSION"   # this tag's own assets
+          cat > "$RUNNER_TEMP/RELEASE_BODY.md" <<EOF
+          ## Install codeanalyzer-python v$VERSION
+
+          Shell script (installs the canpy CLI via uv / pipx / pip):
+
+              curl --proto '=https' --tlsv1.2 -LsSf $BASE/canpy-installer.sh | sh
+
+          PyPI:
+
+              pip install codeanalyzer-python==$VERSION
+
+          For the optional live Neo4j push (--emit neo4j --neo4j-uri ...):
+
+              pip install 'codeanalyzer-python[neo4j]==$VERSION'
+
+          ## Download
+
+          | File | Description |
+          | --- | --- |
+          | [codeanalyzer_python-$VERSION-py3-none-any.whl]($BASE/codeanalyzer_python-$VERSION-py3-none-any.whl) | Python wheel |
+          | [codeanalyzer_python-$VERSION.tar.gz]($BASE/codeanalyzer_python-$VERSION.tar.gz) | Source distribution |
+          | [canpy-installer.sh]($BASE/canpy-installer.sh) | Shell installer (uv / pipx / pip) |
+          | [schema.json]($BASE/schema.json) | Neo4j schema contract |
+          EOF
+          echo "----- composed header -----"; cat "$RUNNER_TEMP/RELEASE_BODY.md"
 
       - name: Publish release on GitHub
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         with:
           files: |
             dist/*
             release-assets/*
-          body: |
-            ## Release Notes (from CHANGELOG.md)
-            
-            ${{ steps.changelog_reader.outputs.changes }}
-            
-            ---
-            
-            ## Detailed Changes (auto-generated)
-            
-            ${{ steps.gen_changelog.outputs.changelog }}
+          body_path: ${{ runner.temp }}/RELEASE_BODY.md
+          generate_release_notes: true   # appends categorized "What's Changed" (see .github/release.yml)
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
diff --git a/.github/workflows/release_config.json b/.github/workflows/release_config.json
deleted file mode 100644
index abb8698..0000000
--- a/.github/workflows/release_config.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
-    "categories": [
-        {
-            "title": "## ✨ Release",
-            "labels": [
-                "release"
-            ]
-        },
-        {
-            "title": "## 🚀 Features",
-            "labels": [
-                "kind/feature",
-                "enhancement"
-            ]
-        },
-        {
-            "title": "## 🐛 Fixes",
-            "labels": [
-                "fix",
-                "bug"
-            ]
-        },
-        {
-            "title": "## ♻️ Refactoring",
-            "labels": [
-                "refactoring"
-            ]
-        },
-        {
-            "title": "## ⚡️ Performance Improvements",
-            "labels": [
-                "performance"
-            ]
-        },
-        {
-            "title": "## \uD83D\uDCDA Documentation",
-            "labels": [
-                "documentation",
-                "doc"
-            ]
-        },
-        {
-            "title": "## \uD83D\uDEA6 Tests",
-            "labels": [
-                "test"
-            ]
-        },
-        {
-            "title": "## \uD83D\uDEE0 Other Updates",
-            "labels": [
-                "other",
-                "kind/dependency-change"
-            ]
-        },
-        {
-            "title": "## 🚨 Breaking Changes",
-            "labels": [
-                "breaking"
-            ]
-        }
-    ],
-    "ignore_labels": [
-        "ignore"
-    ]
-}

From ddd6df4578e32db3764f628369d7dc5d2022b84d Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Sat, 20 Jun 2026 16:08:43 -0400
Subject: [PATCH 2/9] docs(readme): widen the --help block and add a GitHub
 release badge

- update_readme.py: pin typer.rich_utils.MAX_WIDTH to WIDTH (100). Typer caps
  help at MAX_WIDTH=80 regardless of COLUMNS, so CI rendered the box much
  narrower than a dev machine and the release doc-sync kept shrinking it. Pinning
  it makes the rendered help wide and byte-identical local vs CI.
- README: regenerate the (now 100-wide) help block; add a GitHub release badge
  (github/v/release) alongside the PyPI / workflow / license badges.
---
 README.md                | 152 ++++++++++++++-------------------------
 scripts/update_readme.py |  10 +++
 2 files changed, 65 insertions(+), 97 deletions(-)

diff --git a/README.md b/README.md
index 8c85783..8211145 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,8 @@
 
 [![PyPI](https://img.shields.io/pypi/v/codeanalyzer-python?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/codeanalyzer-python/)
 [![Python](https://img.shields.io/pypi/pyversions/codeanalyzer-python?style=for-the-badge&logo=python&logoColor=white)](https://pypi.org/project/codeanalyzer-python/)
-[![Release](https://img.shields.io/github/actions/workflow/status/codellm-devkit/codeanalyzer-python/release.yml?style=for-the-badge&label=release&logo=github)](https://github.com/codellm-devkit/codeanalyzer-python/actions/workflows/release.yml)
+[![GitHub release](https://img.shields.io/github/v/release/codellm-devkit/codeanalyzer-python?style=for-the-badge&logo=github&label=GitHub&color=2dba4e)](https://github.com/codellm-devkit/codeanalyzer-python/releases/latest)
+[![Release](https://img.shields.io/github/actions/workflow/status/codellm-devkit/codeanalyzer-python/release.yml?style=for-the-badge&label=release&logo=githubactions&logoColor=white)](https://github.com/codellm-devkit/codeanalyzer-python/actions/workflows/release.yml)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue?style=for-the-badge)](./LICENSE)
 
 </div>
@@ -133,102 +134,59 @@ $ canpy --help
 
  Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.
 
-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --input           -i                     PATH              Path to the       │
-│                                                            project root      │
-│                                                            directory (not    │
-│                                                            required for      │
-│                                                            --emit schema).   │
-│ --output          -o                     PATH              Output directory  │
-│                                                            for artifacts.    │
-│ --format          -f                     [json|msgpack]    Output format for │
-│                                                            --emit json: json │
-│                                                            or msgpack.       │
-│                                                            [default: json]   │
-│ --emit                                   [json|neo4j|sche  Output target:    │
-│                                          ma]               json              │
-│                                                            (analysis.json,   │
-│                                                            default) | neo4j  │
-│                                                            (graph.cypher or  │
-│                                                            live Bolt push) | │
-│                                                            schema (the Neo4j │
-│                                                            schema.json       │
-│                                                            contract).        │
-│                                                            [default: json]   │
-│ --app-name                               TEXT              Logical           │
-│                                                            application name  │
-│                                                            for the graph     │
-│                                                            :PyApplication    │
-│                                                            anchor (default:  │
-│                                                            input dir name).  │
-│ --neo4j-uri                              TEXT              Push the graph to │
-│                                                            a live Neo4j over │
-│                                                            Bolt              │
-│                                                            (incremental);    │
-│                                                            omit to write     │
-│                                                            graph.cypher.     │
-│                                                            [env var:         │
-│                                                            NEO4J_URI]        │
-│ --neo4j-user                             TEXT              Neo4j username.   │
-│                                                            [env var:         │
-│                                                            NEO4J_USERNAME]   │
-│                                                            [default: neo4j]  │
-│ --neo4j-password                         TEXT              Neo4j password.   │
-│                                                            Prefer the env    │
-│                                                            var over the flag │
-│                                                            (the flag is      │
-│                                                            visible in shell  │
-│                                                            history / process │
-│                                                            list).            │
-│                                                            [env var:         │
-│                                                            NEO4J_PASSWORD]   │
-│                                                            [default: neo4j]  │
-│ --neo4j-database                         TEXT              Neo4j database    │
-│                                                            name (default:    │
-│                                                            server default).  │
-│                                                            [env var:         │
-│                                                            NEO4J_DATABASE]   │
-│ --codeql              --no-codeql                          Enable            │
-│                                                            CodeQL-based      │
-│                                                            analysis.         │
-│                                                            [default:         │
-│                                                            no-codeql]        │
-│ --ray                 --no-ray                             Enable Ray for    │
-│                                                            distributed       │
-│                                                            analysis.         │
-│                                                            [default: no-ray] │
-│ --eager               --lazy                               Enable eager or   │
-│                                                            lazy analysis.    │
-│                                                            Defaults to lazy. │
-│                                                            [default: lazy]   │
-│ --skip-tests          --include-tests                      Skip test files   │
-│                                                            in analysis.      │
-│                                                            [default:         │
-│                                                            skip-tests]       │
-│ --file-name                              PATH              Analyze only the  │
-│                                                            specified file    │
-│                                                            (relative to      │
-│                                                            input directory). │
-│ --cache-dir       -c                     PATH              Directory to      │
-│                                                            store analysis    │
-│                                                            cache. Defaults   │
-│                                                            to                │
-│                                                            '.codeanalyzer'   │
-│                                                            in the input      │
-│                                                            directory.        │
-│ --clear-cache         --keep-cache                         Clear cache after │
-│                                                            analysis. By      │
-│                                                            default, cache is │
-│                                                            retained.         │
-│                                                            [default:         │
-│                                                            keep-cache]       │
-│                   -v                     INTEGER           Increase          │
-│                                                            verbosity: -v,    │
-│                                                            -vv, -vvv         │
-│                                                            [default: 0]      │
-│ --help                                                     Show this message │
-│                                                            and exit.         │
-╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮
+│ --input           -i                     PATH                 Path to the project root directory │
+│                                                               (not required for --emit schema).  │
+│ --output          -o                     PATH                 Output directory for artifacts.    │
+│ --format          -f                     [json|msgpack]       Output format for --emit json:     │
+│                                                               json or msgpack.                   │
+│                                                               [default: json]                    │
+│ --emit                                   [json|neo4j|schema]  Output target: json                │
+│                                                               (analysis.json, default) | neo4j   │
+│                                                               (graph.cypher or live Bolt push) | │
+│                                                               schema (the Neo4j schema.json      │
+│                                                               contract).                         │
+│                                                               [default: json]                    │
+│ --app-name                               TEXT                 Logical application name for the   │
+│                                                               graph :PyApplication anchor        │
+│                                                               (default: input dir name).         │
+│ --neo4j-uri                              TEXT                 Push the graph to a live Neo4j     │
+│                                                               over Bolt (incremental); omit to   │
+│                                                               write graph.cypher.                │
+│                                                               [env var: NEO4J_URI]               │
+│ --neo4j-user                             TEXT                 Neo4j username.                    │
+│                                                               [env var: NEO4J_USERNAME]          │
+│                                                               [default: neo4j]                   │
+│ --neo4j-password                         TEXT                 Neo4j password. Prefer the env var │
+│                                                               over the flag (the flag is visible │
+│                                                               in shell history / process list).  │
+│                                                               [env var: NEO4J_PASSWORD]          │
+│                                                               [default: neo4j]                   │
+│ --neo4j-database                         TEXT                 Neo4j database name (default:      │
+│                                                               server default).                   │
+│                                                               [env var: NEO4J_DATABASE]          │
+│ --codeql              --no-codeql                             Enable CodeQL-based analysis.      │
+│                                                               [default: no-codeql]               │
+│ --ray                 --no-ray                                Enable Ray for distributed         │
+│                                                               analysis.                          │
+│                                                               [default: no-ray]                  │
+│ --eager               --lazy                                  Enable eager or lazy analysis.     │
+│                                                               Defaults to lazy.                  │
+│                                                               [default: lazy]                    │
+│ --skip-tests          --include-tests                         Skip test files in analysis.       │
+│                                                               [default: skip-tests]              │
+│ --file-name                              PATH                 Analyze only the specified file    │
+│                                                               (relative to input directory).     │
+│ --cache-dir       -c                     PATH                 Directory to store analysis cache. │
+│                                                               Defaults to '.codeanalyzer' in the │
+│                                                               input directory.                   │
+│ --clear-cache         --keep-cache                            Clear cache after analysis. By     │
+│                                                               default, cache is retained.        │
+│                                                               [default: keep-cache]              │
+│                   -v                     INTEGER              Increase verbosity: -v, -vv, -vvv  │
+│                                                               [default: 0]                       │
+│ --help                                                        Show this message and exit.        │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
 <!-- END canpy-help -->
diff --git a/scripts/update_readme.py b/scripts/update_readme.py
index 75cb8f0..e424448 100644
--- a/scripts/update_readme.py
+++ b/scripts/update_readme.py
@@ -31,6 +31,16 @@ def render_help() -> str:
     os.environ["TERM"] = "dumb"
     os.environ["NO_COLOR"] = "1"
 
+    # Typer caps help width at rich_utils.MAX_WIDTH (default 80) regardless of
+    # COLUMNS, so CI renders the box narrower than a dev machine. Pin it to WIDTH
+    # so the rendered help is wide and byte-identical everywhere.
+    try:
+        import typer.rich_utils as _ru
+
+        _ru.MAX_WIDTH = WIDTH
+    except Exception:  # pragma: no cover - defensive across Typer versions
+        pass
+
     from click.testing import CliRunner
     from typer.main import get_command
 

From cdaa793127b800c17af169b20a8e5509dc1bc8c6 Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Sat, 20 Jun 2026 16:16:26 -0400
Subject: [PATCH 3/9] feat(homebrew): add a Homebrew tap formula + release job

Add `brew install codellm-devkit/tap/codeanalyzer-python`:
- packaging/homebrew/generate_formula.sh emits a formula that depends on `uv`
  and installs version-pinned wrappers running the published PyPI release via
  `uvx`. The package is pure-Python with heavy native deps (ray, pandas, numpy),
  so vendoring every transitive dependency as a Homebrew resource is impractical
  and pip-at-build-time is blocked by the sandbox; the uv-tool approach keeps the
  formula tiny and `brew install` sandbox-safe.
- release.yml gains a `homebrew` job (needs: release) that regenerates the
  formula on each tag and pushes it to codellm-devkit/homebrew-tap (requires the
  HOMEBREW_TAP_TOKEN secret, same as the codeanalyzer-typescript release).
- README: Install via Homebrew section; CHANGELOG: Unreleased entry.
---
 .github/workflows/release.yml          | 40 ++++++++++++++++++
 CHANGELOG.md                           |  5 +++
 README.md                              | 10 +++++
 packaging/homebrew/generate_formula.sh | 56 ++++++++++++++++++++++++++
 4 files changed, 111 insertions(+)
 create mode 100755 packaging/homebrew/generate_formula.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 5002cb9..d1e0a33 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -136,3 +136,43 @@ jobs:
 
       - name: Publish to PyPI via Trusted Publishing
         run: uv publish
+
+  # Regenerate the Homebrew formula and push it to the shared tap. Split into its
+  # own job (needs: release) so a tap-push failure -- e.g. a missing
+  # HOMEBREW_TAP_TOKEN -- is isolated from the PyPI and GitHub Release steps above.
+  # The non-Rust equivalent of what cargo-dist does for you.
+  homebrew:
+    needs: release
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Derive version from tag
+        id: ver
+        run: echo "version=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
+
+      - name: Generate Homebrew formula
+        env:
+          REPO: ${{ github.repository }}
+          VERSION: ${{ steps.ver.outputs.version }}
+        run: |
+          chmod +x packaging/homebrew/generate_formula.sh
+          ./packaging/homebrew/generate_formula.sh > codeanalyzer-python.rb
+          cat codeanalyzer-python.rb
+
+      - name: Push formula to codellm-devkit/homebrew-tap
+        env:
+          TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}   # PAT with write access to homebrew-tap
+          VERSION: ${{ steps.ver.outputs.version }}
+        run: |
+          git clone "https://x-access-token:${TAP_TOKEN}@github.com/codellm-devkit/homebrew-tap.git" tap
+          mkdir -p tap/Formula
+          cp codeanalyzer-python.rb tap/Formula/codeanalyzer-python.rb
+          cd tap
+          git config user.name  "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add Formula/codeanalyzer-python.rb
+          git diff --cached --quiet || git commit -m "codeanalyzer-python ${VERSION}"   # commit only if the formula changed; a real commit failure fails the step
+          git push
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6200df6..7a36b1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Added
+- **Homebrew tap** — `brew install codellm-devkit/tap/codeanalyzer-python`. The release workflow auto-generates a formula (`packaging/homebrew/generate_formula.sh`) that installs the pinned PyPI release as an isolated `uv` tool, and pushes it to `codellm-devkit/homebrew-tap`. Because the package is pure-Python with heavy native dependencies (`ray`, `pandas`, `numpy`), the formula depends on `uv` and runs the release via `uvx` rather than vendoring every transitive dependency as a Homebrew resource.
+
 ## [0.2.0] - 2026-06-20
 
 ### Added
diff --git a/README.md b/README.md
index 8211145..9f87640 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ and merges them with the Jedi-derived edges, also backfilling callees Jedi could
   - [Prerequisites](#prerequisites)
   - [Install via pip (PyPI)](#install-via-pip-pypi)
   - [Install via shell script](#install-via-shell-script)
+  - [Install via Homebrew](#install-via-homebrew)
   - [Build from source](#build-from-source)
 - [Usage](#usage)
   - [Options](#options)
@@ -102,6 +103,15 @@ Install the CLI as an isolated tool with the one-line installer (provisions via
 curl --proto '=https' --tlsv1.2 -LsSf https://github.com/codellm-devkit/codeanalyzer-python/releases/latest/download/canpy-installer.sh | sh
 ```
 
+### Install via Homebrew
+
+```sh
+brew install codellm-devkit/tap/codeanalyzer-python
+```
+
+The formula depends on [uv](https://docs.astral.sh/uv/) and installs `canpy` as an isolated,
+version-pinned uv tool (the package and its dependencies are resolved and cached on first run).
+
 ### Build from source
 
 This project uses [uv](https://docs.astral.sh/uv/) for dependency management.
diff --git a/packaging/homebrew/generate_formula.sh b/packaging/homebrew/generate_formula.sh
new file mode 100755
index 0000000..458f436
--- /dev/null
+++ b/packaging/homebrew/generate_formula.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Generate the Homebrew formula for codeanalyzer-python (the `canpy` CLI).
+#
+# Unlike the codeanalyzer-typescript sibling -- which ships a single self-contained
+# binary that the formula just downloads -- codeanalyzer-python is a pure-Python
+# package published to PyPI with heavy native dependencies (ray, pandas, numpy).
+# Vendoring every transitive dependency as a Homebrew `resource` is impractical
+# (ray is not buildable from an sdist), and pip-installing at build time is blocked
+# by Homebrew's network sandbox.
+#
+# So the formula stays tiny: it depends on `uv` and installs version-pinned wrapper
+# scripts that run the published PyPI release via `uvx` (uv resolves and caches the
+# isolated environment on first run). This keeps `brew install` sandbox-safe (no
+# network at build time) while pinning the exact released version.
+#
+# Usage:
+#   REPO=codellm-devkit/codeanalyzer-python VERSION=0.2.0 \
+#     ./generate_formula.sh > codeanalyzer-python.rb
+#
+set -euo pipefail
+
+REPO="${REPO:?set REPO, e.g. codellm-devkit/codeanalyzer-python}"
+VERSION="${VERSION:?set VERSION, e.g. 0.2.0}"
+
+cat <<EOF
+# This file is auto-generated by packaging/homebrew/generate_formula.sh on release.
+# Do not edit by hand -- changes will be overwritten on the next tag.
+class CodeanalyzerPython < Formula
+  desc "CLDK Python analyzer (canpy) -- emits canonical analysis.json or a Neo4j graph"
+  homepage "https://github.com/${REPO}"
+  version "${VERSION}"
+  license "Apache-2.0"
+
+  # codeanalyzer-python is a pure-Python PyPI package with heavy native deps
+  # (ray, pandas, numpy). Rather than vendor every transitive dependency as a
+  # Homebrew resource, install the pinned PyPI release as an isolated uv tool;
+  # uv resolves and caches the environment on first run.
+  depends_on "uv"
+
+  def install
+    %w[canpy codeanalyzer].each do |exe|
+      (bin/exe).write <<~SH
+        #!/bin/bash
+        exec uvx --from "codeanalyzer-python==#{version}" #{exe} "\$@"
+      SH
+      chmod 0755, bin/exe
+    end
+  end
+
+  test do
+    assert_match "codeanalyzer-python==#{version}", File.read(bin/"canpy")
+    assert_predicate bin/"canpy", :executable?
+  end
+end
+EOF

From aa60bd77553d98fc76ec940fd9712426ac97103a Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Sat, 20 Jun 2026 16:26:33 -0400
Subject: [PATCH 4/9] fix(homebrew): formula must declare url + sha256

`brew install` failed with "formula requires at least a URL" because the formula
had only version + depends_on. Point url at the released sdist and add its
sha256 (byte-identical to the PyPI sdist); the install method still just writes
the uv wrappers. The release `homebrew` job now hashes the published sdist and
passes SHA256 to generate_formula.sh.
---
 .github/workflows/release.yml          |  7 ++++++-
 packaging/homebrew/generate_formula.sh | 11 ++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d1e0a33..ab14353 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -159,7 +159,12 @@ jobs:
           VERSION: ${{ steps.ver.outputs.version }}
         run: |
           chmod +x packaging/homebrew/generate_formula.sh
-          ./packaging/homebrew/generate_formula.sh > codeanalyzer-python.rb
+          # Hash the exact sdist bytes the release job just published. pipefail makes
+          # a failed download abort the step instead of hashing curl's empty output.
+          sdist="https://github.com/${REPO}/releases/download/v${VERSION}/codeanalyzer_python-${VERSION}.tar.gz"
+          SHA256="$(set -o pipefail; curl -fLsS "$sdist" | shasum -a 256 | cut -d' ' -f1)"
+          REPO="$REPO" VERSION="$VERSION" SHA256="$SHA256" \
+            ./packaging/homebrew/generate_formula.sh > codeanalyzer-python.rb
           cat codeanalyzer-python.rb
 
       - name: Push formula to codellm-devkit/homebrew-tap
diff --git a/packaging/homebrew/generate_formula.sh b/packaging/homebrew/generate_formula.sh
index 458f436..70846cf 100755
--- a/packaging/homebrew/generate_formula.sh
+++ b/packaging/homebrew/generate_formula.sh
@@ -14,14 +14,21 @@
 # isolated environment on first run). This keeps `brew install` sandbox-safe (no
 # network at build time) while pinning the exact released version.
 #
+# Homebrew requires every formula to declare a source `url` + `sha256` for its
+# stable spec, so we point at the released sdist (byte-identical to the PyPI one).
+# The install method ignores the unpacked source and just writes uv wrappers, but
+# the url anchors the version and satisfies Homebrew's spec requirement.
+#
 # Usage:
-#   REPO=codellm-devkit/codeanalyzer-python VERSION=0.2.0 \
+#   REPO=codellm-devkit/codeanalyzer-python VERSION=0.2.0 SHA256=<sdist sha256> \
 #     ./generate_formula.sh > codeanalyzer-python.rb
 #
 set -euo pipefail
 
 REPO="${REPO:?set REPO, e.g. codellm-devkit/codeanalyzer-python}"
 VERSION="${VERSION:?set VERSION, e.g. 0.2.0}"
+SHA256="${SHA256:?set SHA256 of the released sdist}"
+SDIST_URL="https://github.com/${REPO}/releases/download/v${VERSION}/codeanalyzer_python-${VERSION}.tar.gz"
 
 cat <<EOF
 # This file is auto-generated by packaging/homebrew/generate_formula.sh on release.
@@ -29,6 +36,8 @@ cat <<EOF
 class CodeanalyzerPython < Formula
   desc "CLDK Python analyzer (canpy) -- emits canonical analysis.json or a Neo4j graph"
   homepage "https://github.com/${REPO}"
+  url "${SDIST_URL}"
+  sha256 "${SHA256}"
   version "${VERSION}"
   license "Apache-2.0"
 

From 08501604b396a9ff39e0197829d9de4dd8e4949a Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Mon, 22 Jun 2026 16:33:23 -0400
Subject: [PATCH 5/9] feat(analysis): install the analysis venv with uv and
 wire it to Jedi

Closes #47

The per-project analysis venv was built and populated but never used: __init__
left self.virtualenv = None and never reassigned it, so SymbolTableBuilder got
virtualenv=None and Jedi resolved against the default environment, ignoring the
installed dependencies. Set self.virtualenv to the venv path on both a fresh
build and a lazy reuse so Jedi resolves the project's third-party imports.

Also install dependencies with uv (uv pip install --python <venv>) instead of
pip: uv resolves and downloads in parallel with a shared global cache, which is
dramatically faster for large dependency trees (e.g. Odoo). uv ships as a
self-contained binary in its wheel, so it is present wherever canpy is installed
(including Docker); fall back to python -m pip when uv cannot be located.
---
 codeanalyzer/core.py | 51 +++++++++++++++++++++++++++++++-------------
 pyproject.toml       |  4 ++++
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py
index b8cfcca..dd27b72 100644
--- a/codeanalyzer/core.py
+++ b/codeanalyzer/core.py
@@ -226,6 +226,29 @@ def _get_base_interpreter() -> Path:
             f"a working Python interpreter that can create virtual environments."
         )
 
+    @staticmethod
+    def _uv_bin() -> Optional[str]:
+        """Locate a uv executable, returning ``None`` when there is none (callers
+        then fall back to pip). The binary bundled with the ``uv`` PyPI package is
+        tried first; if that lookup fails for any reason, PATH is searched."""
+        try:
+            import uv as uv_package
+
+            return str(uv_package.find_uv_bin())
+        except Exception:
+            return shutil.which("uv")
+
+    def _install_into_venv(self, venv_python: Path, args: List[str]) -> None:
+        """Install ``args`` into the venv that owns ``venv_python``. Runs
+        ``uv pip install --python <venv_python>`` when a uv binary is located and
+        ``<venv_python> -m pip install`` otherwise, from the project directory."""
+        target = str(venv_python)
+        uv_exe = self._uv_bin()
+        pip_argv = [target, "-m", "pip", "install"]
+        uv_argv = [uv_exe, "pip", "install", "--python", target]
+        cmd = [*(uv_argv if uv_exe else pip_argv), *args]
+        self._cmd_exec_helper(cmd, cwd=self.project_dir, check=True)
+
     def __enter__(self) -> "Codeanalyzer":
         # If no virtualenv is provided, try to create one using requirements.txt or pyproject.toml
         venv_path = self.cache_dir / self.project_dir.name / "virtualenv"
@@ -249,24 +272,19 @@ def __enter__(self) -> "Codeanalyzer":
                 ("test-requirements.txt", ["-r"]),
             ]
 
-            for dep_file, pip_args in dependency_files:
+            for dep_file, _ in dependency_files:
                 if (self.project_dir / dep_file).exists():
                     logger.info(f"Installing dependencies from {dep_file}")
-                    self._cmd_exec_helper(
-                        [str(venv_python), "-m", "pip", "install", "-U"] + pip_args + [str(self.project_dir / dep_file)],
-                        cwd=self.project_dir,
-                        check=True,
+                    self._install_into_venv(
+                        venv_python,
+                        ["--upgrade", "-r", str(self.project_dir / dep_file)],
                     )
 
             # Handle Pipenv files
             if (self.project_dir / "Pipfile").exists():
                 logger.info("Installing dependencies from Pipfile")
                 # Note: This would require pipenv to be installed
-                self._cmd_exec_helper(
-                    [str(venv_python), "-m", "pip", "install", "pipenv"],
-                    cwd=self.project_dir,
-                    check=True,
-                )
+                self._install_into_venv(venv_python, ["pipenv"])
                 self._cmd_exec_helper(
                     ["pipenv", "install", "--dev"],
                     cwd=self.project_dir,
@@ -289,14 +307,17 @@ def __enter__(self) -> "Codeanalyzer":
 
             if any((self.project_dir / file).exists() for file in package_definition_files):
                 logger.info("Installing project in editable mode")
-                self._cmd_exec_helper(
-                    [str(venv_python), "-m", "pip", "install", "-e", str(self.project_dir)],
-                    cwd=self.project_dir,
-                    check=True,
-                )
+                self._install_into_venv(venv_python, ["-e", str(self.project_dir)])
             else:
                 logger.warning("No package definition files found, skipping editable installation")
 
+        # Point Jedi at the analysis venv so it resolves the project's third-party
+        # imports. This runs on both a fresh build and a lazy reuse of an existing
+        # venv -- previously self.virtualenv stayed None, so the install above was
+        # never actually used by the symbol-table builder.
+        if venv_path.exists():
+            self.virtualenv = venv_path
+
         if self.using_codeql:
             logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
 
diff --git a/pyproject.toml b/pyproject.toml
index 4b2b57c..acc00a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,10 @@ dependencies = [
     "ray==2.0.0; python_version < '3.11'",
     "ray>=2.10.0,<3.0.0; python_version >= '3.11'",
     "packaging>=25.0",
+    # uv -- installs the analyzed project's deps into the analysis venv quickly.
+    # Shipped as a self-contained binary in its wheel, so it's available wherever
+    # canpy is pip-installed (incl. Docker); core.py falls back to pip without it.
+    "uv>=0.5.0",
 ]
 
 [project.optional-dependencies]

From df0eae935269910917969d3b422449223d3800af Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Mon, 22 Jun 2026 17:02:48 -0400
Subject: [PATCH 6/9] feat(schema,neo4j): first-class external_symbols; fix
 dropped call edges (#44)

Closes #44

Adopt the model codeanalyzer-typescript uses: external call targets are now
first-class in the IR instead of being re-derived ad hoc during Neo4j projection.

- schema: add PyExternalSymbol{name, module} and PyApplication.external_symbols,
  keyed by signature (mirrors TSExternalSymbol).
- core: _compute_external_symbols() classifies every call-graph endpoint not
  declared in the symbol table as an external (name/module from the signature),
  so analysis.json carries external info that was previously a bare target string.
- neo4j: :PyExternal gains a `module` property (SCHEMA_VERSION 1.0.0 -> 1.1.0,
  additive). project()'s _call_endpoint classifies authoritatively from
  external_symbols rather than a "present in the graph" heuristic, so an imported
  module name (a :PyPackage) can no longer shadow a call target and silently drop
  the PY_CALLS edge.
- rows: track node identity by (merge_label, value) so deferred PY_EXTENDS /
  PY_RESOLVES_TO edges can't be shadowed either.

Fixes the ~3.7% of call edges (e.g. targets os/re/json) that were dropped from
the emitted graph. Adds a regression test and exercises external_symbols in the
sample app; regenerates schema.neo4j.json.
---
 codeanalyzer/core.py             | 58 ++++++++++++++++++++++++++++++--
 codeanalyzer/neo4j/catalog.py    |  4 +--
 codeanalyzer/neo4j/project.py    | 28 +++++++++------
 codeanalyzer/neo4j/rows.py       | 15 ++++++---
 codeanalyzer/schema/__init__.py  |  2 ++
 codeanalyzer/schema/py_schema.py | 15 +++++++++
 schema.neo4j.json                |  5 +--
 test/sample_graph_app.py         |  4 +++
 test/test_neo4j_schema.py        | 34 +++++++++++++++++++
 9 files changed, 144 insertions(+), 21 deletions(-)

diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py
index dd27b72..314b5ec 100644
--- a/codeanalyzer/core.py
+++ b/codeanalyzer/core.py
@@ -8,7 +8,13 @@
 
 import ray
 from codeanalyzer.utils import logger
-from codeanalyzer.schema import PyApplication, PyModule, model_dump_json, model_validate_json
+from codeanalyzer.schema import (
+    PyApplication,
+    PyExternalSymbol,
+    PyModule,
+    model_dump_json,
+    model_validate_json,
+)
 from codeanalyzer.schema.py_schema import PyCallEdge
 from codeanalyzer.semantic_analysis.call_graph import (
     jedi_call_graph_edges,
@@ -379,6 +385,43 @@ def __exit__(self, *args, **kwargs) -> None:
             logger.info(f"Clearing cache directory: {self.cache_dir}")
             shutil.rmtree(self.cache_dir)
 
+    @staticmethod
+    def _compute_external_symbols(symbol_table, call_graph):
+        """Map every undeclared call-graph endpoint to a ``PyExternalSymbol``.
+
+        An endpoint (edge source or target) is undeclared when its signature
+        belongs to no class or callable, at any nesting depth, in the symbol
+        table. ``module``/``name`` are the signature split on its last dot; a
+        dot-less signature is used for both (best effort)."""
+        # Worklist of (is_class, node) pairs, seeded with each module's top level.
+        pending = []
+        for mod in symbol_table.values():
+            pending += [(False, fn) for fn in (mod.functions or {}).values()]
+            pending += [(True, klass) for klass in (mod.classes or {}).values()]
+
+        declared = set()
+        while pending:
+            is_class, node = pending.pop()
+            declared.add(node.signature)
+            # A class nests methods, a callable nests inner callables; either may
+            # nest further classes.
+            members = node.methods if is_class else node.inner_callables
+            pending += [(False, fn) for fn in (members or {}).values()]
+            pending += [(True, klass) for klass in (node.inner_classes or {}).values()]
+
+        # Scan edges in order (source before target); first sighting creates the entry.
+        externals: Dict[str, PyExternalSymbol] = {}
+        for edge in call_graph:
+            for endpoint in (edge.source, edge.target):
+                if endpoint in declared or endpoint in externals:
+                    continue
+                if "." in endpoint:
+                    owner, short = endpoint.rsplit(".", 1)
+                else:
+                    owner = short = endpoint
+                externals[endpoint] = PyExternalSymbol(name=short, module=owner)
+        return externals
+
     def analyze(self) -> PyApplication:
         """Analyze the project and return a PyApplication with symbol table.
         
@@ -418,8 +461,19 @@ def analyze(self) -> PyApplication:
         jedi_edges = jedi_call_graph_edges(symbol_table)
         call_graph = merge_edges(jedi_edges, codeql_edges)
 
+        # Classify call-graph endpoints that are not declared in the symbol table
+        # (imported library / builtin members) once, so the JSON and Neo4j backends
+        # share one authoritative external-symbol set.
+        external_symbols = self._compute_external_symbols(symbol_table, call_graph)
+
         # Recreate pyapplication
-        app = PyApplication.builder().symbol_table(symbol_table).call_graph(call_graph).build()
+        app = (
+            PyApplication.builder()
+            .symbol_table(symbol_table)
+            .call_graph(call_graph)
+            .external_symbols(external_symbols)
+            .build()
+        )
         
         # Save to cache
         self._save_analysis_cache(app, cache_file)
diff --git a/codeanalyzer/neo4j/catalog.py b/codeanalyzer/neo4j/catalog.py
index 37f8a1a..155d86a 100644
--- a/codeanalyzer/neo4j/catalog.py
+++ b/codeanalyzer/neo4j/catalog.py
@@ -34,7 +34,7 @@
 
 from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
 
-SCHEMA_VERSION = "1.0.0"
+SCHEMA_VERSION = "1.1.0"
 
 # PropType ∈ {"string", "integer", "float", "boolean", "string[]", "integer[]"}.
 
@@ -119,7 +119,7 @@ class RelType:
         "PyExternal",
         "PySymbol",
         "signature",
-        {"signature": "string", "name": "string"},
+        {"signature": "string", "name": "string", "module": "string"},
     ),
     NodeLabel("PyPackage", "PyPackage", "name", {"name": "string"}),
     NodeLabel(
diff --git a/codeanalyzer/neo4j/project.py b/codeanalyzer/neo4j/project.py
index 4878cda..7c4deb7 100644
--- a/codeanalyzer/neo4j/project.py
+++ b/codeanalyzer/neo4j/project.py
@@ -60,11 +60,12 @@ def project(app: PyApplication, app_name: str) -> GraphRows:
         b.edge("PY_HAS_MODULE", app_ref, mod_ref)
         _project_module_body(b, file_key, mod_ref, mod)
 
-    # The aggregated :PY_CALLS twin. Endpoints not present in the symbol table become
-    # :PyExternal ghost nodes (the analyzer already preserves them as ghost nodes).
+    # The aggregated :PY_CALLS twin. Endpoints listed in app.external_symbols become
+    # :PyExternal ghost nodes; the rest are declared :PySymbol nodes already emitted.
+    externals = app.external_symbols or {}
     for e in app.call_graph:
-        src = _call_endpoint(b, e.source)
-        tgt = _call_endpoint(b, e.target)
+        src = _call_endpoint(b, e.source, externals)
+        tgt = _call_endpoint(b, e.target, externals)
         b.edge("PY_CALLS", src, tgt, _call_edge_props(e.weight, list(e.provenance or [])))
 
     return b.finish()
@@ -74,13 +75,20 @@ def _sym(signature: str) -> NodeRef:
     return NodeRef("PySymbol", "signature", signature)
 
 
-def _call_endpoint(b: RowBuilder, signature: str) -> NodeRef:
-    """A call-graph endpoint: a known callable already emitted, or a phantom
-    :PyExternal symbol materialized on demand for a ghost target."""
-    if b.has_key(signature):
+def _call_endpoint(b: RowBuilder, signature: str, externals: dict) -> NodeRef:
+    """Resolve a call-graph endpoint to the node reference its edge attaches to.
+
+    ``externals`` (``app.external_symbols``) is authoritative: a listed signature
+    always becomes a :PyExternal ghost, so an imported module name that exists only
+    as a :PyPackage can never shadow the call target. An unlisted signature maps to
+    its already-emitted :PySymbol when there is one; otherwise it is materialized
+    as a :PyExternal ghost with ``module`` left as ``None``."""
+    listed = externals.get(signature)
+    if listed is None and b.has_key("PySymbol", signature):
         return _sym(signature)
-    name = signature.rsplit(".", 1)[-1] if "." in signature else signature
-    return b.node(["PySymbol", "PyExternal"], "signature", signature, {"name": name})
+    short = signature.rsplit(".", 1)[-1] if listed is None else listed.name
+    owner = None if listed is None else listed.module
+    return b.node(["PySymbol", "PyExternal"], "signature", signature, prune({"name": short, "module": owner}))
 
 
 # ----------------------------------------------------------------------------------------------
diff --git a/codeanalyzer/neo4j/rows.py b/codeanalyzer/neo4j/rows.py
index 9edecde..cbc381f 100644
--- a/codeanalyzer/neo4j/rows.py
+++ b/codeanalyzer/neo4j/rows.py
@@ -83,7 +83,11 @@ def __init__(self) -> None:
         self._nodes: Dict[str, NodeRow] = {}  # key: f"{labels[0]} {value}"
         self._edges: List[EdgeRow] = []
         self._deferred: List[EdgeRow] = []  # edges gated against node existence at finish()
-        self._keys: set = set()  # every node value seen, for resolved-gating
+        # (merge_label, value) of every node seen, for resolved-gating. Keyed by
+        # label too so a :PyPackage name can't shadow a :PySymbol signature (and
+        # vice versa) — otherwise a call to an imported module name like ``os``
+        # resolves to a :PySymbol node that was never created and the edge is lost.
+        self._keys: set = set()
 
     def node(self, labels: List[str], key_prop: str, value: str, props: Props) -> NodeRef:
         """Upsert a node. Re-seeing the same ``(labels[0], value)`` merges props
@@ -98,7 +102,7 @@ def node(self, labels: List[str], key_prop: str, value: str, props: Props) -> No
                     existing.labels.append(label)
         else:
             self._nodes[node_id] = NodeRow(list(labels), key_prop, value, dict(props))
-        self._keys.add(value)
+        self._keys.add((labels[0], value))
         return NodeRef(labels[0], key_prop, value)
 
     def edge(self, type_: str, from_ref: NodeRef, to_ref: NodeRef, props: Optional[Props] = None) -> None:
@@ -121,12 +125,13 @@ def edge_to_symbol(
             )
         )
 
-    def has_key(self, value: str) -> bool:
-        return value in self._keys
+    def has_key(self, label: str, value: str) -> bool:
+        """True if ``node()`` recorded a node whose first label and value match."""
+        return (label, value) in self._keys
 
     def finish(self) -> GraphRows:
         for e in self._deferred:
-            if e.to_ref.value in self._keys:
+            if (e.to_ref.label, e.to_ref.value) in self._keys:
                 self._edges.append(e)
         nodes = sorted(self._nodes.values(), key=lambda n: f"{n.labels[0]} {n.value}")
         edges = sorted(self._edges, key=lambda e: f"{e.type} {e.from_ref.value} {e.to_ref.value}")
diff --git a/codeanalyzer/schema/__init__.py b/codeanalyzer/schema/__init__.py
index 8853909..bcfa976 100644
--- a/codeanalyzer/schema/__init__.py
+++ b/codeanalyzer/schema/__init__.py
@@ -8,6 +8,7 @@
     PyClass,
     PyClassAttribute,
     PyComment,
+    PyExternalSymbol,
     PyImport,
     PyModule,
     PyVariableDeclaration,
@@ -15,6 +16,7 @@
 
 __all__ = [
     "PyApplication",
+    "PyExternalSymbol",
     "PyImport",
     "PyComment",
     "PyModule",
diff --git a/codeanalyzer/schema/py_schema.py b/codeanalyzer/schema/py_schema.py
index 8bef391..c69e5fb 100644
--- a/codeanalyzer/schema/py_schema.py
+++ b/codeanalyzer/schema/py_schema.py
@@ -358,6 +358,17 @@ class PyCallEdge(BaseModel):
     provenance: List[Literal["jedi", "codeql", "joern"]] = []
 
 
+@builder
+@msgpk
+class PyExternalSymbol(BaseModel):
+    """A call-graph endpoint not declared in the symbol table -- typically an
+    imported library or builtin member. Mirrors codeanalyzer-typescript's
+    ``TSExternalSymbol``; keyed in ``PyApplication.external_symbols`` by signature."""
+
+    name: str  # the member/short name, e.g. "get" for "requests.get"
+    module: Optional[str] = None  # best-effort owning module, e.g. "requests"
+
+
 @builder
 @msgpk
 class PyApplication(BaseModel):
@@ -365,3 +376,7 @@ class PyApplication(BaseModel):
 
     symbol_table: Dict[str, PyModule]
     call_graph: List[PyCallEdge] = []
+    # Call-graph endpoints not declared in the symbol table (imported library /
+    # builtin members), keyed by signature. Populated by the analyzer so every
+    # backend (JSON and Neo4j) shares one authoritative external-symbol set.
+    external_symbols: Dict[str, PyExternalSymbol] = {}
diff --git a/schema.neo4j.json b/schema.neo4j.json
index ffccf29..de5d100 100644
--- a/schema.neo4j.json
+++ b/schema.neo4j.json
@@ -1,5 +1,5 @@
 {
-  "schema_version": "1.0.0",
+  "schema_version": "1.1.0",
   "generator": "codeanalyzer-python",
   "marker_labels": [],
   "node_labels": [
@@ -67,7 +67,8 @@
       "key": "signature",
       "properties": {
         "signature": "string",
-        "name": "string"
+        "name": "string",
+        "module": "string"
       }
     },
     {
diff --git a/test/sample_graph_app.py b/test/sample_graph_app.py
index 11124f4..b4232b9 100644
--- a/test/sample_graph_app.py
+++ b/test/sample_graph_app.py
@@ -14,6 +14,7 @@
     PyClass,
     PyClassAttribute,
     PyComment,
+    PyExternalSymbol,
     PyImport,
     PyModule,
     PyVariableDeclaration,
@@ -149,4 +150,7 @@ def make_sample_app() -> PyApplication:
     return PyApplication(
         symbol_table={"src/service.py": service_mod, "src/util.py": util_mod},
         call_graph=call_graph,
+        # The ghost edge's target (requests.get) is a library member, recorded as a
+        # first-class external symbol so the projection emits a :PyExternal for it.
+        external_symbols={"requests.get": PyExternalSymbol(name="get", module="requests")},
     )
diff --git a/test/test_neo4j_schema.py b/test/test_neo4j_schema.py
index 401b465..bba6336 100644
--- a/test/test_neo4j_schema.py
+++ b/test/test_neo4j_schema.py
@@ -12,6 +12,8 @@
 from codeanalyzer.neo4j import NODE_LABELS, REL_TYPES, build_schema_document, project
 from codeanalyzer.neo4j.catalog import MARKER_LABELS
 from codeanalyzer.neo4j.cypher import render_cypher
+from codeanalyzer.schema import PyApplication, PyCallable, PyImport, PyModule
+from codeanalyzer.schema.py_schema import PyCallEdge
 
 from sample_graph_app import make_sample_app
 
@@ -87,6 +89,38 @@ def test_render_cypher_is_deterministic_and_self_contained():
     assert "MERGE (n:PySymbol {signature: row.k})" in a
 
 
+def test_call_edge_to_imported_module_name_is_not_dropped():
+    """Regression for #44. When a call targets a bare name that is also imported
+    as a module (``os``), the import's :PyPackage must not shadow the target's
+    :PySymbol identity -- the PY_CALLS edge has to survive projection."""
+    fn = PyCallable(
+        name="caller", path="m.py", signature="m.caller", return_type="None",
+        code="def caller():\n    os.getcwd()", start_line=1, end_line=2,
+        code_start_line=1, cyclomatic_complexity=1,
+    )
+    module = PyModule(
+        file_path="m.py", module_name="m",
+        functions={"caller": fn},
+        imports=[PyImport(module="os", name="getcwd")],
+        content_hash="h", last_modified=1.0, file_size=10,
+    )
+    edge = PyCallEdge(source="m.caller", target="os", weight=1, provenance=["jedi"])
+    # No external_symbols are supplied, so 'os' goes through the fallback path.
+    app = PyApplication(symbol_table={"m.py": module}, call_graph=[edge])
+    rows = project(app, "app")
+
+    # Exactly one PY_CALLS edge may end at 'os'.
+    os_calls = sum(1 for e in rows.edges if e.type == "PY_CALLS" and e.to_ref.value == "os")
+    assert os_calls == 1, "PY_CALLS edge to imported module name 'os' was dropped"
+
+    # 'os' is materialized as a :PyExternal symbol (the call target) ...
+    assert any("PyExternal" in n.labels for n in rows.nodes if n.value == "os"), \
+        ":PyExternal ghost for the call target 'os' is missing"
+    # ... distinct from the :PyPackage 'os' created by the import.
+    assert any(n.value == "os" and "PyPackage" in n.labels for n in rows.nodes), \
+        ":PyPackage for the import 'os' is missing"
+
+
 def test_checked_in_schema_matches_catalog():
     """Run `canpy --emit schema > schema.neo4j.json` if this fails."""
     on_disk_path = Path(__file__).resolve().parents[1] / "schema.neo4j.json"

From de41937cbe60851d55f3c26952002ff41cba9628 Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Mon, 22 Jun 2026 17:08:36 -0400
Subject: [PATCH 7/9] fix(neo4j): scope bolt full-run orphan prune to the
 application anchor

Closes #45

The full-run prune deleted any :PyModule whose file_key was not in the current
emit across the ENTIRE database -- not just the application being written -- so a
full-run push for application B wiped application A's modules, leaving an orphaned
:PyApplication with zero PY_HAS_MODULE edges. A single Neo4j database therefore
could not hold multiple applications via full-run --emit neo4j.

Anchor the prune to the :PyApplication {name} being emitted
(MATCH (:PyApplication {name:$app})-[:PY_HAS_MODULE]->(m:PyModule) WHERE NOT
m.file_key IN $present ...), so it only removes that application's vanished
modules. Adds a container regression test (app-b push leaves app-a intact).
---
 codeanalyzer/neo4j/bolt.py | 15 +++++++++++++--
 test/test_neo4j_bolt.py    | 30 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/codeanalyzer/neo4j/bolt.py b/codeanalyzer/neo4j/bolt.py
index 4ae102b..dc60986 100644
--- a/codeanalyzer/neo4j/bolt.py
+++ b/codeanalyzer/neo4j/bolt.py
@@ -77,6 +77,13 @@ def session():
             for stmt in [*CONSTRAINTS, *INDEXES]:
                 s.run(stmt)
 
+        # The application anchor (a shared node) — used to scope the orphan prune
+        # so it never touches modules belonging to a different :PyApplication.
+        app_name = next(
+            (n.value for n in rows.nodes if n.labels and n.labels[0] == "PyApplication"),
+            None,
+        )
+
         # Partition nodes by owning module; shared nodes have no _module.
         by_module: Dict[str, List[NodeRow]] = {}
         shared: List[NodeRow] = []
@@ -135,13 +142,17 @@ def _purge(tx, module=m, node_keys=keys):
         _upsert_edges(session, neo4j, edges)
 
         # 6. orphan prune — only safe on a full run (a targeted run can't tell deleted from untargeted).
-        if full_run:
+        # Scope to THIS application's anchor so a full run for application B never
+        # deletes application A's modules from a shared database.
+        if full_run and app_name is not None:
             present = list(by_module.keys())
             with session() as s:
                 res = s.run(
-                    "MATCH (m:PyModule) WHERE NOT m.file_key IN $present "
+                    "MATCH (:PyApplication {name: $app})-[:PY_HAS_MODULE]->(m:PyModule) "
+                    "WHERE NOT m.file_key IN $present "
                     f"OPTIONAL MATCH (m)-{DESCENDANTS}->(x) DETACH DELETE x, m "
                     "RETURN count(m) AS pruned",
+                    app=app_name,
                     present=present,
                 )
                 pruned = res.single()
diff --git a/test/test_neo4j_bolt.py b/test/test_neo4j_bolt.py
index ee84e01..6f02bd8 100644
--- a/test/test_neo4j_bolt.py
+++ b/test/test_neo4j_bolt.py
@@ -15,9 +15,24 @@
 
 from codeanalyzer.neo4j import project
 from codeanalyzer.neo4j.bolt import BoltConfig, bolt_writer
+from codeanalyzer.schema import PyApplication, PyCallable, PyModule
 
 from sample_graph_app import make_sample_app
 
+
+def _single_module_app(file_key: str = "appb/main.py") -> PyApplication:
+    """Minimal second app: one ``main`` function in the module at ``file_key``."""
+    entry = PyCallable(
+        name="main", signature="appb.main", path=file_key, return_type="None",
+        code="def main():\n    ...", code_start_line=1, start_line=1, end_line=2,
+        cyclomatic_complexity=1,
+    )
+    only_module = PyModule(
+        file_path=file_key, module_name="appb.main", functions={"main": entry},
+        content_hash="h-b", last_modified=1.0, file_size=10,
+    )
+    return PyApplication(call_graph=[], symbol_table={file_key: only_module})
+
 pytestmark = pytest.mark.skipif(
     not os.environ.get("RUN_CONTAINER_TESTS"),
     reason="opt-in: set RUN_CONTAINER_TESTS=1 (needs Docker/Podman) to run the Neo4j bolt test",
@@ -105,6 +120,21 @@ def test_full_push_materializes_the_whole_graph_and_schema(driver, cfg):
     assert _num(driver, "MATCH (e:PyExternal) RETURN count(e)") >= 1
 
 
+def test_full_run_does_not_prune_another_applications_modules(driver, cfg):
+    """Regression for #45: with two applications in one database, a full-run push
+    of the second must leave every module of the first in place."""
+    count_a = "MATCH (:PyApplication {name:'app-a'})-[:PY_HAS_MODULE]->(m) RETURN count(m)"
+    bolt_writer(project(make_sample_app(), "app-a"), cfg, full_run=True)
+    baseline = _num(driver, count_a)
+    assert baseline > 0
+
+    # Now push a different application, also as a full run.
+    bolt_writer(project(_single_module_app(), "app-b"), cfg, full_run=True)
+
+    assert _num(driver, count_a) == baseline, "full-run push for app-b pruned app-a's modules (#45)"
+    assert _num(driver, "MATCH (:PyApplication {name:'app-b'})-[:PY_HAS_MODULE]->(m) RETURN count(m)") == 1
+
+
 def test_re_pushing_identical_analysis_is_idempotent(driver, cfg):
     rows = project(make_sample_app(), "sample-app")
     bolt_writer(rows, cfg, full_run=True)

From 63cf46fbaa507d6f2e0a0d169b8e38df36bbfbd1 Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Mon, 22 Jun 2026 17:12:38 -0400
Subject: [PATCH 8/9] feat(cli): add --no-venv to skip virtualenv creation and
 use the ambient env

Closes #46

Add a --no-venv flag (AnalysisOptions.no_venv) that skips virtualenv creation and
dependency installation and resolves imports against the ambient interpreter
(self.virtualenv stays None, so Jedi uses the default environment). Useful in CI /
containers where the project's dependencies are already installed, for sandboxed
runs where network installs are disallowed, and for speed. Tradeoff: import /
call-resolution quality then depends on what is installed in the ambient env.

Regenerates the README --help block; adds a CLI regression test (no virtualenv is
created and analysis.json is still produced).
---
 README.md                       |  5 +++++
 codeanalyzer/__main__.py        |  9 +++++++++
 codeanalyzer/core.py            | 13 ++++++++++---
 codeanalyzer/options/options.py |  1 +
 test/test_cli.py                | 21 +++++++++++++++++++++
 5 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9f87640..17cea55 100644
--- a/README.md
+++ b/README.md
@@ -185,6 +185,11 @@ $ canpy --help
 │                                                               [default: lazy]                    │
 │ --skip-tests          --include-tests                         Skip test files in analysis.       │
 │                                                               [default: skip-tests]              │
+│ --no-venv             --venv                                  Skip virtualenv creation and       │
+│                                                               dependency installation; resolve   │
+│                                                               imports against the ambient Python │
+│                                                               environment instead.               │
+│                                                               [default: venv]                    │
 │ --file-name                              PATH                 Analyze only the specified file    │
 │                                                               (relative to input directory).     │
 │ --cache-dir       -c                     PATH                 Directory to store analysis cache. │
diff --git a/codeanalyzer/__main__.py b/codeanalyzer/__main__.py
index d386d3b..d7f4ab3 100644
--- a/codeanalyzer/__main__.py
+++ b/codeanalyzer/__main__.py
@@ -104,6 +104,14 @@ def main(
             help="Skip test files in analysis.",
         ),
     ] = True,
+    no_venv: Annotated[
+        bool,
+        typer.Option(
+            "--no-venv/--venv",
+            help="Skip virtualenv creation and dependency installation; resolve "
+            "imports against the ambient Python environment instead.",
+        ),
+    ] = False,
     file_name: Annotated[
         Optional[Path],
         typer.Option(
@@ -144,6 +152,7 @@ def main(
         using_ray=using_ray,
         rebuild_analysis=rebuild_analysis,
         skip_tests=skip_tests,
+        no_venv=no_venv,
         file_name=file_name,
         cache_dir=cache_dir,
         clear_cache=clear_cache,
diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py
index 314b5ec..9b5f538 100644
--- a/codeanalyzer/core.py
+++ b/codeanalyzer/core.py
@@ -66,6 +66,7 @@ def __init__(self, options: AnalysisOptions) -> None:
         self.skip_tests = options.skip_tests
         self.using_codeql = options.using_codeql
         self.rebuild_analysis = options.rebuild_analysis
+        self.no_venv = options.no_venv
         self.cache_dir = (
             options.cache_dir.resolve() if options.cache_dir is not None else self.project_dir
         ) / ".codeanalyzer"
@@ -260,8 +261,13 @@ def __enter__(self) -> "Codeanalyzer":
         venv_path = self.cache_dir / self.project_dir.name / "virtualenv"
         # Ensure the cache directory exists for this project
         venv_path.parent.mkdir(parents=True, exist_ok=True)
+        if self.no_venv:
+            logger.info(
+                "--no-venv: using the ambient Python environment "
+                "(skipping virtualenv creation and dependency installation)"
+            )
         # Create the virtual environment if it does not exist
-        if not venv_path.exists() or self.rebuild_analysis:
+        if not self.no_venv and (not venv_path.exists() or self.rebuild_analysis):
             logger.info(f"(Re-)creating virtual environment at {venv_path}")
             self._cmd_exec_helper(
                 [str(self._get_base_interpreter()), "-m", "venv", str(venv_path)],
@@ -320,8 +326,9 @@ def __enter__(self) -> "Codeanalyzer":
         # Point Jedi at the analysis venv so it resolves the project's third-party
         # imports. This runs on both a fresh build and a lazy reuse of an existing
         # venv -- previously self.virtualenv stayed None, so the install above was
-        # never actually used by the symbol-table builder.
-        if venv_path.exists():
+        # never actually used by the symbol-table builder. With --no-venv we leave
+        # it None so Jedi resolves against the ambient interpreter instead.
+        if not self.no_venv and venv_path.exists():
             self.virtualenv = venv_path
 
         if self.using_codeql:
diff --git a/codeanalyzer/options/options.py b/codeanalyzer/options/options.py
index 541fb85..e314c5e 100644
--- a/codeanalyzer/options/options.py
+++ b/codeanalyzer/options/options.py
@@ -38,6 +38,7 @@ class AnalysisOptions:
     using_ray: bool = False
     rebuild_analysis: bool = False
     skip_tests: bool = True
+    no_venv: bool = False
     file_name: Optional[Path] = None
     cache_dir: Optional[Path] = None
     clear_cache: bool = False
diff --git a/test/test_cli.py b/test/test_cli.py
index b4ba50d..11a5490 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -38,6 +38,27 @@ def test_cli_call_symbol_table_with_json(cli_runner, whole_applications__xarray)
     assert len(json_obj["symbol_table"]) > 0, "Symbol table should not be empty"
 
 
+def test_no_venv_skips_virtualenv(
+    cli_runner, single_functionalities__stuff_nested_in_functions, tmp_path
+):
+    """#46: --no-venv must skip virtualenv creation/installation and still analyze."""
+    out = tmp_path / "out"  # output directory handed to --output
+    cache = tmp_path / "cache"  # cache root handed to --cache-dir; searched for a virtualenv below
+    result = cli_runner.invoke(
+        app,
+        [
+            "--input", str(single_functionalities__stuff_nested_in_functions),
+            "--output", str(out),
+            "--cache-dir", str(cache),
+            "--no-venv", "--no-codeql", "--no-ray",  # flag under test, with CodeQL and Ray switched off
+        ],
+        env={"NO_COLOR": "1", "TERM": "dumb"},  # presumably keeps the CLI output plain/uncolored -- confirm
+    )
+    assert result.exit_code == 0, result.output  # the captured CLI output becomes the failure message
+    assert (out / "analysis.json").exists(), "analysis.json should still be produced with --no-venv"
+    assert not list(cache.rglob("virtualenv")), "--no-venv must not create a virtualenv"
+
+
 def test_single_file(cli_runner, single_functionalities__stuff_nested_in_functions):
     """Must be able to run the CLI with single file analysis using --file-name flag."""
     output_dir = single_functionalities__stuff_nested_in_functions.joinpath(".output")

From c02b92da97cfbd6628bc2336e6ce4025d29f5e51 Mon Sep 17 00:00:00 2001
From: Rahul Krishna <rkrsn@ibm.com>
Date: Mon, 22 Jun 2026 17:28:45 -0400
Subject: [PATCH 9/9] chore(release): 0.2.1

---
 CHANGELOG.md   | 12 +++++++++++-
 pyproject.toml |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a36b1b..15afc94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,10 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [0.2.1] - 2026-06-22
 
 ### Added
 - **Homebrew tap** — `brew install codellm-devkit/tap/codeanalyzer-python`. The release workflow auto-generates a formula (`packaging/homebrew/generate_formula.sh`) that installs the pinned PyPI release as an isolated `uv` tool, and pushes it to `codellm-devkit/homebrew-tap`. Because the package is pure-Python with heavy native dependencies (`ray`, `pandas`, `numpy`), the formula depends on `uv` and runs the release via `uvx` rather than vendoring every transitive dependency as a Homebrew resource.
+- **First-class external symbols** — `PyApplication.external_symbols` (a `{signature → PyExternalSymbol{name, module}}` map) records call-graph targets outside the analyzed project, mirroring the `codeanalyzer-typescript` backend. `analysis.json` now carries external info that was previously only a bare target string, and the Neo4j projection emits `:PyExternal` authoritatively from it ([#44](https://github.com/codellm-devkit/codeanalyzer-python/issues/44)).
+- **`--no-venv` / `--venv` flag** — skip virtualenv creation and dependency installation and resolve imports against the ambient Python interpreter. Useful in CI / containers where the project's dependencies are already installed, for sandboxed runs without network, and for speed ([#46](https://github.com/codellm-devkit/codeanalyzer-python/issues/46)).
+
+### Changed
+- Dependencies for the per-project analysis virtualenv are now installed with **`uv`** (parallel downloads + a shared global cache; falls back to `pip`), and the virtualenv is now **wired to Jedi** — previously `self.virtualenv` stayed `None`, so the installed packages were never used by the symbol-table builder ([#47](https://github.com/codellm-devkit/codeanalyzer-python/issues/47)).
+- Neo4j `:PyExternal` gains a `module` property; `SCHEMA_VERSION` bumped `1.0.0 → 1.1.0` (additive) ([#44](https://github.com/codellm-devkit/codeanalyzer-python/issues/44)).
+
+### Fixed
+- `--emit neo4j` no longer drops call edges whose target is a bare imported module name (e.g. `os`, `re`, `json`): a `:PyPackage` name can no longer shadow a call target's `:PySymbol` signature, and the node-identity tracking is keyed by `(label, value)` so deferred `PY_EXTENDS` / `PY_RESOLVES_TO` edges can't be shadowed either ([#44](https://github.com/codellm-devkit/codeanalyzer-python/issues/44)).
+- `--emit neo4j` (Bolt) full-run orphan prune is now scoped to the `:PyApplication` anchor, so a full-run push for one application no longer deletes another application's modules from a shared database ([#45](https://github.com/codellm-devkit/codeanalyzer-python/issues/45)).
 
 ## [0.2.0] - 2026-06-20
 
diff --git a/pyproject.toml b/pyproject.toml
index acc00a7..d7f2514 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "codeanalyzer-python"
-version = "0.2.0"
+version = "0.2.1"
 description = "Static Analysis on Python source code using Jedi, CodeQL and Treesitter — emits analysis.json or a Neo4j property graph."
 readme = "README.md"
 authors = [