From ab0fc06f09ae503b73f69c00313722efe1aaae3a Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 01:13:18 +0200 Subject: [PATCH 01/11] ci: use pull_request_target for PR temp env creation (#455) --- .github/workflows/deploy-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-pr.yml b/.github/workflows/deploy-pr.yml index 7deb0333..f16d55b9 100644 --- a/.github/workflows/deploy-pr.yml +++ b/.github/workflows/deploy-pr.yml @@ -1,6 +1,6 @@ name: Manage PR Temp Envs 'on': - pull_request: + pull_request_target: types: - labeled - unlabeled From b8189659b5d268ff05d7a570cda9c5eb1d9e5059 Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 01:14:45 +0200 Subject: [PATCH 02/11] ci: use pull_request_target for docker build (#456) --- .github/workflows/docker_image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker_image.yml index aa758a04..85c9a1f8 100644 --- a/.github/workflows/docker_image.yml +++ b/.github/workflows/docker_image.yml @@ -7,7 +7,7 @@ on: tags: - '*' merge_group: - pull_request: + pull_request_target: types: [labeled, synchronize, reopened, ready_for_review, opened] concurrency: From 44ad8106807001d4c257a2e3466588ab3575a094 Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 02:13:41 +0200 Subject: [PATCH 03/11] chore: exclude changelog.md from markdownlint (#458) --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 529d352a..4e3b4d86 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -89,7 +89,7 @@ repos: hooks: - id: markdownlint description: 'Lint markdown files.' - args: ['--disable=line-length'] + args: ['--disable=line-length', '--ignore=CHANGELOG.md'] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 009ff527..551b7769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,6 @@ ## [0.2.0](https://github.com/coderamp-labs/gitingest/compare/v0.1.5...v0.2.0) (2025-07-26) - -### ⚠ BREAKING CHANGES - -* -* - ### Features * `include_submodules` option ([#313](https://github.com/coderamp-labs/gitingest/issues/313)) ([38c2317](https://github.com/coderamp-labs/gitingest/commit/38c23171a14556a2cdd05c0af8219f4dc789defd)) From 7ed521d371ec7e6c5bebbc73813461d791b0e82d Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 02:24:31 +0200 Subject: [PATCH 04/11] ci: add stale (#457) --- .github/workflows/stale.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 00000000..98809d49 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,32 @@ +name: "Close stale issues and PRs" + +on: + schedule: + - cron: "0 6 * * *" + workflow_dispatch: {} + +permissions: + issues: write + pull-requests: write + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-stale: 45 + days-before-close: 10 + stale-issue-label: stale + stale-pr-label: stale + stale-issue-message: | + Hi there! We haven’t seen activity here for 45 days, so I’m marking this issue as stale. + If you’d like to keep it open, please leave a comment within 10 days. Thanks! + stale-pr-message: | + Hi there! We haven’t seen activity on this pull request for 45 days, so I’m marking it as stale. + If you’d like to keep it open, please leave a comment within 10 days. Thanks! + close-issue-message: | + Hi there! We haven’t heard anything for 10 days, so I’m closing this issue. Feel free to reopen if you’d like to continue the discussion. Thanks! + close-pr-message: | + Hi there! We haven’t heard anything for 10 days, so I’m closing this pull request. Feel free to reopen if you’d like to continue working on it. Thanks! From 12a003329ac4d727983231f029147e632e161014 Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 02:47:40 +0200 Subject: [PATCH 05/11] ci: optimize workflow with caching and concurrency (#459) --- .github/workflows/ci.yml | 104 +++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b58e938..17e6628a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,13 @@ name: CI on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true permissions: contents: read @@ -17,67 +21,63 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] - include: - os: ubuntu-latest python-version: "3.13" coverage: true steps: - - name: Harden the runner (Audit all outbound calls) - uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 - with: - egress-policy: audit - - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Harden the runner (Audit all outbound calls) + uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 + with: + egress-policy: audit - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: ${{ matrix.python-version }} + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Locate pip cache - id: pip-cache - shell: bash - run: echo "dir=$(python -m pip cache dir)" >> "$GITHUB_OUTPUT" + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' - - name: Cache pip - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} - restore-keys: ${{ runner.os }}-pip- + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev,server]" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install ".[dev,server]" + - name: Cache pytest results + uses: actions/cache@v4 + with: + path: .pytest_cache + key: ${{ runner.os }}-pytest-${{ matrix.python-version }}-${{ hashFiles('**/pytest.ini') }} + restore-keys: | + ${{ runner.os }}-pytest-${{ matrix.python-version }}- - - name: Run tests - if: ${{ matrix.coverage != true }} - run: pytest + - name: Run tests + if: ${{ matrix.coverage != true }} + run: pytest - - name: Run tests and collect coverage - if: ${{ matrix.coverage == true }} - run: | - pytest \ - --cov=gitingest \ - --cov=server \ - --cov-branch \ - --cov-report=xml \ - --cov-report=term + - name: Run tests and collect coverage + if: ${{ matrix.coverage == true }} + run: | + pytest \ + --cov=gitingest \ + --cov=server \ + --cov-branch \ + --cov-report=xml \ + --cov-report=term - - name: Upload coverage to Codecov - if: ${{ matrix.coverage == true }} - uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: coverage.xml - flags: ${{ matrix.os }}-py${{ matrix.python-version }} - name: codecov-${{ matrix.os }}-${{ matrix.python-version }} - fail_ci_if_error: true - verbose: true + - name: Upload coverage to Codecov + if: ${{ matrix.coverage == true }} + uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: coverage.xml + flags: ${{ matrix.os }}-py${{ matrix.python-version }} + name: codecov-${{ matrix.os }}-${{ matrix.python-version }} + fail_ci_if_error: true + verbose: true - - name: Run pre-commit hooks - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 - if: ${{ matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' }} + - name: Run pre-commit hooks + uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + if: ${{ matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' }} From 63521631c054ba08d0860ae0dde0a3c88f2ec02c Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 02:58:09 +0200 Subject: [PATCH 06/11] ci: add workflow to label PRs needing rebase (#460) --- .github/workflows/rebase-needed.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/rebase-needed.yml diff --git a/.github/workflows/rebase-needed.yml b/.github/workflows/rebase-needed.yml new file mode 100644 index 00000000..3e01efb3 --- /dev/null +++ b/.github/workflows/rebase-needed.yml @@ -0,0 +1,29 @@ +name: PR Needs Rebase + +on: + workflow_dispatch: {} + schedule: + - cron: '0 * * * *' + +permissions: + pull-requests: write + +jobs: + label-rebase-needed: + runs-on: ubuntu-latest + if: github.repository == 'coderamp-labs/gitingest' + + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + + steps: + - name: Check for merge conflicts + uses: eps1lon/actions-label-merge-conflict@v3 + with: + dirtyLabel: 'rebase needed :construction:' + repoToken: '${{ secrets.GITHUB_TOKEN }}' + commentOnClean: This pull request has resolved merge conflicts and is ready for review. + commentOnDirty: This pull request has merge conflicts that must be resolved before it can be merged. + retryMax: 30 + continueOnMissingPermissions: false From c8eb5be143d38e95512d89c52e44a6c35dc3d64c Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 04:24:58 +0200 Subject: [PATCH 07/11] ci: add ECR container build (#461) --- .github/workflows/docker-build.ecr.yml | 82 +++++++++++++++++++ ...docker_image.yml => docker-build.ghcr.yml} | 8 +- 2 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/docker-build.ecr.yml rename .github/workflows/{docker_image.yml => docker-build.ghcr.yml} (90%) diff --git a/.github/workflows/docker-build.ecr.yml b/.github/workflows/docker-build.ecr.yml new file mode 100644 index 00000000..26d1b48f --- /dev/null +++ b/.github/workflows/docker-build.ecr.yml @@ -0,0 +1,82 @@ +name: Build & Push Container + +on: + push: + branches: + - 'main' + tags: + - '*' + merge_group: + pull_request_target: + types: [labeled, synchronize, reopened, ready_for_review, opened] + +env: + PUSH_FROM_PR: >- + ${{ github.event_name == 'pull_request_target' && + ( + contains(github.event.pull_request.labels.*.name, 'push-container') || + contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env') + ) + }} + +jobs: + terraform: + name: "ECR" + runs-on: ubuntu-latest + if: github.repository == 'coderamp-labs/gitingest' + + permissions: + id-token: write + contents: read + pull-requests: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: configure aws credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.CODERAMP_AWS_ECR_REGISTRY_PUSH_ROLE_ARN }} + role-session-name: GitHub_to_AWS_via_FederatedOIDC + aws-region: eu-west-1 + + - name: Set current timestamp + id: vars + run: | + echo "timestamp=$(date +%s)" >> $GITHUB_OUTPUT + echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Docker Meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ secrets.ECR_REGISTRY_URL }} + flavor: | + latest=false + tags: | + type=ref,event=branch,branch=main,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }} + type=ref,event=pr,suffix=-${{ steps.vars.outputs.sha_short }}-${{ steps.vars.outputs.timestamp }} + type=pep440,pattern={{raw}} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64, linux/arm64 + push: ${{ github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker-build.ghcr.yml similarity index 90% rename from .github/workflows/docker_image.yml rename to .github/workflows/docker-build.ghcr.yml index 85c9a1f8..35c061b4 100644 --- a/.github/workflows/docker_image.yml +++ b/.github/workflows/docker-build.ghcr.yml @@ -17,9 +17,8 @@ concurrency: env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} - # Now allow pushing from PRs when either 'push-container' OR 'deploy-pr-temp-env' is present: PUSH_FROM_PR: >- - ${{ github.event_name == 'pull_request' && + ${{ github.event_name == 'pull_request_target' && ( contains(github.event.pull_request.labels.*.name, 'push-container') || contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env') @@ -31,6 +30,7 @@ permissions: jobs: docker-build: + name: "GHCR" runs-on: ubuntu-latest permissions: contents: read @@ -84,14 +84,14 @@ jobs: with: context: . platforms: linux/amd64, linux/arm64 - push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} + push: ${{ github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max - name: Generate artifact attestation - if: github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' + if: github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0 with: subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} From 148f1719aad6f15208fe83a09e3679eff0f15192 Mon Sep 17 00:00:00 2001 From: Mickael Date: Sun, 27 Jul 2025 06:44:15 +0200 Subject: [PATCH 08/11] ci: switch from pull_request_target to pull_request trigger (#463) --- .github/workflows/deploy-pr.yml | 2 +- .github/workflows/docker-build.ecr.yml | 6 +++--- .github/workflows/docker-build.ghcr.yml | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/deploy-pr.yml b/.github/workflows/deploy-pr.yml index f16d55b9..7deb0333 100644 --- a/.github/workflows/deploy-pr.yml +++ b/.github/workflows/deploy-pr.yml @@ -1,6 +1,6 @@ name: Manage PR Temp Envs 'on': - pull_request_target: + pull_request: types: - labeled - unlabeled diff --git a/.github/workflows/docker-build.ecr.yml b/.github/workflows/docker-build.ecr.yml index 26d1b48f..0a819e12 100644 --- a/.github/workflows/docker-build.ecr.yml +++ b/.github/workflows/docker-build.ecr.yml @@ -7,12 +7,12 @@ on: tags: - '*' merge_group: - pull_request_target: + pull_request: types: [labeled, synchronize, reopened, ready_for_review, opened] env: PUSH_FROM_PR: >- - ${{ github.event_name == 'pull_request_target' && + ${{ github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'push-container') || contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env') @@ -75,7 +75,7 @@ jobs: with: context: . platforms: linux/amd64, linux/arm64 - push: ${{ github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' }} + push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha diff --git a/.github/workflows/docker-build.ghcr.yml b/.github/workflows/docker-build.ghcr.yml index 35c061b4..de72fbaa 100644 --- a/.github/workflows/docker-build.ghcr.yml +++ b/.github/workflows/docker-build.ghcr.yml @@ -7,7 +7,7 @@ on: tags: - '*' merge_group: - pull_request_target: + pull_request: types: [labeled, synchronize, reopened, ready_for_review, opened] concurrency: @@ -18,7 +18,7 @@ env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} PUSH_FROM_PR: >- - ${{ github.event_name == 'pull_request_target' && + ${{ github.event_name == 'pull_request' && ( contains(github.event.pull_request.labels.*.name, 'push-container') || contains(github.event.pull_request.labels.*.name, 'deploy-pr-temp-env') @@ -84,14 +84,14 @@ jobs: with: context: . platforms: linux/amd64, linux/arm64 - push: ${{ github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' }} + push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max - name: Generate artifact attestation - if: github.event_name != 'pull_request_target' || env.PUSH_FROM_PR == 'true' + if: github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0 with: subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} From 05b44d92870297b99b10efc32dca25314af96df3 Mon Sep 17 00:00:00 2001 From: Nicolas Iragne Date: Sun, 27 Jul 2025 18:20:28 +0200 Subject: [PATCH 09/11] ci: restore package build for pypi (#466) --- .github/workflows/publish_to_pypi.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 8fbd5b66..51a6ff58 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -31,6 +31,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install build twine + python -m build twine check dist/* - name: Upload dist artefact uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 From 932bfef85db66704985c83f3f7c427756bd14023 Mon Sep 17 00:00:00 2001 From: Zarial <39010759+ix-56h@users.noreply.github.com> Date: Sun, 27 Jul 2025 18:36:47 +0200 Subject: [PATCH 10/11] fix: remove logarithm conversion from the backend and correctly process max file size in kb (#464) Co-authored-by: Nicolas Iragne --- src/gitingest/schemas/ingestion.py | 2 +- src/server/models.py | 8 ++-- src/server/query_processor.py | 14 +++--- src/server/routers/ingest.py | 6 +-- src/server/routers_utils.py | 2 +- src/server/server_config.py | 4 +- src/server/server_utils.py | 21 +-------- .../templates/components/git_form.jinja | 4 +- src/static/js/utils.js | 44 ++++++++++++++++--- 9 files changed, 58 insertions(+), 47 deletions(-) diff --git a/src/gitingest/schemas/ingestion.py b/src/gitingest/schemas/ingestion.py index 92572aeb..21369075 100644 --- a/src/gitingest/schemas/ingestion.py +++ b/src/gitingest/schemas/ingestion.py @@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes tag : str | None The tag of the repository. max_file_size : int - The maximum file size to ingest (default: 10 MB). + The maximum file size to ingest in bytes (default: 10 MB). ignore_patterns : set[str] The patterns to ignore (default: ``set()``). include_patterns : set[str] | None diff --git a/src/server/models.py b/src/server/models.py index a1aed314..533da611 100644 --- a/src/server/models.py +++ b/src/server/models.py @@ -3,14 +3,16 @@ from __future__ import annotations from enum import Enum -from typing import Union +from typing import TYPE_CHECKING, Union from pydantic import BaseModel, Field, field_validator from gitingest.utils.compat_func import removesuffix +from server.server_config import MAX_FILE_SIZE_KB # needed for type checking (pydantic) -from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import) +if TYPE_CHECKING: + from server.form_types import IntForm, OptStrForm, StrForm class PatternType(str, Enum): @@ -39,7 +41,7 @@ class IngestRequest(BaseModel): """ input_text: str = Field(..., description="Git repository URL or slug to ingest") - max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)") + max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB") pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering") pattern: str = Field(default="", description="Glob/regex pattern for file filtering") token: str | None = Field(default=None, description="GitHub PAT for private repositories") diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 88d7ff50..172330ac 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -13,12 +13,12 @@ from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3 from server.server_config import MAX_DISPLAY_SIZE -from server.server_utils import Colors, log_slider_to_size +from server.server_utils import Colors async def process_query( input_text: str, - slider_position: int, + max_file_size: int, pattern_type: PatternType, pattern: str, token: str | None = None, @@ -32,8 +32,8 @@ async def process_query( ---------- input_text : str Input text provided by the user, typically a Git repository URL or slug. - slider_position : int - Position of the slider, representing the maximum file size in the query. + max_file_size : int + Max file size in KB to be include in the digest. pattern_type : PatternType Type of pattern to use (either "include" or "exclude") pattern : str @@ -55,8 +55,6 @@ async def process_query( if token: validate_github_token(token) - max_file_size = log_slider_to_size(slider_position) - try: query = await parse_remote_repo(input_text, token=token) except Exception as exc: @@ -65,7 +63,7 @@ async def process_query( return IngestErrorResponse(error=str(exc)) query.url = cast("str", query.url) - query.max_file_size = max_file_size + query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels query.ignore_patterns, query.include_patterns = process_patterns( exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None, include_patterns=pattern if pattern_type == PatternType.INCLUDE else None, @@ -142,7 +140,7 @@ async def process_query( digest_url=digest_url, tree=tree, content=content, - default_max_file_size=slider_position, + default_max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, ) diff --git a/src/server/routers/ingest.py b/src/server/routers/ingest.py index 42efefdf..ce9e6512 100644 --- a/src/server/routers/ingest.py +++ b/src/server/routers/ingest.py @@ -11,7 +11,7 @@ from server.models import IngestRequest from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion from server.s3_utils import is_s3_enabled -from server.server_config import MAX_DISPLAY_SIZE +from server.server_config import DEFAULT_FILE_SIZE_KB from server.server_utils import limiter ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"]) @@ -58,7 +58,7 @@ async def api_ingest_get( request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument user: str, repository: str, - max_file_size: int = MAX_DISPLAY_SIZE, + max_file_size: int = DEFAULT_FILE_SIZE_KB, pattern_type: str = "exclude", pattern: str = "", token: str = "", @@ -74,7 +74,7 @@ async def api_ingest_get( - **repository** (`str`): GitHub repository name **Query Parameters** - - **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB) + - **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB) - **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude") - **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "") - **token** (`str`, optional): GitHub personal access token for private repositories (default: "") diff --git a/src/server/routers_utils.py b/src/server/routers_utils.py index 83242e26..3eaf0e59 100644 --- a/src/server/routers_utils.py +++ b/src/server/routers_utils.py @@ -33,7 +33,7 @@ async def _perform_ingestion( result = await process_query( input_text=input_text, - slider_position=max_file_size, + max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, token=token, diff --git a/src/server/server_config.py b/src/server/server_config.py index 0257db8b..d0b51c4d 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -10,8 +10,8 @@ DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour) # Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js) -MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB -MAX_SLIDER_POSITION: int = 500 # Maximum slider position +DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 mb +MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 mb EXAMPLE_REPOS: list[dict[str, str]] = [ {"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"}, diff --git a/src/server/server_utils.py b/src/server/server_utils.py index b0371661..ee6f9eca 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -1,7 +1,6 @@ """Utility functions for the server.""" import asyncio -import math import shutil import time from contextlib import asynccontextmanager, suppress @@ -15,7 +14,7 @@ from slowapi.util import get_remote_address from gitingest.config import TMP_BASE_PATH -from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION +from server.server_config import DELETE_REPO_AFTER # Initialize a rate limiter limiter = Limiter(key_func=get_remote_address) @@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None: fp.write(f"{line}\n") -def log_slider_to_size(position: int) -> int: - """Convert a slider position to a file size in bytes using a logarithmic scale. - - Parameters - ---------- - position : int - Slider position ranging from 0 to 500. - - Returns - ------- - int - File size in bytes corresponding to the slider position. - - """ - maxv = math.log(MAX_FILE_SIZE_KB) - return round(math.exp(maxv * pow(position / MAX_SLIDER_POSITION, 1.5))) * 1024 - - ## Color printing utility class Colors: """ANSI color codes.""" diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja index 8ea0821f..e2e7c91c 100644 --- a/src/server/templates/components/git_form.jinja +++ b/src/server/templates/components/git_form.jinja @@ -76,12 +76,12 @@ +
diff --git a/src/static/js/utils.js b/src/static/js/utils.js index 6370036b..ce19e95e 100644 --- a/src/static/js/utils.js +++ b/src/static/js/utils.js @@ -126,13 +126,13 @@ function collectFormData(form) { const json_data = {}; const inputText = form.querySelector('[name="input_text"]'); const token = form.querySelector('[name="token"]'); - const slider = document.getElementById('file_size'); + const hiddenInput = document.getElementById('max_file_size_kb'); const patternType = document.getElementById('pattern_type'); const pattern = document.getElementById('pattern'); if (inputText) {json_data.input_text = inputText.value;} if (token) {json_data.token = token.value;} - if (slider) {json_data.max_file_size = slider.value;} + if (hiddenInput) {json_data.max_file_size = hiddenInput.value;} if (patternType) {json_data.pattern_type = patternType.value;} if (pattern) {json_data.pattern = pattern.value;} @@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) { if (!form) {return;} + // Ensure hidden input is updated before collecting form data + const slider = document.getElementById('file_size'); + const hiddenInput = document.getElementById('max_file_size_kb'); + + if (slider && hiddenInput) { + hiddenInput.value = logSliderToSize(slider.value); + } + if (showLoadingSpinner) { showLoading(); } @@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(json_data) }) - .then((response) => response.json()) - .then( (data) => { - // Hide loading overlay + .then(async (response) => { + let data; + + try { + data = await response.json(); + } catch { + data = {}; + } setButtonLoadingState(submitButton, false); - // Handle error + if (!response.ok) { + // Show all error details if present + if (Array.isArray(data.detail)) { + const details = data.detail.map((d) => `
  • ${d.msg || JSON.stringify(d)}
  • `).join(''); + + showError(`
    Error(s):
      ${details}
    `); + + return; + } + // Other errors + showError(`
    ${data.error || JSON.stringify(data) || 'An error occurred.'}
    `); + + return; + } + + // Handle error in data if (data.error) { showError(`
    ${data.error}
    `); @@ -327,14 +355,16 @@ function logSliderToSize(position) { function initializeSlider() { const slider = document.getElementById('file_size'); const sizeValue = document.getElementById('size_value'); + const hiddenInput = document.getElementById('max_file_size_kb'); - if (!slider || !sizeValue) {return;} + if (!slider || !sizeValue || !hiddenInput) {return;} function updateSlider() { const value = logSliderToSize(slider.value); sizeValue.textContent = formatSize(value); slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`; + hiddenInput.value = value; // Set hidden input to KB value } // Update on slider change From a63ed9ed2b4adc67608b47995bc0f1f078b06bcb Mon Sep 17 00:00:00 2001 From: "coderamp-ci[bot]" <221205196+coderamp-ci[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 00:56:00 +0200 Subject: [PATCH 11/11] chore(main): release 0.2.1 (#468) Co-authored-by: coderamp-ci[bot] <221205196+coderamp-ci[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 305c1509..57e0617c 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1 +1 @@ -{".":"0.2.0"} +{".":"0.2.1"} diff --git a/CHANGELOG.md b/CHANGELOG.md index 551b7769..6652a8cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.2.1](https://github.com/coderamp-labs/gitingest/compare/v0.2.0...v0.2.1) (2025-07-27) + + +### Bug Fixes + +* remove logarithm conversion from the backend and correctly process max file size in kb ([#464](https://github.com/coderamp-labs/gitingest/issues/464)) ([932bfef](https://github.com/coderamp-labs/gitingest/commit/932bfef85db66704985c83f3f7c427756bd14023)) + ## [0.2.0](https://github.com/coderamp-labs/gitingest/compare/v0.1.5...v0.2.0) (2025-07-26) ### Features diff --git a/pyproject.toml b/pyproject.toml index 8432f08d..ffbf6504 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "gitingest" -version = "0.2.0" +version = "0.2.1" description="CLI tool to analyze and create text dumps of codebases for LLMs" readme = {file = "README.md", content-type = "text/markdown" } requires-python = ">= 3.8"