Skip to content

refactor: centralize PAT validation, streamline repo checks & misc cleanup #349

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ celerybeat.pid
# Environments
.env
.venv
.venv*
env/
venv/
ENV/
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ repos:
args: ["--disable=line-length"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.0
rev: v0.12.1
hooks:
- id: ruff-check
- id: ruff-format
Expand Down
16 changes: 13 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be
cd gitingest
```

**Note**: To contribute, ensure you have **Python 3.9 or newer** installed, as some of the `pre-commit` hooks (e.g. `pyupgrade`) require Python 3.9+.

3. Set up the development environment and install dependencies:

```bash
Expand All @@ -31,7 +33,7 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be
4. Create a new branch for your changes:

```bash
git checkout -b your-branch
git checkout -b your-branch
```

5. Make your changes. Make sure to add corresponding tests for your changes.
Expand Down Expand Up @@ -66,10 +68,18 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be

9. Confirm that everything is working as expected. If you encounter any issues, fix them and repeat steps 6 to 8.

10. Commit your changes:
10. Commit your changes (signed):

All commits to Gitingest must be [GPG-signed](https://docs.github.com/en/authentication/managing-commit-signature-verification) so that the project can verify the authorship of every contribution. You can either configure Git globally with:

```bash
git config --global commit.gpgSign true
```

or pass the `-S` flag as shown below.

```bash
git commit -m "Your commit message"
git commit -S -m "Your commit message"
```

If `pre-commit` raises any issues, fix them and repeat steps 6 to 9.
Expand Down
8 changes: 1 addition & 7 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
ensure_git_installed,
is_github_host,
run_command,
validate_github_token,
)
from gitingest.utils.os_utils import ensure_directory
from gitingest.utils.timeout_wrapper import async_timeout
Expand All @@ -23,7 +22,7 @@


@async_timeout(DEFAULT_TIMEOUT)
async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
"""Clone a repository to a local path based on the provided configuration.

This function handles the process of cloning a Git repository to the local file system.
Expand All @@ -36,7 +35,6 @@ async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
The configuration for cloning the repository.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Raises
------
Expand All @@ -51,10 +49,6 @@ async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
branch: str | None = config.branch
partial_clone: bool = config.subpath != "/"

# Validate token if provided
if token and is_github_host(url):
validate_github_token(token)

# Create parent directory if it doesn't exist
await ensure_directory(Path(local_path).parent)

Expand Down
5 changes: 1 addition & 4 deletions src/gitingest/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ async def parse_query(
Patterns to ignore. Can be a set of strings or a single string.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand Down Expand Up @@ -109,7 +108,6 @@ async def _parse_remote_repo(source: str, token: str | None = None) -> Ingestion
The URL or domain-less slug to parse.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand Down Expand Up @@ -301,7 +299,6 @@ async def try_domains_for_user_and_repo(user_name: str, repo_name: str, token: s
The name of the repository.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand All @@ -316,7 +313,7 @@ async def try_domains_for_user_and_repo(user_name: str, repo_name: str, token: s
"""
for domain in KNOWN_GIT_HOSTS:
candidate = f"https://{domain}/{user_name}/{repo_name}"
if await check_repo_exists(candidate, token=token if domain == "github.com" else None):
if await check_repo_exists(candidate, token=token if domain.startswith("github.") else None):
return domain

msg = f"Could not find a valid repository host for '{user_name}/{repo_name}'."
Expand Down
7 changes: 6 additions & 1 deletion src/gitingest/utils/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import os

from gitingest.utils.git_utils import validate_github_token


def resolve_token(token: str | None) -> str | None:
"""Resolve the token to use for the query.
Expand All @@ -19,4 +21,7 @@ def resolve_token(token: str | None) -> str | None:
The resolved token.

"""
return token or os.getenv("GITHUB_TOKEN")
token = token or os.getenv("GITHUB_TOKEN")
if token:
validate_github_token(token)
return token
8 changes: 4 additions & 4 deletions src/gitingest/utils/compat_typing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Compatibility layer for typing."""

try:
from typing import ParamSpec, TypeAlias # Py ≥ 3.10
from typing import ParamSpec, TypeAlias # type: ignore[attr-defined] # Py ≥ 3.10
except ImportError:
from typing_extensions import ParamSpec, TypeAlias # Py 3.8 / 3.9
from typing_extensions import ParamSpec, TypeAlias # type: ignore[attr-defined] # Py 3.8 / 3.9

try:
from typing import Annotated # Py ≥ 3.9
from typing import Annotated # type: ignore[attr-defined] # Py ≥ 3.9
except ImportError:
from typing_extensions import Annotated # Py 3.8
from typing_extensions import Annotated # type: ignore[attr-defined] # Py 3.8

__all__ = ["Annotated", "ParamSpec", "TypeAlias"]
7 changes: 4 additions & 3 deletions src/gitingest/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class InvalidGitHubTokenError(ValueError):
"""Exception raised when a GitHub Personal Access Token is malformed."""

def __init__(self) -> None:
super().__init__(
"Invalid GitHub token format. Token should start with 'github_pat_' or 'ghp_' "
"followed by at least 36 characters of letters, numbers, and underscores.",
msg = (
"Invalid GitHub token format. To generate a token, go to "
"https://github.com/settings/tokens/new?description=gitingest&scopes=repo."
)
super().__init__(msg)
Loading
Loading