Skip to content

refactor: Refactor output formatting #467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 17 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ repos:
rev: v1.36.4
hooks:
- id: djlint-reformat-jinja
exclude: ^src/gitingest/format/

- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.45.0
Expand Down
40 changes: 19 additions & 21 deletions src/gitingest/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gitingest.clone import clone_repo
from gitingest.config import MAX_FILE_SIZE
from gitingest.ingestion import ingest_query
from gitingest.output_formatter import DefaultFormatter
from gitingest.query_parser import parse_local_dir_path, parse_remote_repo
from gitingest.utils.auth import resolve_token
from gitingest.utils.compat_func import removesuffix
Expand Down Expand Up @@ -44,12 +45,13 @@ async def ingest_async(
include_submodules: bool = False,
token: str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
) -> str:
"""Ingest a source and process its contents.

This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
and processes its files according to the specified query parameters. It returns a summary, a tree-like
structure of the files, and the content of the files. The results can optionally be written to an output file.
and processes its files according to the specified query parameters. It returns a single digest string.

The digest is rendered from the returned ContextV1 object using the DefaultFormatter class.

Parameters
----------
Expand Down Expand Up @@ -79,11 +81,8 @@ async def ingest_async(

Returns
-------
tuple[str, str, str]
A tuple containing:
- A summary string of the analyzed repository or directory.
- A tree-like string representation of the file structure.
- The content of the files in the repository or directory.
str
The full digest string.

"""
logger.info("Starting ingestion process", extra={"source": source})
Expand Down Expand Up @@ -138,14 +137,15 @@ async def ingest_async(
_apply_gitignores(query)

logger.info("Processing files and generating output")
summary, tree, content = ingest_query(query)

if output:
logger.debug("Writing output to file", extra={"output_path": output})
await _write_output(tree, content=content, target=output)

context = ingest_query(query)
formatter = DefaultFormatter()
digest = formatter.format(context, context.query)
await _write_output(digest, content=None, target=output)
logger.info("Ingestion completed successfully")
return summary, tree, content
return digest


def ingest(
Expand All @@ -160,12 +160,13 @@ def ingest(
include_submodules: bool = False,
token: str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
) -> str:
"""Provide a synchronous wrapper around ``ingest_async``.

This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
and processes its files according to the specified query parameters. It returns a summary, a tree-like
structure of the files, and the content of the files. The results can optionally be written to an output file.
and processes its files according to the specified query parameters. It returns a single digest string.

The digest is rendered from a ContextV1 object using the DefaultFormatter class.

Parameters
----------
Expand Down Expand Up @@ -195,11 +196,8 @@ def ingest(

Returns
-------
tuple[str, str, str]
A tuple containing:
- A summary string of the analyzed repository or directory.
- A tree-like string representation of the file structure.
- The content of the files in the repository or directory.
str
The full digest string.

See Also
--------
Expand All @@ -208,7 +206,7 @@ def ingest(
"""
return asyncio.run(
ingest_async(
source=source,
source,
max_file_size=max_file_size,
include_patterns=include_patterns,
exclude_patterns=exclude_patterns,
Expand Down
4 changes: 4 additions & 0 deletions src/gitingest/format/DebugFormatter/Source.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ SEPARATOR }}
DEBUG: {{ class_name }}
Fields: {{ fields_str }}
{{ SEPARATOR }}
11 changes: 11 additions & 0 deletions src/gitingest/format/DefaultFormatter/ContextV1.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Generated using https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}

Sources used:
{%- for src in source %}
- {{ src.name }}: {{ src.__class__.__name__ }}
{% endfor %}

{%- for src in source.sources %}
{{ formatter.format(src, source.query) }}
{%- endfor %}
# End of https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}
7 changes: 7 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemDirectory.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{%- if source.depth == 0 %}{{ source.name }}:
{{ source.tree }}

{% endif -%}
{%- for child in source.children -%}
{{ formatter.format(child, query) }}
{%- endfor -%}
4 changes: 4 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemFile.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ SEPARATOR }}
{{ source.name }}
{{ SEPARATOR }}
{{ source.content }}
3 changes: 3 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemSymlink.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{ SEPARATOR }}
{{ source.name }}{% if source.target %} -> {{ source.target }}{% endif %}
{{ SEPARATOR }}
7 changes: 7 additions & 0 deletions src/gitingest/format/DefaultFormatter/GitRepository.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{%- if source.depth == 0 %}🔗 Git Repository: {{ source.name }}
{{ source.tree }}

{% endif -%}
{%- for child in source.children -%}
{{ formatter.format(child, query) }}
{%- endfor -%}
5 changes: 5 additions & 0 deletions src/gitingest/format/SummaryFormatter/ContextV1.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Repository: {{ source.query.user_name }}/{{ source.query.repo_name }}
Commit: {{ source.query.commit }}
Files analyzed: {{ source.file_count }}

Estimated tokens: {{ source.token_count }}
2 changes: 2 additions & 0 deletions src/gitingest/format/SummaryFormatter/FileSystemDirectory.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Directory structure:
{{ source.tree }}
90 changes: 52 additions & 38 deletions src/gitingest/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from typing import TYPE_CHECKING

from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
from gitingest.output_formatter import format_node
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
from gitingest.schemas import ContextV1, FileSystemNode, FileSystemStats
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink, GitRepository
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
from gitingest.utils.logging_config import get_logger

Expand All @@ -18,12 +18,18 @@
logger = get_logger(__name__)


def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
def _is_git_repository(path: Path) -> bool:
    """Report whether ``path`` has a ``.git`` entry directly inside it.

    Parameters
    ----------
    path : Path
        The directory to inspect.

    Returns
    -------
    bool
        ``True`` if ``path / ".git"`` exists, ``False`` otherwise. The check does not
        distinguish between a ``.git`` directory and a ``.git`` file.

    """
    git_marker = path / ".git"
    return git_marker.exists()


def ingest_query(query: IngestionQuery) -> ContextV1:
"""Run the ingestion process for a parsed query.

This is the main entry point for analyzing a codebase directory or single file. It processes the query
parameters, reads the file or directory content, and generates a summary, directory structure, and file content,
along with token estimations.
This is the main entry point for analyzing a codebase directory or single file.

It processes the query parameters, reads the file or directory content, and returns
a ContextV1 object that can generate the final output digest on demand.

Parameters
----------
Expand All @@ -32,8 +38,10 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:

Returns
-------
tuple[str, str, str]
A tuple containing the summary, directory structure, and file contents.
ContextV1
A ContextV1 object representing the ingested file system nodes.
Use DefaultFormatter().format(context, context.query) to render the summary,
directory structure, and file contents.

Raises
------
Expand Down Expand Up @@ -70,11 +78,8 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:

relative_path = path.relative_to(query.local_path)

file_node = FileSystemNode(
file_node = FileSystemFile(
name=path.name,
type=FileSystemNodeType.FILE,
size=path.stat().st_size,
file_count=1,
path_str=str(relative_path),
path=path,
)
Expand All @@ -91,16 +96,21 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
"file_size": file_node.size,
},
)
return format_node(file_node, query=query)

logger.info("Processing directory", extra={"directory_path": str(path)})
return ContextV1(sources=[file_node], query=query)

root_node = FileSystemNode(
name=path.name,
type=FileSystemNodeType.DIRECTORY,
path_str=str(path.relative_to(query.local_path)),
path=path,
)
# Check if this is a git repository and create appropriate node type
if _is_git_repository(path):
root_node = GitRepository(
name=path.name,
path_str=str(path.relative_to(query.local_path)),
path=path,
)
else:
root_node = FileSystemDirectory(
name=path.name,
path_str=str(path.relative_to(query.local_path)),
path=path,
)

stats = FileSystemStats()

Expand All @@ -117,10 +127,10 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
},
)

return format_node(root_node, query=query)
return ContextV1(sources=[root_node], query=query)


def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None: # noqa: C901
"""Process a file or directory item within a directory.

This function handles each file or directory item, checking if it should be included or excluded based on the
Expand Down Expand Up @@ -161,13 +171,21 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
continue
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
elif sub_path.is_dir():
child_directory_node = FileSystemNode(
name=sub_path.name,
type=FileSystemNodeType.DIRECTORY,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)
# Check if this subdirectory is a git repository
if _is_git_repository(sub_path):
child_directory_node = GitRepository(
name=sub_path.name,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)
else:
child_directory_node = FileSystemDirectory(
name=sub_path.name,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)

_process_node(node=child_directory_node, query=query, stats=stats)

Expand Down Expand Up @@ -201,9 +219,8 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
The base path of the repository or directory being processed.

"""
child = FileSystemNode(
child = FileSystemSymlink(
name=path.name,
type=FileSystemNodeType.SYMLINK,
path_str=str(path.relative_to(local_path)),
path=path,
depth=parent_node.depth + 1,
Expand All @@ -213,7 +230,7 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
parent_node.file_count += 1


def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSystemStats, local_path: Path) -> None:
"""Process a file in the file system.

This function checks the file's size, increments the statistics, and reads its content.
Expand All @@ -223,7 +240,7 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
----------
path : Path
The full path of the file.
parent_node : FileSystemNode
parent_node : FileSystemDirectory
The parent directory node that accumulates the results.
stats : FileSystemStats
Statistics tracking object for the total file count and size.
Expand Down Expand Up @@ -258,11 +275,8 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
stats.total_files += 1
stats.total_size += file_size

child = FileSystemNode(
child = FileSystemFile(
name=path.name,
type=FileSystemNodeType.FILE,
size=file_size,
file_count=1,
path_str=str(path.relative_to(local_path)),
path=path,
depth=parent_node.depth + 1,
Expand Down
Loading
Loading