-
Notifications
You must be signed in to change notification settings - Fork 2
ticket #3 cs-assistant: Added :stats and verbose mode to the dev CLI #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
| from src.config.logger import get_logger | ||
| from src.infrastructure.db import async_session_factory | ||
| from src.infrastructure.db.repository import Repository | ||
| from src.retrieval.services import retrieval_service | ||
|
|
||
| log = get_logger(__name__) | ||
|
|
||
|
|
@@ -19,13 +20,33 @@ async def _check_db() -> None: | |
|
|
||
| async def _repl() -> None: | ||
| await _check_db() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dev CLI startup currently doesn't print the source and chunk counts, although the ticket description expects it to. Once the shared helper from the comment on line 12 has been implemented, it can be used here to print both the counts and, if needed, an empty-DB warning. |
||
| verbose = False # flag for :verbose | ||
|
|
||
| print("cs-assistant dev CLI. Type 'exit' or Ctrl-D to quit.\n") | ||
| print("Type ':stats' or ':verbose' for cmds.\n") | ||
|
|
||
| while True: | ||
| try: | ||
| question = input("ask> ").strip() | ||
| except (EOFError, KeyboardInterrupt): | ||
| print("\nbye") | ||
| return | ||
|
|
||
| # :stats cmd | ||
| if question.lower() in {":stats"}: | ||
| async with async_session_factory() as session: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once the shared helper based on the comment on line 12 has been implemented, this whole section (lines 37-40) could be replaced with the helper. Same benefit, avoids making 2 queries when just one would be enough. |
||
| count_sources, count_chunks = await Repository.get_source_and_chunk_counts(session) | ||
| print(f"{count_sources} sources, {count_chunks} chunks loaded") | ||
| await _check_db() | ||
| continue | ||
|
|
||
| # :verbose cmd | ||
| if question.lower() in {":verbose"}: | ||
| verbose = not verbose | ||
| print(f"Verbose mode: {'ON' if verbose else 'OFF'}") | ||
| continue | ||
|
|
||
| # exit/quit cmd | ||
| if question.lower() in {"exit", "quit"}: | ||
| return | ||
| if not question: | ||
|
|
@@ -37,6 +58,20 @@ async def _repl() -> None: | |
| print(f"\nError: {e}\n") | ||
| continue | ||
|
|
||
| # printing out chunk content (verbose mode) | ||
| if verbose: | ||
| retrieved_chunks = await retrieval_service.get_relevant_chunks(question) | ||
| for chunk_item in retrieved_chunks: | ||
| source_url = chunk_item.chunk.source_url | ||
| similarity_score = chunk_item.score | ||
| snippet = " ".join( | ||
| (chunk_item.chunk.content.split())[:250] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Snippet should in fact be 250 characters, not words. Just replace the word-based split with a 250-character slice of the content. |
||
| ) # snippet ~250 words (maybe chars instead?) | ||
| print(f"URL: {source_url}") | ||
| print(f"Similarity score: {similarity_score}") | ||
| print(f"Content snippet: {snippet}") | ||
| print("-" * 60) | ||
|
|
||
| print(f"\n{answer.text}\n") | ||
| if answer.sources: | ||
| print("Sources:") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,24 @@ async def has_chunks(session: AsyncSession) -> bool: | |
| result = await session.execute(select(ChunkRow.id).limit(1)) | ||
| return result.scalar_one_or_none() is not None | ||
|
|
||
| @staticmethod | ||
| async def get_source_and_chunk_counts(session: AsyncSession) -> tuple[int, int]: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These methods currently don't have tests, even though all other repository methods have tests implemented. For consistency, implement unit tests for these in the existing repository test file. Follow the pattern / conventions of the other tests in that file. Run tests before committing to verify behaviour and that they pass. |
||
| count_sources = await Repository.count_sources(session) | ||
| count_chunks = await Repository.count_chunks(session) | ||
| return count_sources, count_chunks | ||
|
|
||
| @staticmethod | ||
| async def count_chunks(session: AsyncSession) -> int: | ||
| result = await session.execute(select(func.count(ChunkRow.id))) | ||
| # if above doesn't work properly | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lines 26-27 look like temporary code that isn't needed since they're commented out? If yes, please remove them from the final PR. |
||
| # result = await session.execute(select(func.count().select_from(ChunkRow))) | ||
| return result.scalar_one() | ||
|
|
||
| @staticmethod | ||
| async def count_sources(session: AsyncSession) -> int: | ||
| result = await session.execute(select(func.count(SourceRow.id))) | ||
| return result.scalar_one() | ||
|
|
||
| @staticmethod | ||
| async def get_or_create_source( | ||
| session: AsyncSession, *, name: str, url: str, source_type: str | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These descriptions don't fully match what the CLI commands actually do (e.g. :stats doesn't show response time, and :verbose doesn't show "API interactions"). Update the descriptions so it's clearer exactly what the commands do, something like:
:stats - Prints how many sources and chunks are currently loaded in the database.
:verbose - Toggles verbose mode: for each question, prints the retrieved chunks (source URL, similarity score, and a content snippet) before the answer.