Task ID: task_e_682fde5294b883238ae9ce345a626844
Configuring language runtimes...
+ rm -- /tmp/iOWIwc-setup_script.sh
+ source ./codex_setup.sh
++ uv sync --frozen
Using CPython 3.12.3 interpreter at: /usr/bin/python3.12
Creating virtual environment at: .venv
Building scubaduck @ file:///workspace/scubaduck
Downloading ruff (11.0MiB)
Downloading pyright (5.3MiB)
Downloading playwright (43.1MiB)
Downloading duckdb (19.3MiB)
Downloading ruff
Built scubaduck @ file:///workspace/scubaduck
Downloading duckdb
Downloading playwright
Downloading pyright
Prepared 33 packages in 1.56s
Installed 33 packages in 71ms
+ blinker==1.9.0
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ click==8.2.0
+ duckdb==1.2.2
+ execnet==2.1.1
+ flask==3.1.1
+ greenlet==3.2.2
+ idna==3.10
+ iniconfig==2.1.0
+ itsdangerous==2.2.0
+ jinja2==3.1.6
+ markupsafe==3.0.2
+ nodeenv==1.9.1
+ packaging==25.0
+ playwright==1.52.0
+ pluggy==1.6.0
+ pyee==13.0.0
+ pyright==1.1.400
+ pytest==8.3.5
+ pytest-base-url==2.1.0
+ pytest-playwright==0.7.0
+ pytest-xdist==3.6.1
+ python-dateutil==2.9.0.post0
+ python-slugify==8.0.4
+ requests==2.32.3
+ ruff==0.11.10
+ scubaduck==0.1.0 (from file:///workspace/scubaduck)
+ six==1.17.0
+ text-unidecode==1.3
+ typing-extensions==4.13.2
+ urllib3==2.4.0
+ werkzeug==3.1.3
++ source .venv/bin/activate
+++ '[' -z '' ']'
+++ '[' -n x ']'
+++ SCRIPT_PATH=.venv/bin/activate
+++ '[' .venv/bin/activate = /tmp/iOWIwc-setup_script.sh ']'
+++ deactivate nondestructive
+++ unset -f pydoc
+++ '[' -z '' ']'
+++ '[' -z '' ']'
+++ hash -r
+++ '[' -z '' ']'
+++ unset VIRTUAL_ENV
+++ unset VIRTUAL_ENV_PROMPT
+++ '[' '!' nondestructive = nondestructive ']'
+++ VIRTUAL_ENV=/workspace/scubaduck/.venv
+++ '[' linux-gnu = cygwin ']'
+++ '[' linux-gnu = msys ']'
+++ export VIRTUAL_ENV
+++ '[' -z '' ']'
+++ unset SCRIPT_PATH
+++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/root/.rbenv/shims:/root/.rbenv/bin:/root/.rbenv/shims:/root/.local/share/swiftly/bin:/root/.bun/bin:/root/.nvm/versions/node/v22.16.0/bin:/root/.pyenv/shims:$PYENV_ROOT/shims:/root/.pyenv/bin:/usr/local/go/bin:/root/go/bin:/root/.rbenv/bin:/root/.rbenv/shims:/root/.bun/bin:/root/.local/bin:/root/.pyenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+++ PATH=/workspace/scubaduck/.venv/bin:/root/.cargo/bin:/root/.rbenv/shims:/root/.rbenv/bin:/root/.rbenv/shims:/root/.local/share/swiftly/bin:/root/.bun/bin:/root/.nvm/versions/node/v22.16.0/bin:/root/.pyenv/shims:$PYENV_ROOT/shims:/root/.pyenv/bin:/usr/local/go/bin:/root/go/bin:/root/.rbenv/bin:/root/.rbenv/shims:/root/.bun/bin:/root/.local/bin:/root/.pyenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+++ export PATH
+++ '[' xscubaduck '!=' x ']'
+++ VIRTUAL_ENV_PROMPT=scubaduck
+++ export VIRTUAL_ENV_PROMPT
+++ '[' -z '' ']'
+++ '[' -z '' ']'
+++ _OLD_VIRTUAL_PS1=
+++ PS1='(scubaduck) '
+++ export PS1
+++ alias pydoc
+++ true
+++ hash -r
++ python -c 'import os; import duckdb; con = duckdb.connect(); con.execute(f"SET http_proxy = '\''{os.getenv("HTTP_PROXY")}'\''"); con.execute("INSTALL '\''sqlite'\'';")'
++ playwright install chromium
Downloading Chromium 136.0.7103.25 (playwright build v1169) from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1169/chromium-linux.zip
167.7 MiB [====================] 100% 0.0s
Chromium 136.0.7103.25 (playwright build v1169) downloaded to /root/.cache/ms-playwright/chromium-1169
Downloading FFMPEG playwright build v1011 from https://cdn.playwright.dev/dbazure/download/playwright/builds/ffmpeg/1011/ffmpeg-linux.zip
2.3 MiB [====================] 100% 0.0s
FFMPEG playwright build v1011 downloaded to /root/.cache/ms-playwright/ffmpeg-1011
Downloading Chromium Headless Shell 136.0.7103.25 (playwright build v1169) from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1169/chromium-headless-shell-linux.zip
101.4 MiB [====================] 100% 0.0s
Chromium Headless Shell 136.0.7103.25 (playwright build v1169) downloaded to /root/.cache/ms-playwright/chromium_headless_shell-1169
++ echo 'source .venv/bin/activate'
I should open the AGENTS.md file in the repo root for guidance.
bash: .venv/bin/activate: No such file or directory
root@54a46097547f:/workspace# ls
scubaduck
root@54a46097547f:/workspace# cd scubaduck
root@54a46097547f:/workspace/scubaduck# ls
AGENTS.md codex_setup.sh scubaduck tests
README.md pyproject.toml stubs uv.lock
I'm checking the root directory for AGENTS.md to see what guidance I can follow.
root@54a46097547f:/workspace/scubaduck# cat AGENTS.md
- Use "source .venv/bin/activate" to activate the virtual environment before
running any commands
- ONLY use non-interactive commands like cat, sed, apply_patch to do edits.
Do NOT use interactive editors.
- Do NOT attempt to install packages. Only the packages specified in
pyproject.toml are available. You cannot add new packages. If you
desperately want another package, make a note of it in the final PR
description.
- Use conventional commits to format PR title
- There are no nested AGENTS.md files, this is the only agents file
- When using Playwright, ONLY use chromium browser. This browser is already
installed.
- Use "ruff check" to check lint, "ruff format" to autoformat files and
"pyright" to typecheck.
- When writing the PR description, include the original prompt supplied by the
user.
- When you add functionality to the server, add server tests to
tests/test_server.py. When you add features to the frontend, add frontend
tests to tests/test_web.py
Looking at root AGENTS for guidance—I'll use venv, ruff, and pyright tools.
root@54a46097547f:/workspace/scubaduck# ls
AGENTS.md codex_setup.sh scubaduck tests
README.md pyproject.toml stubs uv.lock
root@54a46097547f:/workspace/scubaduck# ls scubaduck
__init__.py sample.csv server.py static
root@54a46097547f:/workspace/scubaduck# ls tests
conftest.py test_server_time.py test_web_graph.py
test_multi_table_web.py test_server_timeseries.py test_web_table.py
test_server.py test_web.py test_web_timeseries.py
test_server_basic.py test_web_basics.py web_utils.py
test_server_db_types.py test_web_chips.py
test_server_errors.py test_web_columns.py
I'll explore test_server_db_types.py to understand the patterns and structure used.
root@54a46097547f:/workspace/scubaduck# cat tests/test_server_db_types.py
from __future__ import annotations
import json
from pathlib import Path
import duckdb
import pytest
from scubaduck import server
def _make_payload() -> dict[str, object]:
return {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"order_by": "timestamp",
"order_dir": "ASC",
"limit": 10,
"columns": ["timestamp", "event", "value", "user"],
"filters": [],
}
def test_database_types(tmp_path: Path) -> None:
csv_file = tmp_path / "events.csv"
csv_file.write_text(Path("scubaduck/sample.csv").read_text())
sqlite_file = tmp_path / "events.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute(
"CREATE TABLE events (timestamp TEXT, event TEXT, value INTEGER, user TE
XT)"
)
with open(csv_file) as f:
next(f)
for line in f:
ts, ev, val, user = line.strip().split(",")
conn.execute(
"INSERT INTO events VALUES (?, ?, ?, ?)", (ts, ev, int(val), use
r)
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
duckdb_file = tmp_path / "events.duckdb"
con = duckdb.connect(duckdb_file)
con.execute(
f"CREATE TABLE events AS SELECT * FROM read_csv_auto('{csv_file.as_posix
()}')"
)
    con.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
for db in (csv_file, sqlite_file, duckdb_file):
app = server.create_app(db)
client = app.test_client()
payload = _make_payload()
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/js
on"
)
rows = rv.get_json()["rows"]
assert len(rows) == 3
def test_sqlite_longvarchar(tmp_path: Path) -> None:
sqlite_file = tmp_path / "events.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute(
"CREATE TABLE events (timestamp TEXT, url LONGVARCHAR, title VARCHAR(10)
)"
)
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', 'https://a.com', 'Hom
e')"
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-01 01:00:00",
"order_by": "timestamp",
"columns": ["timestamp", "url", "title"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][1] == "https://a.com"
def test_sqlite_bigint(tmp_path: Path) -> None:
sqlite_file = tmp_path / "big.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, value INTEGER)")
big_value = 13385262862605259
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
(big_value,),
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"order_by": "timestamp",
"columns": ["timestamp", "value"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][1] == big_value
def test_sqlite_boolean_aggregation(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:30:00', 0)")
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"graph_type": "table",
"aggregate": "Avg",
"columns": ["flag"],
"show_hits": True,
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][0] == 2
assert data["rows"][0][1] == 0.5
def test_sqlite_boolean_group_by(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (id INTEGER, ts TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:30:00', 0)")
conn.execute("INSERT INTO events VALUES (2, '2024-01-01 01:00:00', 1)")
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"graph_type": "table",
"time_column": "ts",
"aggregate": "Avg",
"group_by": ["id"],
"columns": ["flag"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
rows = sorted(data["rows"]) # order can vary
assert rows == [[1, 2, 0.5], [2, 1, 1.0]]
def test_envvar_db(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
csv_file = tmp_path / "custom.csv"
csv_file.write_text("timestamp,event,value,user\n2024-01-01 00:00:00,login,5
,bob\n")
monkeypatch.setenv("SCUBADUCK_DB", str(csv_file))
app = server.create_app()
client = app.test_client()
payload = _make_payload()
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
rows = rv.get_json()["rows"]
assert len(rows) == 1
def test_envvar_db_missing(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
missing = tmp_path / "missing.sqlite"
monkeypatch.setenv("SCUBADUCK_DB", str(missing))
with pytest.raises(FileNotFoundError):
server.create_app()
I'll check the server code to understand how it's handling queries before adding a new test.
root@54a46097547f:/workspace/scubaduck# sed -n '1,160p' scubaduck/server.py
from __future__ import annotations
from dataclasses import dataclass, field, replace
from typing import Any, Dict, List, Tuple, cast
import re
from datetime import datetime, timedelta, timezone
import time
from pathlib import Path
import os
import traceback
import math
import duckdb
from dateutil import parser as dtparser
from dateutil.relativedelta import relativedelta
from flask import Flask, jsonify, request, send_from_directory
def _quote(ident: str) -> str:
"""Return identifier quoted for SQL."""
return f'"{ident.replace('"', '""')}"'
@dataclass
class Filter:
column: str
op: str
value: str | int | float | list[str] | None
@dataclass
class QueryParams:
start: str | None = None
end: str | None = None
order_by: str | None = None
order_dir: str = "ASC"
limit: int | None = None
columns: list[str] = field(default_factory=lambda: [])
filters: list[Filter] = field(default_factory=lambda: [])
derived_columns: dict[str, str] = field(default_factory=lambda: {})
graph_type: str = "samples"
group_by: list[str] = field(default_factory=lambda: [])
aggregate: str | None = None
show_hits: bool = False
x_axis: str | None = None
granularity: str = "Auto"
fill: str = "0"
table: str = "events"
time_column: str | None = "timestamp"
time_unit: str = "s"
def _load_database(path: Path) -> duckdb.DuckDBPyConnection:
if not path.exists():
raise FileNotFoundError(path)
ext = path.suffix.lower()
if ext == ".csv":
con = duckdb.connect()
con.execute(
f"CREATE TABLE events AS SELECT * FROM read_csv_auto('{path.as_posix
()}')"
)
elif ext in {".db", ".sqlite"}:
con = duckdb.connect()
con.execute("LOAD sqlite")
con.execute(f"ATTACH '{path.as_posix()}' AS db (TYPE SQLITE)")
tables = [
r[0]
for r in con.execute(
"SELECT name FROM sqlite_master WHERE type='table'"
).fetchall()
]
for t in tables:
con.execute(f'CREATE VIEW "{t}" AS SELECT * FROM db."{t}"')
else:
con = duckdb.connect(path)
return con
def _create_test_database() -> duckdb.DuckDBPyConnection:
"""Return a DuckDB connection with a small multi-table dataset."""
con = duckdb.connect()
con.execute(
"CREATE TABLE events (id INTEGER PRIMARY KEY, ts INTEGER, val REAL, name
TEXT, flag BOOLEAN)"
)
con.execute("INSERT INTO events VALUES (1, 1704067200, 1.5, 'alice', 1)")
con.execute("INSERT INTO events VALUES (2, 1704070800, 2.0, 'bob', 0)")
con.execute('CREATE TABLE extra (ts INTEGER, "desc" TEXT, num INTEGER)')
con.execute("INSERT INTO extra VALUES (1704067200, 'x', 1)")
con.execute("INSERT INTO extra VALUES (1704070800, 'y', 2)")
return con
_REL_RE = re.compile(
r"([+-]?\d+(?:\.\d*)?)\s*(hour|hours|day|days|week|weeks|fortnight|fortnight
s|month|months|year|years)",
re.IGNORECASE,
)
def parse_time(val: str | None) -> str | None:
"""Parse an absolute or relative time string into ``YYYY-MM-DD HH:MM:SS``.""
"
if val is None or val == "":
return None
s = val.strip()
if s.lower() == "now":
dt = datetime.now(timezone.utc)
return dt.replace(microsecond=0).strftime("%Y-%m-%d %H:%M:%S")
m = _REL_RE.fullmatch(s)
if m:
qty = float(m.group(1))
unit = m.group(2).lower()
now = datetime.now(timezone.utc)
dt: datetime
if unit.startswith("hour"):
dt = now + timedelta(hours=qty)
elif unit.startswith("day"):
dt = now + timedelta(days=qty)
elif unit.startswith("week"):
dt = now + timedelta(weeks=qty)
elif unit.startswith("fortnight"):
dt = now + timedelta(weeks=2 * qty)
elif unit.startswith("month"):
if qty.is_integer():
dt = now + relativedelta(months=int(qty))
else:
dt = now + timedelta(days=30 * qty)
elif unit.startswith("year"):
if qty.is_integer():
dt = now + relativedelta(years=int(qty))
else:
dt = now + timedelta(days=365 * qty)
else: # pragma: no cover - defensive
raise ValueError(f"Unsupported unit: {unit}")
return dt.replace(microsecond=0).strftime("%Y-%m-%d %H:%M:%S")
dt = dtparser.parse(s)
return dt.replace(microsecond=0, tzinfo=None).strftime("%Y-%m-%d %H:%M:%S")
def _numeric_to_datetime(value: int | float, unit: str) -> datetime:
"""Convert a numeric timestamp ``value`` with unit ``unit`` to ``datetime``.
Heuristically fall back to seconds when the converted value is before 1990
but the seconds interpretation is in a reasonable range. This handles
integer columns stored in seconds even when ``unit`` is mistakenly set to a
finer granularity.
"""
divisor = {
"s": 1,
"ms": 1000,
"us": 1_000_000,
"ns": 1_000_000_000,
}.get(unit, 1)
dt = datetime.fromtimestamp(int(value) / divisor, tz=timezone.utc)
if unit != "s" and dt.year < 1990:
root@54a46097547f:/workspace/scubaduck# sed -n '160,320p' scubaduck/server.py
if unit != "s" and dt.year < 1990:
alt = datetime.fromtimestamp(int(value), tz=timezone.utc)
if alt.year >= 1990:
dt = alt
return dt
def _granularity_seconds(granularity: str, start: str | None, end: str | None) -> int:
gran = granularity.lower()
mapping = {
"1 second": 1,
"5 seconds": 5,
"10 seconds": 10,
"30 seconds": 30,
"1 minute": 60,
"4 minutes": 240,
"5 minutes": 300,
"10 minutes": 600,
"15 minutes": 900,
"30 minutes": 1800,
"1 hour": 3600,
"3 hours": 10800,
"6 hours": 21600,
"1 day": 86400,
"1 week": 604800,
"30 days": 2592000,
}
if gran in mapping:
return mapping[gran]
if gran in {"auto", "fine"} and start and end:
try:
s = dtparser.parse(start)
e = dtparser.parse(end)
except Exception:
return 3600
total = max((e - s).total_seconds(), 1)
buckets = 100 if gran == "auto" else 500
return max(int(total // buckets), 1)
return 3600
def _time_expr(col: str, column_types: Dict[str, str] | None, unit: str) -> str:
"""Return SQL expression for column interpreted as timestamp."""
qcol = _quote(col)
if column_types is None:
return qcol
ctype = column_types.get(col, "").upper()
if not any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"]):
if any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
):
if unit == "ns":
                # Use nanosecond helper unless column cannot represent such large values
if "INT" in ctype and "BIGINT" not in ctype and "HUGEINT" not in
ctype:
unit = "s"
else:
expr = f"CAST({qcol} AS BIGINT)"
return f"make_timestamp_ns({expr})"
if (
unit != "s"
and "INT" in ctype
and "BIGINT" not in ctype
and "HUGEINT" not in ctype
):
                # 32-bit integers cannot store sub-second precision for modern dates
unit = "s"
multiplier = {
"s": 1_000_000,
"ms": 1_000,
"us": 1,
}.get(unit, 1_000_000)
base = f"CAST({qcol} AS BIGINT)"
expr = f"CAST({base} * {multiplier} AS BIGINT)" if multiplier != 1 e
lse base
return f"make_timestamp({expr})"
return qcol
def build_query(params: QueryParams, column_types: Dict[str, str] | None = None) -> str:
select_parts: list[str] = []
group_cols = params.group_by[:]
    selected_for_order = set(params.columns) | set(params.derived_columns.keys())
if params.graph_type == "timeseries":
sec = _granularity_seconds(params.granularity, params.start, params.end)
x_axis = params.x_axis or params.time_column
if x_axis is None:
raise ValueError("x_axis required for timeseries")
xexpr = _time_expr(x_axis, column_types, params.time_unit)
if params.start:
bucket_expr = (
f"TIMESTAMP '{params.start}' + INTERVAL '{sec} second' * "
f"CAST(floor((epoch({xexpr}) - epoch(TIMESTAMP '{params.start}')
)/{sec}) AS BIGINT)"
)
else:
bucket_expr = (
f"TIMESTAMP 'epoch' + INTERVAL '{sec} second' * "
f"CAST(floor(epoch({xexpr})/{sec}) AS BIGINT)"
)
select_parts.append(f"{bucket_expr} AS bucket")
group_cols = ["bucket"] + group_cols
selected_for_order.add("bucket")
has_agg = bool(group_cols) or params.aggregate is not None
if has_agg:
select_cols = (
group_cols[1:] if params.graph_type == "timeseries" else group_cols
)
select_parts.extend(_quote(c) for c in select_cols)
agg = (params.aggregate or "count").lower()
selected_for_order.update(group_cols)
def agg_expr(col: str) -> str:
expr = _quote(col)
ctype = column_types.get(col, "").upper() if column_types else ""
if "BOOL" in ctype:
expr = f"CAST({_quote(col)} AS BIGINT)"
if agg.startswith("p"):
quant = float(agg[1:]) / 100
return f"quantile({expr}, {quant})"
if agg == "count distinct":
return f"count(DISTINCT {expr})"
if agg == "avg" and column_types is not None:
if "TIMESTAMP" in ctype or "DATE" in ctype or "TIME" in ctype:
return (
"TIMESTAMP 'epoch' + INTERVAL '1 second' * "
f"CAST(avg(epoch({_quote(col)})) AS BIGINT)"
)
return f"{agg}({expr})"
if agg == "count":
if params.graph_type != "table":
select_parts.append("count(*) AS Count")
selected_for_order.add("Count")
else:
for col in params.columns:
if col in group_cols:
continue
select_parts.append(f"{agg_expr(col)} AS {_quote(col)}")
selected_for_order.add(col)
select_parts.insert(len(group_cols), "count(*) AS Hits")
selected_for_order.add("Hits")
else:
select_parts.extend(_quote(c) for c in params.columns)
selected_for_order.update(params.columns)
order_by = params.order_by
if order_by == "Samples":
order_by = "Hits"
order_by = order_by if order_by in selected_for_order else None
if has_agg and params.derived_columns:
inner_params = replace(
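Two quick sanity checks on what I just read (my own sketch): "Auto" granularity divides the range into roughly 100 buckets, and _time_expr wraps integer time columns in make_timestamp.

    _granularity_seconds("Auto", "2024-01-01 00:00:00", "2024-01-01 01:00:00")
    # -> 36  (3600 seconds / 100 buckets)
    _time_expr("ts", {"ts": "BIGINT"}, "ms")
    # -> 'make_timestamp(CAST(CAST("ts" AS BIGINT) * 1000 AS BIGINT))'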
root@54a46097547f:/workspace/scubaduck# sed -n '320,640p' scubaduck/server.py
inner_params = replace(
params,
derived_columns={},
order_by=None,
limit=None,
)
inner_sql = build_query(inner_params, column_types)
outer_select = ["t.*"] + [
f"{expr} AS {name}" for name, expr in params.derived_columns.items()
]
        indented_inner = "\n".join(" " + line for line in inner_sql.splitlines())
lines = [
f"SELECT {', '.join(outer_select)}",
"FROM (",
indented_inner,
") t",
]
if order_by:
lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
elif params.graph_type == "timeseries":
lines.append("ORDER BY bucket")
if params.limit is not None:
lines.append(f"LIMIT {params.limit}")
return "\n".join(lines)
for name, expr in params.derived_columns.items():
select_parts.append(f"{expr} AS {name}")
selected_for_order.add(name)
select_clause = ", ".join(select_parts) if select_parts else "*"
lines = [f"SELECT {select_clause}", f'FROM "{params.table}"']
where_parts: list[str] = []
if params.time_column:
        time_expr = _time_expr(params.time_column, column_types, params.time_unit)
else:
time_expr = None
if time_expr and params.start:
where_parts.append(f"{time_expr} >= '{params.start}'")
if time_expr and params.end:
where_parts.append(f"{time_expr} <= '{params.end}'")
for f in params.filters:
op = f.op
if op in {"empty", "!empty"}:
val = "''"
else:
if f.value is None:
continue
if isinstance(f.value, list):
if not f.value:
continue
if op == "=":
qcol = _quote(f.column)
vals = " OR ".join(f"{qcol} = '{v}'" for v in f.value)
where_parts.append(f"({vals})")
continue
val = f"'{f.value}'" if isinstance(f.value, str) else str(f.value)
qcol = _quote(f.column)
if op == "contains":
where_parts.append(f"{qcol} ILIKE '%' || {val} || '%'")
elif op == "!contains":
where_parts.append(f"{qcol} NOT ILIKE '%' || {val} || '%'")
elif op == "empty":
where_parts.append(f"{qcol} = {val}")
elif op == "!empty":
where_parts.append(f"{qcol} != {val}")
else:
where_parts.append(f"{qcol} {op} {val}")
if where_parts:
lines.append("WHERE " + " AND ".join(where_parts))
if group_cols:
lines.append("GROUP BY " + ", ".join(_quote(c) for c in group_cols))
if order_by:
lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
elif params.graph_type == "timeseries":
lines.append("ORDER BY bucket")
if params.limit is not None:
lines.append(f"LIMIT {params.limit}")
return "\n".join(lines)
def create_app(db_file: str | Path | None = None) -> Flask:
app = Flask(__name__, static_folder="static")
if db_file is None:
env_db = os.environ.get("SCUBADUCK_DB")
if env_db:
db_file = env_db
if isinstance(db_file, str) and db_file.upper() == "TEST":
con = _create_test_database()
else:
        db_path = Path(db_file or Path(__file__).with_name("sample.csv")).resolve()
con = _load_database(db_path)
tables = [r[0] for r in con.execute("SHOW TABLES").fetchall()]
if not tables:
raise ValueError("No tables found in database")
default_table = tables[0]
columns_cache: Dict[str, Dict[str, str]] = {}
def get_columns(table: str) -> Dict[str, str]:
if table not in columns_cache:
rows = con.execute(f'PRAGMA table_info("{table}")').fetchall()
if not rows:
raise ValueError(f"Unknown table: {table}")
columns_cache[table] = {r[1]: r[2] for r in rows}
return columns_cache[table]
sample_cache: Dict[Tuple[str, str, str], Tuple[List[str], float]] = {}
CACHE_TTL = 60.0
CACHE_LIMIT = 200
@app.route("/")
def index() -> Any: # pyright: ignore[reportUnusedFunction]
assert app.static_folder is not None
return send_from_directory(app.static_folder, "index.html")
@app.route("/js/<path:filename>")
def js(filename: str) -> Any: # pyright: ignore[reportUnusedFunction]
assert app.static_folder is not None
folder = Path(app.static_folder) / "js"
return send_from_directory(folder, filename)
...
params.filters.append(Filter(f["column"], f["op"], f.get("value")))
if params.table not in tables:
return jsonify({"error": "Invalid table"}), 400
column_types = get_columns(params.table)
if params.time_column and params.time_column not in column_types:
return jsonify({"error": "Invalid time_column"}), 400
if params.time_unit not in {"s", "ms", "us", "ns"}:
return jsonify({"error": "Invalid time_unit"}), 400
if params.graph_type not in {"table", "timeseries"} and (
params.group_by or params.aggregate or params.show_hits
):
return (
jsonify(
{
"error": "group_by, aggregate and show_hits are only val
id for table or timeseries view"
}
),
400,
)
valid_cols = set(column_types.keys())
valid_cols.update(params.derived_columns.keys())
valid_cols.add("Hits")
if params.graph_type == "timeseries":
if params.x_axis is None:
params.x_axis = params.time_column
if params.x_axis is None or params.x_axis not in valid_cols:
return jsonify({"error": "Invalid x_axis"}), 400
ctype = column_types.get(params.x_axis, "").upper()
is_time = any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"])
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
if not (is_time or is_numeric):
return jsonify({"error": "x_axis must be a time column"}), 400
for col in params.columns:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
for col in params.group_by:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
if params.order_by and params.order_by not in valid_cols:
return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
if params.group_by or params.graph_type == "timeseries":
agg = (params.aggregate or "count").lower()
if agg.startswith("p") or agg == "sum":
need_numeric = True
allow_time = False
elif agg == "avg" or agg in {"min", "max"}:
need_numeric = False
allow_time = True
else:
need_numeric = False
allow_time = False
if need_numeric or allow_time:
for c in params.columns:
if c in params.group_by or c == params.x_axis:
continue
if c not in column_types:
continue
ctype = column_types.get(c, "").upper()
is_numeric = "BOOL" in ctype or any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
is_time = "TIMESTAMP" in ctype or "DATE" in ctype or "TIME"
in ctype
if need_numeric and not is_numeric:
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if allow_time and not (is_numeric or is_time):
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if (params.start is None or params.end is None) and (
params.x_axis or params.time_column
):
axis = params.x_axis or params.time_column
assert axis is not None
row = cast(
tuple[datetime | None, datetime | None],
con.execute(
                    f'SELECT min({_quote(axis)}), max({_quote(axis)}) FROM "{params.table}"'
).fetchall()[0],
)
mn, mx = row
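For a plain samples view, build_query should produce SQL along these lines (my approximation of the output):

    params = QueryParams(
        start="2024-01-01 00:00:00",
        end="2024-01-02 00:00:00",
        order_by="timestamp",
        limit=10,
        columns=["timestamp", "event"],
    )
    build_query(params, {"timestamp": "TIMESTAMP", "event": "VARCHAR"})
    # SELECT "timestamp", "event"
    # FROM "events"
    # WHERE "timestamp" >= '2024-01-01 00:00:00' AND "timestamp" <= '2024-01-02 00:00:00'
    # ORDER BY "timestamp" ASC
    # LIMIT 10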
root@54a46097547f:/workspace/scubaduck# sed -n '640,960p' scubaduck/server.py
mn, mx = row
if isinstance(mn, (int, float)):
try:
mn = _numeric_to_datetime(mn, params.time_unit)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mn} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if isinstance(mx, (int, float)):
try:
mx = _numeric_to_datetime(mx, params.time_unit)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mx} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if params.start is None and mn is not None:
params.start = (
mn.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mn, str)
else mn
)
if params.end is None and mx is not None:
params.end = (
mx.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mx, str)
else mx
)
bucket_size: int | None = None
if params.graph_type == "timeseries":
bucket_size = _granularity_seconds(
params.granularity,
params.start if isinstance(params.start, str) else None,
params.end if isinstance(params.end, str) else None,
)
if (
params.limit is not None
and params.start is not None
and params.end is not None
):
try:
start_dt = dtparser.parse(params.start)
end_dt = dtparser.parse(params.end)
buckets = math.ceil(
(end_dt - start_dt).total_seconds() / bucket_size
)
if buckets > 1:
params.limit *= buckets
except Exception:
pass
sql = build_query(params, column_types)
try:
rows = con.execute(sql).fetchall()
except Exception as exc:
tb = traceback.format_exc()
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
app = create_app()
if __name__ == "__main__":
app.run(debug=True)
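One endpoint detail from that last chunk: for timeseries queries the requested limit gets multiplied by the bucket count, presumably so a small per-page limit still yields a full curve. Rough arithmetic for a one-day range at '1 hour' granularity:

    bucket_size = _granularity_seconds("1 hour", "2024-01-01 00:00:00", "2024-01-02 00:00:00")  # 3600
    buckets = math.ceil(86400 / bucket_size)  # 24
    # a requested limit of 10 is rewritten to 10 * 24 = 240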
I see about 720 lines in server.py. I'll search for the query function near line 660.
root@54a46097547f:/workspace/scubaduck# grep -n "def query" -n scubaduck/server.
py
492: def query() -> Any: # pyright: ignore[reportUnusedFunction]
payload = request.get_json(force=True)
try:
start = parse_time(payload.get("start"))
end = parse_time(payload.get("end"))
except Exception as exc:
return jsonify({"error": str(exc)}), 400
params = QueryParams(
start=start,
end=end,
order_by=payload.get("order_by"),
order_dir=payload.get("order_dir", "ASC"),
limit=payload.get("limit"),
columns=payload.get("columns", []),
derived_columns=payload.get("derived_columns", {}),
graph_type=payload.get("graph_type", "samples"),
group_by=payload.get("group_by", []),
aggregate=payload.get("aggregate"),
show_hits=payload.get("show_hits", False),
x_axis=payload.get("x_axis"),
granularity=payload.get("granularity", "Auto"),
fill=payload.get("fill", "0"),
table=payload.get("table", default_table),
time_column=payload.get("time_column", "timestamp"),
time_unit=payload.get("time_unit", "s"),
)
if params.order_by == "Samples":
params.order_by = "Hits"
for f in payload.get("filters", []):
params.filters.append(Filter(f["column"], f["op"], f.get("value")))
if params.table not in tables:
return jsonify({"error": "Invalid table"}), 400
column_types = get_columns(params.table)
if params.time_column and params.time_column not in column_types:
return jsonify({"error": "Invalid time_column"}), 400
if params.time_unit not in {"s", "ms", "us", "ns"}:
return jsonify({"error": "Invalid time_unit"}), 400
if params.graph_type not in {"table", "timeseries"} and (
params.group_by or params.aggregate or params.show_hits
):
return (
jsonify(
{
"error": "group_by, aggregate and show_hits are only val
id for table or timeseries view"
}
),
400,
)
valid_cols = set(column_types.keys())
valid_cols.update(params.derived_columns.keys())
valid_cols.add("Hits")
if params.graph_type == "timeseries":
if params.x_axis is None:
params.x_axis = params.time_column
if params.x_axis is None or params.x_axis not in valid_cols:
return jsonify({"error": "Invalid x_axis"}), 400
ctype = column_types.get(params.x_axis, "").upper()
is_time = any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"])
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
if not (is_time or is_numeric):
return jsonify({"error": "x_axis must be a time column"}), 400
for col in params.columns:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
for col in params.group_by:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
if params.order_by and params.order_by not in valid_cols:
return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
if params.group_by or params.graph_type == "timeseries":
agg = (params.aggregate or "count").lower()
if agg.startswith("p") or agg == "sum":
need_numeric = True
allow_time = False
elif agg == "avg" or agg in {"min", "max"}:
need_numeric = False
allow_time = True
else:
need_numeric = False
allow_time = False
if need_numeric or allow_time:
for c in params.columns:
if c in params.group_by or c == params.x_axis:
continue
if c not in column_types:
continue
ctype = column_types.get(c, "").upper()
is_numeric = "BOOL" in ctype or any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
is_time = "TIMESTAMP" in ctype or "DATE" in ctype or "TIME"
in ctype
if need_numeric and not is_numeric:
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if allow_time and not (is_numeric or is_time):
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if (params.start is None or params.end is None) and (
params.x_axis or params.time_column
):
axis = params.x_axis or params.time_column
assert axis is not None
row = cast(
tuple[datetime | None, datetime | None],
con.execute(
                    f'SELECT min({_quote(axis)}), max({_quote(axis)}) FROM "{params.table}"'
).fetchall()[0],
)
mn, mx = row
if isinstance(mn, (int, float)):
try:
mn = _numeric_to_datetime(mn, params.time_unit)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mn} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if isinstance(mx, (int, float)):
try:
mx = _numeric_to_datetime(mx, params.time_unit)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mx} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if params.start is None and mn is not None:
params.start = (
mn.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mn, str)
else mn
)
if params.end is None and mx is not None:
params.end = (
mx.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mx, str)
else mx
)
bucket_size: int | None = None
if params.graph_type == "timeseries":
bucket_size = _granularity_seconds(
params.granularity,
params.start if isinstance(params.start, str) else None,
params.end if isinstance(params.end, str) else None,
)
if (
params.limit is not None
and params.start is not None
and params.end is not None
):
try:
start_dt = dtparser.parse(params.start)
end_dt = dtparser.parse(params.end)
buckets = math.ceil(
(end_dt - start_dt).total_seconds() / bucket_size
)
if buckets > 1:
params.limit *= buckets
except Exception:
pass
sql = build_query(params, column_types)
try:
rows = con.execute(sql).fetchall()
except Exception as exc:
tb = traceback.format_exc()
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
app = create_app()
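So the whole contract is a JSON POST to /api/query. A minimal round trip against the bundled sample.csv would look something like this (a sketch using Flask's test client):

    from scubaduck import server

    client = server.create_app().test_client()
    rv = client.post(
        "/api/query",
        json={"table": "events", "columns": ["timestamp", "event"], "limit": 1},
    )
    rv.get_json()["sql"]   # the generated SELECT
    rv.get_json()["rows"]  # list of row lists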
The user wants byte objects serialized to an escaped ASCII format similar to Python's repr.
I'll check existing tests for usage of 'bytes' or 'binary' to create a new one.
root@54a46097547f:/workspace/scubaduck# grep -n "bytes" -r tests | head
I'll add a test for bytes at the end of tests/test_server_db_types.py.
def test_envvar_db(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
csv_file = tmp_path / "custom.csv"
csv_file.write_text("timestamp,event,value,user\n2024-01-01 00:00:00,login,5
,bob\n")
monkeypatch.setenv("SCUBADUCK_DB", str(csv_file))
app = server.create_app()
client = app.test_client()
payload = _make_payload()
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
rows = rv.get_json()["rows"]
assert len(rows) == 1
def test_envvar_db_missing(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
missing = tmp_path / "missing.sqlite"
monkeypatch.setenv("SCUBADUCK_DB", str(missing))
with pytest.raises(FileNotFoundError):
server.create_app()
I'll create a table with a BLOB column to test handling non-serializable bytes.
root@54a46097547f:/workspace/scubaduck# grep -n "bytes" -n scubaduck/server.py
I'll inspect how build_query or query handle data, considering a custom encoder for byte conversion.
root@54a46097547f:/workspace/scubaduck# grep -n "_numeric_to_datetime" -n scubad
uck/server.py
143:def _numeric_to_datetime(value: int | float, unit: str) -> datetime:
643: mn = _numeric_to_datetime(mn, params.time_unit)
658: mx = _numeric_to_datetime(mx, params.time_unit)
I'll check __init__.py to understand the typical style for implementing this function.
root@54a46097547f:/workspace/scubaduck# cat scubaduck/__init__.py
def hello() -> str:
return "Hello from scubaduck!"
I'll check the tests for server_time to match the desired style.
from __future__ import annotations
import json
from scubaduck import server
def test_basic_query() -> None:
app = server.app
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"order_by": "timestamp",
"order_dir": "ASC",
"limit": 10,
"columns": ["timestamp", "event", "value", "user"],
"filters": [],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert data
rows = data["rows"]
# We expect first three rows (until 2024-01-02 00:00:00)
assert len(rows) == 3
assert rows[0][1] == "login"
assert rows[1][1] == "logout"
def test_js_served() -> None:
app = server.app
client = app.test_client()
rv = client.get("/js/chip_input.js")
assert rv.status_code == 200
assert b"initChipInput" in rv.data
def test_filter_multi_token() -> None:
app = server.app
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 03:00:00",
"order_by": "timestamp",
"limit": 10,
"columns": ["timestamp", "event", "value", "user"],
"filters": [{"column": "user", "op": "=", "value": ["alice", "charlie"]}
],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert data
rows = data["rows"]
# Should only return rows for alice and charlie
assert len(rows) == 3
assert rows[0][3] == "alice"
assert rows[-1][3] == "charlie"
def test_empty_filter_is_noop() -> None:
app = server.app
client = app.test_client()
base_payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-03 00:00:00",
"limit": 100,
"columns": ["timestamp", "event", "value", "user"],
}
no_filter = {**base_payload, "filters": []}
empty_filter = {
**base_payload,
"filters": [{"column": "user", "op": "=", "value": None}],
}
I’ll create a function to convert bytes to ASCII with escape sequences using 'backslashreplace'.
root@54a46097547f:/workspace/scubaduck# python3 - <<'EOF'
> print(b'abc\xff'.decode('ascii', 'backslashreplace'))
> EOF
abc\xff
I'll see what happens when I use str(value) to check the output.
root@54a46097547f:/workspace/scubaduck# python3 - <<'EOF'
> print(str(b'abc\xff'))
> EOF
b'abc\xff'
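Both give printable ASCII, but repr() (which str() matches for bytes) also escapes control bytes like \x00, whereas 'backslashreplace' only fires on decode errors, so sub-0x80 control bytes would pass through verbatim. Either way the result is a plain str, which is the real requirement; the json machinery refuses raw bytes outright:

    import json
    json.dumps(repr(b"\x00\xff"))  # fine: repr() returned a str
    json.dumps(b"\x00\xff")        # TypeError: Object of type bytes is not JSON serializable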
I'm reconsidering how to represent bytes. It sounds like the user wants the escaped ASCII format. I'll check the relevant code style for clarification before proceeding.
root@54a46097547f:/workspace/scubaduck# grep -n "backslash" -r scubaduck | head
I'll check if there is a custom datetime serializer by looking for json dumps.
root@54a46097547f:/workspace/scubaduck# grep -n "json" -n scubaduck/server.py |
head -n 20
18:from flask import Flask, jsonify, request, send_from_directory
442: return jsonify(tables)
448: return jsonify([{"name": r[1], "type": r[2]} for r in rows])
474: return jsonify([])
477: return jsonify([])
481: return jsonify(cached)
489: return jsonify(values)
493: payload = request.get_json(force=True)
498: return jsonify({"error": str(exc)}), 400
525: return jsonify({"error": "Invalid table"}), 400
530: return jsonify({"error": "Invalid time_column"}), 400
533: return jsonify({"error": "Invalid time_unit"}), 400
539: jsonify(
554: return jsonify({"error": "Invalid x_axis"}), 400
570: return jsonify({"error": "x_axis must be a time column"}), 400
573: return jsonify({"error": f"Unknown column: {col}"}), 400
576: return jsonify({"error": f"Unknown column: {col}"}), 400
578: return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
613: jsonify(
622: jsonify(
I need to implement custom serialization for byte values and ensure correct handling of datetimes.
root@54a46097547f:/workspace/scubaduck# cat pyproject.toml
[project]
name = "scubaduck"
version = "0.1.0"
description = "Scuba on DuckDB"
readme = "README.md"
requires-python = ">=3.12"
license = {text = "MIT"}
dependencies = [
"duckdb",
"flask",
"python-dateutil>=2.9.0.post0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.pytest.ini_options]
addopts = "-n auto --tb=native"
[tool.pyright]
# Pyright configuration with strict settings
include = ["scubaduck", "tests"]
exclude = ["**/__pycache__", "dist"]
stubPath = "stubs"
venvPath = "."
venv = ".venv"
reportMissingImports = true
reportMissingTypeStubs = true
pythonVersion = "3.12"
pythonPlatform = "All"
typeCheckingMode = "strict"
reportUnknownMemberType = true
reportUnknownParameterType = true
reportUnknownVariableType = true
reportUnknownArgumentType = true
reportPrivateImportUsage = true
reportUntypedFunctionDecorator = true
reportFunctionMemberAccess = true
reportIncompatibleMethodOverride = true
[tool.ruff]
target-version = "py312"
[tool.uv]
dev-dependencies = [
"pytest>=8.3.5",
"ruff>=0.11.2",
"pytest-xdist>=3.6.1",
"playwright>=1.52.0",
"pyright>=1.1.350",
"pytest-playwright>=0.7.0",
]
I’ll check codex_setup.sh to understand how the environment is configured before running tests.
root@54a46097547f:/workspace/scubaduck# cat codex_setup.sh
uv sync --frozen
source .venv/bin/activate
python -c "import os; import duckdb; con = duckdb.connect(); con.execute(f\"SET
http_proxy = '{os.getenv(\"HTTP_PROXY\")}'\"); con.execute(\"INSTALL 'sqlite';\"
)"
playwright install chromium
echo "source .venv/bin/activate" >> ~/.bashrc
I'll check AGENTS.md again to confirm the instruction to use a virtual environment.
root@54a46097547f:/workspace/scubaduck# source .venv/bin/activate
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I’m adding the bytes serialization near the bottom of server.py and patching the result.
(scubaduck) root@54a46097547f:/workspace/scubaduck# sed -n '708,760p' scubaduck/server.py
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
app = create_app()
if __name__ == "__main__":
app.run(debug=True)
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll implement serialization using repr(val) to return byte literals in the escaped format the user asked for.
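So the plan is a small helper applied to every cell before jsonify, roughly:

    def _serialize(value: Any) -> Any:
        # bytes aren't JSON serializable; repr() yields an escaped ASCII
        # literal like "b'\\x00\\xff'" that survives the trip through jsonify
        if isinstance(value, bytes):
            return repr(value)
        return value

    rows = [[_serialize(v) for v in r] for r in rows]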
(scubaduck) root@54a46097547f:/workspace/scubaduck# grep -n "def test_sqlite_" -
n tests/test_server_db_types.py
64:def test_sqlite_longvarchar(tmp_path: Path) -> None:
95:def test_sqlite_bigint(tmp_path: Path) -> None:
124:def test_sqlite_boolean_aggregation(tmp_path: Path) -> None:
155:def test_sqlite_boolean_group_by(tmp_path: Path) -> None:
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll add the new test at the bottom, following style around line 60.
"INSERT INTO events VALUES (?, ?, ?, ?)", (ts, ev, int(val), use
r)
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
duckdb_file = tmp_path / "events.duckdb"
con = duckdb.connect(duckdb_file)
con.execute(
f"CREATE TABLE events AS SELECT * FROM read_csv_auto('{csv_file.as_posix
()}')"
)
    con.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
for db in (csv_file, sqlite_file, duckdb_file):
app = server.create_app(db)
client = app.test_client()
payload = _make_payload()
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/js
on"
)
rows = rv.get_json()["rows"]
assert len(rows) == 3
def test_sqlite_longvarchar(tmp_path: Path) -> None:
sqlite_file = tmp_path / "events.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute(
"CREATE TABLE events (timestamp TEXT, url LONGVARCHAR, title VARCHAR(10)
)"
)
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', 'https://a.com', 'Hom
e')"
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-01 01:00:00",
"order_by": "timestamp",
"columns": ["timestamp", "url", "title"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][1] == "https://a.com"
def test_sqlite_bigint(tmp_path: Path) -> None:
sqlite_file = tmp_path / "big.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, value INTEGER)")
big_value = 13385262862605259
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
(big_value,),
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"order_by": "timestamp",
"columns": ["timestamp", "value"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][1] == big_value
def test_sqlite_boolean_aggregation(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:30:00', 0)")
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"graph_type": "table",
"aggregate": "Avg",
"columns": ["flag"],
"show_hits": True,
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][0] == 2
assert data["rows"][0][1] == 0.5
def test_sqlite_boolean_group_by(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (id INTEGER, ts TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:30:00', 0)")
conn.execute("INSERT INTO events VALUES (2, '2024-01-01 01:00:00', 1)")
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"graph_type": "table",
"time_column": "ts",
"aggregate": "Avg",
"group_by": ["id"],
"columns": ["flag"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
(scubaduck) root@54a46097547f:/workspace/scubaduck# sed -n '180,260p' tests/test_server_db_types.py
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
rows = sorted(data["rows"]) # order can vary
assert rows == [[1, 2, 0.5], [2, 1, 1.0]]
def test_envvar_db(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
csv_file = tmp_path / "custom.csv"
csv_file.write_text("timestamp,event,value,user\n2024-01-01 00:00:00,login,5
,bob\n")
monkeypatch.setenv("SCUBADUCK_DB", str(csv_file))
app = server.create_app()
client = app.test_client()
payload = _make_payload()
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
rows = rv.get_json()["rows"]
assert len(rows) == 1
def test_envvar_db_missing(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
missing = tmp_path / "missing.sqlite"
monkeypatch.setenv("SCUBADUCK_DB", str(missing))
with pytest.raises(FileNotFoundError):
server.create_app()
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll add a test for SQLite BLOB data and verify the number of rows returned.
(scubaduck) root@54a46097547f:/workspace/scubaduck# apply_patch <<'PATCH'
> *** Begin Patch
> *** Update File: scubaduck/server.py
> @@
> - result: Dict[str, Any] = {"sql": sql, "rows": rows}
> + def _serialize(value: Any) -> Any:
> + if isinstance(value, bytes):
> + return repr(value)
> + return value
> +
> + rows = [[_serialize(v) for v in r] for r in rows]
> + result: Dict[str, Any] = {"sql": sql, "rows": rows}
> *** End Patch
> PATCH
Success. Updated the following files:
M scubaduck/server.py
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll check the areas around the patch in server.py to verify.
(scubaduck) root@54a46097547f:/workspace/scubaduck# grep -n "_serialize" scubaduck/server.py
714: def _serialize(value: Any) -> Any:
719: rows = [[_serialize(v) for v in r] for r in rows]
(scubaduck) root@54a46097547f:/workspace/scubaduck# sed -n '708,736p' scubaduck/server.py
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
def _serialize(value: Any) -> Any:
if isinstance(value, bytes):
return repr(value)
return value
rows = [[_serialize(v) for v in r] for r in rows]
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
app = create_app()
if __name__ == "__main__":
app.run(debug=True)
(scubaduck) root@54a46097547f:/workspace/scubaduck#
Everything looks good with spacing; now it's time to add a test!
(scubaduck) root@54a46097547f:/workspace/scubaduck# apply_patch <<'PATCH'
> *** Begin Patch
> *** Update File: tests/test_server_db_types.py
> @@
> def test_sqlite_bigint(tmp_path: Path) -> None:
> @@
> assert data["rows"][0][1] == big_value
> +
> +
> +def test_sqlite_bytes(tmp_path: Path) -> None:
> + sqlite_file = tmp_path / "bin.sqlite"
> + import sqlite3
> +
> + conn = sqlite3.connect(sqlite_file)
> + conn.execute("CREATE TABLE events (timestamp TEXT, data BLOB)")
> + conn.execute(
> + "INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
> + (b"\x00\xff",),
> + )
> + conn.commit()
> +    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
> +
> + app = server.create_app(sqlite_file)
> + client = app.test_client()
> + payload = {
> + "table": "events",
> + "order_by": "timestamp",
> + "columns": ["timestamp", "data"],
> + }
> + rv = client.post(
> +        "/api/query", data=json.dumps(payload), content_type="application/json"
> + )
> + data = rv.get_json()
> + assert rv.status_code == 200
> + assert data["rows"] == [["2024-01-01 00:00:00", "b'\\x00\\xff'"]]
> *** End Patch
> PATCH
Success. Updated the following files:
M tests/test_server_db_types.py
(scubaduck) root@54a46097547f:/workspace/scubaduck# grep -n "test_sqlite_bytes"
-n tests/test_server_db_types.py
124:def test_sqlite_bytes(tmp_path: Path) -> None:
(scubaduck) root@54a46097547f:/workspace/scubaduck#
The new test sits near line 124, so I'll review the surrounding area.
(scubaduck) root@54a46097547f:/workspace/scubaduck# sed -n '100,142p' tests/test_server_db_types.py
conn.execute("CREATE TABLE events (timestamp TEXT, value INTEGER)")
big_value = 13385262862605259
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
(big_value,),
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"order_by": "timestamp",
"columns": ["timestamp", "value"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][1] == big_value
def test_sqlite_bytes(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bin.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, data BLOB)")
conn.execute(
"INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
(b"\x00\xff",),
)
conn.commit()
    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"order_by": "timestamp",
"columns": ["timestamp", "data"],
"columns": ["timestamp", "data"],
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"] == [["2024-01-01 00:00:00", "b'\\x00\\xff'"]]
def test_sqlite_boolean_aggregation(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (timestamp TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES ('2024-01-01 00:30:00', 0)")
conn.commit()
conn.close() # pyright: ignore[reportUnknownMemberType, reportAttributeAcce
ssIssue]
app = server.create_app(sqlite_file)
client = app.test_client()
payload = {
"table": "events",
"start": "2024-01-01 00:00:00",
"end": "2024-01-02 00:00:00",
"graph_type": "table",
"aggregate": "Avg",
"columns": ["flag"],
"show_hits": True,
}
rv = client.post(
"/api/query", data=json.dumps(payload), content_type="application/json"
)
data = rv.get_json()
assert rv.status_code == 200
assert data["rows"][0][0] == 2
assert data["rows"][0][1] == 0.5
def test_sqlite_boolean_group_by(tmp_path: Path) -> None:
sqlite_file = tmp_path / "bool.sqlite"
import sqlite3
conn = sqlite3.connect(sqlite_file)
conn.execute("CREATE TABLE events (id INTEGER, ts TEXT, flag BOOLEAN)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:00:00', 1)")
conn.execute("INSERT INTO events VALUES (1, '2024-01-01 00:30:00', 0)")
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I’ll go ahead and format the code using ruff now.
(scubaduck) root@54a46097547f:/workspace/scubaduck# ruff format scubaduck/server.py tests/test_server_db_types.py
2 files left unchanged
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll run a quick ruff check to ensure everything's in place.
(scubaduck) root@54a46097547f:/workspace/scubaduck# ruff check scubaduck/server.py tests/test_server_db_types.py
All checks passed!
(scubaduck) root@54a46097547f:/workspace/scubaduck# pyright scubaduck/server.py tests/test_server_db_types.py > /tmp/pyright.log && tail -n 20 /tmp/pyright.log
0 errors, 0 warnings, 0 informations
(scubaduck) root@54a46097547f:/workspace/scubaduck# cat /tmp/pyright.log
0 errors, 0 warnings, 0 informations
(scubaduck) root@54a46097547f:/workspace/scubaduck#
Time to test the code. I'll go ahead and run pytest now!
(scubaduck) root@54a46097547f:/workspace/scubaduck# pytest -q tests/test_server_db_types.py::test_sqlite_bytes > /tmp/pytest.log && tail -n 20 /tmp/pytest.log
bringing up nodes...
. [100%]
1 passed in 0.84s
(scubaduck) root@54a46097547f:/workspace/scubaduck#
To be safe, I'll run the full test suite to check for regressions elsewhere.
(scubaduck) root@54a46097547f:/workspace/scubaduck# pytest -q > /tmp/pytest_full.log && tail -n 20 /tmp/pytest_full.log
bringing up nodes...
........................................................................ [ 59%]
.................................................. [100%]
122 passed in 15.81s
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll check the status of my git repository now that all tests have passed.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git status --short
M scubaduck/server.py
M tests/test_server_db_types.py
(scubaduck) root@54a46097547f:/workspace/scubaduck#
It's time to go ahead and commit the changes now.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git diff --staged
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I haven't staged the changes yet, so I'll go ahead and stage the files.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git add scubaduck/server.py tests/test_server_db_types.py
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll re-check which files are staged before committing.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git status --short
M scubaduck/server.py
M tests/test_server_db_types.py
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'm double-checking that both staged files are tracked correctly before committing.
y
scubaduck/server.py
tests/test_server_db_types.py
(scubaduck) root@54a46097547f:/workspace/scubaduck#
Looks like a stray 'y' line appeared. Diff shows the files, so I'll commit.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git commit -m "fix(server): json encode bytes" && git status --short
[work 776de08] fix(server): json encode bytes
2 files changed, 34 insertions(+)
(scubaduck) root@54a46097547f:/workspace/scubaduck#
The commit may have succeeded, but I’ll run 'git status' to be sure.
(scubaduck) root@54a46097547f:/workspace/scubaduck# git status --short
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll show the commit and its diffstat to make sure everything looks good.
commit 776de087bc434149cf3c541cbf7ead5ad5aa098f (HEAD -> work)
Author: Codex <codex@openai.com>
Date:   Fri May 23 02:35:07 2025 +0000

    fix(server): json encode bytes

 scubaduck/server.py           |  6 ++++++
 tests/test_server_db_types.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
First I'll pull up the relevant server.py lines so I can cite them in the PR message.
(scubaduck) root@54a46097547f:/workspace/scubaduck# nl -ba scubaduck/server.py | sed -n '708,730p'
   708	            print(f"Query failed:\n{sql}\n{tb}")
   709	            return (
   710	                jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
   711	                400,
   712	            )
   713	
   714	        def _serialize(value: Any) -> Any:
   715	            if isinstance(value, bytes):
   716	                return repr(value)
   717	            return value
   718	
   719	        rows = [[_serialize(v) for v in r] for r in rows]
   720	        result: Dict[str, Any] = {"sql": sql, "rows": rows}
   721	        if params.start is not None:
   722	            result["start"] = str(params.start)
   723	        if params.end is not None:
   724	            result["end"] = str(params.end)
   725	        if bucket_size is not None:
   726	            result["bucket_size"] = bucket_size
   727	        return jsonify(result)
   728	
   729	    return app
   730	
(scubaduck) root@54a46097547f:/workspace/scubaduck#
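To make the citation self-explanatory in the PR, here's what the cited _serialize helper does in isolation. This is just a copy of the four lines above run against a sample row, not how it's wired into the Flask handler:

from typing import Any


def _serialize(value: Any) -> Any:
    # only bytes need special handling; everything else passes through
    # to Flask's jsonify untouched
    if isinstance(value, bytes):
        return repr(value)
    return value


rows = [["2024-01-01 00:00:00", b"\x00\xff"]]
print([[_serialize(v) for v in r] for r in rows])
# [['2024-01-01 00:00:00', "b'\\x00\\xff'"]]

The transformed row is exactly what the new test expects back from /api/query.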
Now I'll grab the corresponding snippet from the test file.
(scubaduck) root@54a46097547f:/workspace/scubaduck# nl -ba tests/test_server_db_types.py | sed -n '120,160p'
   120	    assert rv.status_code == 200
   121	    assert data["rows"][0][1] == big_value
   122	
   123	
   124	def test_sqlite_bytes(tmp_path: Path) -> None:
   125	    sqlite_file = tmp_path / "bin.sqlite"
   126	    import sqlite3
   127	
   128	    conn = sqlite3.connect(sqlite_file)
   129	    conn.execute("CREATE TABLE events (timestamp TEXT, data BLOB)")
   130	    conn.execute(
   131	        "INSERT INTO events VALUES ('2024-01-01 00:00:00', ?)",
   132	        (b"\x00\xff",),
   133	    )
   134	    conn.commit()
   135	    conn.close()  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
   136	
   137	    app = server.create_app(sqlite_file)
   138	    client = app.test_client()
   139	    payload = {
   140	        "table": "events",
   141	        "order_by": "timestamp",
   142	        "columns": ["timestamp", "data"],
   143	    }
   144	    rv = client.post(
   145	        "/api/query", data=json.dumps(payload), content_type="application/json"
   146	    )
   147	    data = rv.get_json()
   148	    assert rv.status_code == 200
   149	    assert data["rows"] == [["2024-01-01 00:00:00", "b'\\x00\\xff'"]]
   150	
   151	
   152	def test_sqlite_boolean_aggregation(tmp_path: Path) -> None:
   153	    sqlite_file = tmp_path / "bool.sqlite"
   154	    import sqlite3
   155	
   156	    conn = sqlite3.connect(sqlite_file)
   157	    conn.execute("CREATE TABLE events (timestamp TEXT, flag BOOLEAN)")
   158	    conn.execute("INSERT INTO events VALUES ('2024-01-01 00:00:00', 1)")
   159	    conn.execute("INSERT INTO events VALUES ('2024-01-01 00:30:00', 0)")
   160	    conn.commit()
(scubaduck) root@54a46097547f:/workspace/scubaduck#
I'll craft the PR with a clear description covering both the server fix and the new test, citing these snippets.