Task ID: task_e_682fd598fe04832393a62b189b3074a4
Configuring language runtimes...
+ rm -- /tmp/ETO1KX-setup_script.sh
+ source ./codex_setup.sh
++ uv sync --frozen
Using CPython 3.12.3 interpreter at: /usr/bin/python3.12
Creating virtual environment at: .venv
Building scubaduck @ file:///workspace/scubaduck
Downloading pyright (5.3MiB)
Downloading playwright (43.1MiB)
Downloading duckdb (19.3MiB)
Downloading ruff (11.0MiB)
Downloading ruff
Built scubaduck @ file:///workspace/scubaduck
Downloading duckdb
Downloading playwright
Downloading pyright
Prepared 33 packages in 1.61s
Installed 33 packages in 75ms
+ blinker==1.9.0
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ click==8.2.0
+ duckdb==1.2.2
+ execnet==2.1.1
+ flask==3.1.1
+ greenlet==3.2.2
+ idna==3.10
+ iniconfig==2.1.0
+ itsdangerous==2.2.0
+ jinja2==3.1.6
+ markupsafe==3.0.2
+ nodeenv==1.9.1
+ packaging==25.0
+ playwright==1.52.0
+ pluggy==1.6.0
+ pyee==13.0.0
+ pyright==1.1.400
+ pytest==8.3.5
+ pytest-base-url==2.1.0
+ pytest-playwright==0.7.0
+ pytest-xdist==3.6.1
+ python-dateutil==2.9.0.post0
+ python-slugify==8.0.4
+ requests==2.32.3
+ ruff==0.11.10
+ scubaduck==0.1.0 (from file:///workspace/scubaduck)
+ six==1.17.0
+ text-unidecode==1.3
+ typing-extensions==4.13.2
+ urllib3==2.4.0
+ werkzeug==3.1.3
++ source .venv/bin/activate
+++ '[' -z '' ']'
+++ '[' -n x ']'
+++ SCRIPT_PATH=.venv/bin/activate
+++ '[' .venv/bin/activate = /tmp/ETO1KX-setup_script.sh ']'
+++ deactivate nondestructive
+++ unset -f pydoc
+++ '[' -z '' ']'
+++ '[' -z '' ']'
+++ hash -r
+++ '[' -z '' ']'
+++ unset VIRTUAL_ENV
+++ unset VIRTUAL_ENV_PROMPT
+++ '[' '!' nondestructive = nondestructive ']'
+++ VIRTUAL_ENV=/workspace/scubaduck/.venv
+++ '[' linux-gnu = cygwin ']'
+++ '[' linux-gnu = msys ']'
+++ export VIRTUAL_ENV
+++ '[' -z '' ']'
+++ unset SCRIPT_PATH
+++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/root/.rbenv/shims:/root/.rbenv/bin:/root/.rbenv/shims:/root/.local/share/swiftly/bin:/root/.bun/bin:/root/.nvm/versions/node/v22.16.0/bin:/root/.pyenv/shims:3441PYENV_ROOT/shims:/root/.pyenv/bin:/usr/local/go/bin:/root/go/bin:/root/.rbenv/bin:/root/.rbenv/shims:/root/.bun/bin:/root/.local/bin:/root/.pyenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+++ PATH=/workspace/scubaduck/.venv/bin:/root/.cargo/bin:/root/.rbenv/shims:/root/.rbenv/bin:/root/.rbenv/shims:/root/.local/share/swiftly/bin:/root/.bun/bin:/root/.nvm/versions/node/v22.16.0/bin:/root/.pyenv/shims:3441PYENV_ROOT/shims:/root/.pyenv/bin:/usr/local/go/bin:/root/go/bin:/root/.rbenv/bin:/root/.rbenv/shims:/root/.bun/bin:/root/.local/bin:/root/.pyenv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+++ export PATH
+++ '[' xscubaduck '!=' x ']'
+++ VIRTUAL_ENV_PROMPT=scubaduck
+++ export VIRTUAL_ENV_PROMPT
+++ '[' -z '' ']'
+++ '[' -z '' ']'
+++ _OLD_VIRTUAL_PS1=
+++ PS1='(scubaduck) '
+++ export PS1
+++ alias pydoc
+++ true
+++ hash -r
++ python -c 'import os; import duckdb; con = duckdb.connect(); con.execute(f"SET http_proxy = '\''{os.getenv("HTTP_PROXY")}'\''"); con.execute("INSTALL '\''sqlite'\'';")'
++ playwright install chromium
Downloading Chromium 136.0.7103.25 (playwright build v1169) from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1169/chromium-linux.zip
167.7 MiB [] 0% 0.0s167.7 MiB [] 0% 29.6s167.7 MiB [] 0% 30.0s167.7 MiB [] 0% 20.7s167.7 MiB [] 0% 12.7s167.7 MiB [] 1% 7.7s167.7 MiB [] 1% 5.6s167.7 MiB [] 2% 4.5s167.7 MiB [] 3% 3.7s167.7 MiB [] 4% 3.2s167.7 MiB [] 5% 2.9s167.7 MiB [] 6% 2.8s167.7 MiB [] 7% 2.5s167.7 MiB [] 8% 2.2s167.7 MiB [] 9% 2.1s167.7 MiB [] 11% 1.9s167.7 MiB [] 13% 1.7s167.7 MiB [] 14% 1.6s167.7 MiB [] 15% 1.6s167.7 MiB [] 17% 1.5s167.7 MiB [] 18% 1.4s167.7 MiB [] 19% 1.4s167.7 MiB [] 21% 1.3s167.7 MiB [] 22% 1.3s167.7 MiB [] 23% 1.3s167.7 MiB [] 24% 1.3s167.7 MiB [] 25% 1.3s167.7 MiB [] 26% 1.3s167.7 MiB [] 27% 1.2s167.7 MiB [] 29% 1.2s167.7 MiB [] 30% 1.1s167.7 MiB [] 31% 1.1s167.7 MiB [] 33% 1.0s167.7 MiB [] 34% 1.0s167.7 MiB [] 36% 1.0s167.7 MiB [] 38% 0.9s167.7 MiB [] 39% 0.9s167.7 MiB [] 40% 0.9s167.7 MiB [] 41% 0.9s167.7 MiB [] 42% 0.9s167.7 MiB [] 43% 0.8s167.7 MiB [] 45% 0.8s167.7 MiB [] 46% 0.8s167.7 MiB [] 47% 0.8s167.7 MiB [] 48% 0.8s167.7 MiB [] 50% 0.7s167.7 MiB [] 51% 0.7s167.7 MiB [] 52% 0.7s167.7 MiB [] 54% 0.7s167.7 MiB [] 56% 0.6s167.7 MiB [] 57% 0.6s167.7 MiB [] 58% 0.6s167.7 MiB [] 60% 0.6s167.7 MiB [] 62% 0.5s167.7 MiB [] 63% 0.5s167.7 MiB [] 64% 0.5s167.7 MiB [] 65% 0.5s167.7 MiB [] 67% 0.5s167.7 MiB [] 68% 0.4s167.7 MiB [] 70% 0.4s167.7 MiB [] 71% 0.4s167.7 MiB [] 72% 0.4s167.7 MiB [] 74% 0.4s167.7 MiB [] 76% 0.3s167.7 MiB [] 78% 0.3s167.7 MiB [] 80% 0.3s167.7 MiB [] 81% 0.3s167.7 MiB [] 82% 0.2s167.7 MiB [] 83% 0.2s167.7 MiB [] 85% 0.2s167.7 MiB [] 87% 0.2s167.7 MiB [] 88% 0.2s167.7 MiB [] 90% 0.1s167.7 MiB [] 92% 0.1s167.7 MiB [] 93% 0.1s167.7 MiB [] 94% 0.1s167.7 MiB [] 95% 0.1s167.7 MiB [] 97% 0.0s167.7 MiB [] 98% 0.0s167.7 MiB [] 99% 0.0s167.7 MiB [] 100% 0.0s
Chromium 136.0.7103.25 (playwright build v1169) downloaded to /root/.cache/ms-playwright/chromium-1169
Downloading FFMPEG playwright build v1011 from https://cdn.playwright.dev/dbazure/download/playwright/builds/ffmpeg/1011/ffmpeg-linux.zip
2.3 MiB [] 0% 0.0s2.3 MiB [] 1% 0.9s2.3 MiB [] 8% 0.4s2.3 MiB [] 17% 0.2s2.3 MiB [] 36% 0.1s2.3 MiB [] 78% 0.0s2.3 MiB [] 100% 0.0s
FFMPEG playwright build v1011 downloaded to /root/.cache/ms-playwright/ffmpeg-1011
Downloading Chromium Headless Shell 136.0.7103.25 (playwright build v1169) from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1169/chromium-headless-shell-linux.zip
101.4 MiB [] 0% 0.0s101.4 MiB [] 0% 21.5s101.4 MiB [] 0% 21.1s101.4 MiB [] 0% 14.2s101.4 MiB [] 0% 9.2s101.4 MiB [] 1% 5.6s101.4 MiB [] 3% 3.1s101.4 MiB [] 6% 1.7s101.4 MiB [] 7% 1.5s101.4 MiB [] 9% 1.4s101.4 MiB [] 12% 1.2s101.4 MiB [] 13% 1.1s101.4 MiB [] 14% 1.1s101.4 MiB [] 15% 1.2s101.4 MiB [] 16% 1.2s101.4 MiB [] 16% 1.3s101.4 MiB [] 17% 1.3s101.4 MiB [] 18% 1.3s101.4 MiB [] 19% 1.3s101.4 MiB [] 21% 1.3s101.4 MiB [] 24% 1.1s101.4 MiB [] 26% 1.1s101.4 MiB [] 27% 1.1s101.4 MiB [] 30% 1.0s101.4 MiB [] 32% 0.9s101.4 MiB [] 34% 0.9s101.4 MiB [] 36% 0.8s101.4 MiB [] 38% 0.8s101.4 MiB [] 41% 0.7s101.4 MiB [] 44% 0.7s101.4 MiB [] 46% 0.6s101.4 MiB [] 49% 0.6s101.4 MiB [] 52% 0.5s101.4 MiB [] 55% 0.5s101.4 MiB [] 57% 0.5s101.4 MiB [] 58% 0.4s101.4 MiB [] 61% 0.4s101.4 MiB [] 62% 0.4s101.4 MiB [] 65% 0.4s101.4 MiB [] 67% 0.3s101.4 MiB [] 70% 0.3s101.4 MiB [] 72% 0.3s101.4 MiB [] 74% 0.2s101.4 MiB [] 76% 0.2s101.4 MiB [] 78% 0.2s101.4 MiB [] 81% 0.2s101.4 MiB [] 82% 0.2s101.4 MiB [] 85% 0.1s101.4 MiB [] 86% 0.1s101.4 MiB [] 89% 0.1s101.4 MiB [] 90% 0.1s101.4 MiB [] 92% 0.1s101.4 MiB [] 96% 0.0s101.4 MiB [] 98% 0.0s101.4 MiB [] 100% 0.0s
Chromium Headless Shell 136.0.7103.25 (playwright build v1169) downloaded to /root/.cache/ms-playwright/chromium_headless_shell-1169
++ echo 'source .venv/bin/activate'
I'll start by reviewing the AGENTS.md file in the repo root.
bash: .venv/bin/activate: No such file or directory
root@408933a71470:/workspace# ls
scubaduck
I'll move into the scubaduck repo root directory now.
root@408933a71470:/workspace# cd scubaduck
root@408933a71470:/workspace/scubaduck# ls
AGENTS.md codex_setup.sh scubaduck tests
README.md pyproject.toml stubs uv.lock
Let's dive into AGENTS.md and see what guidance it has for me.
root@408933a71470:/workspace/scubaduck# cat AGENTS.md
- Use "source .venv/bin/activate" to activate the virtual environment before
running any commands
- ONLY use non-interactive commands like cat, sed, apply_patch to do edits.
Do NOT use interactive editors.
- Do NOT attempt to install packages. Only the packages specified in
pyproject.toml are available. You cannot add new packages. If you
desperately want another package, make a note of it in the final PR
description.
- Use conventional commits to format PR title
- There are no nested AGENTS.md files, this is the only agents file
- When using Playwright, ONLY use chromium browser. This browser is already
installed.
- Use "ruff check" to check lint, "ruff format" to autoformat files and
"pyright" to typecheck.
- When writing the PR description, include the original prompt supplied by the
user.
- When you add functionality to the server, add server tests to
tests/test_server.py. When you add features to the frontend, add frontend
tests to tests/test_web.py
I’ll follow these specific steps for activating the environment and testing. Time to inspect the repo!
root@408933a71470:/workspace/scubaduck# ls -R | head
.:
AGENTS.md
README.md
codex_setup.sh
pyproject.toml
scubaduck
stubs
tests
uv.lock
I’ll take a look at the pyproject file.
root@408933a71470:/workspace/scubaduck# cat pyproject.toml | head -n 20
[project]
name = "scubaduck"
version = "0.1.0"
description = "Scuba on DuckDB"
readme = "README.md"
requires-python = ">=3.12"
license = {text = "MIT"}
dependencies = [
"duckdb",
"flask",
"python-dateutil>=2.9.0.post0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.pytest.ini_options]
addopts = "-n auto --tb=native"
I'll take a look at the tests now to see how they're structured.
root@408933a71470:/workspace/scubaduck# ls tests | head
conftest.py
test_multi_table_web.py
test_server.py
test_server_basic.py
test_server_db_types.py
test_server_errors.py
test_server_time.py
test_server_timeseries.py
test_web.py
test_web_basics.py
I'm checking out the structure of the repository, starting with the "scubaduck" directory.
root@408933a71470:/workspace/scubaduck# ls scubaduck | head
__init__.py
sample.csv
server.py
static
Time to open the server and get everything running.
root@408933a71470:/workspace/scubaduck# sed -n '1,160p' scubaduck/server.py
from __future__ import annotations
from dataclasses import dataclass, field, replace
from typing import Any, Dict, List, Tuple, cast
import re
from datetime import datetime, timedelta, timezone
import time
from pathlib import Path
import os
import traceback
import math
import duckdb
from dateutil import parser as dtparser
from flask import Flask, jsonify, request, send_from_directory
def _quote(ident: str) -> str:
"""Return identifier quoted for SQL."""
return f'"{ident.replace('"', '""')}"'
@dataclass
class Filter:
column: str
op: str
value: str | int | float | list[str] | None
@dataclass
class QueryParams:
start: str | None = None
end: str | None = None
order_by: str | None = None
order_dir: str = "ASC"
limit: int | None = None
columns: list[str] = field(default_factory=lambda: [])
filters: list[Filter] = field(default_factory=lambda: [])
derived_columns: dict[str, str] = field(default_factory=lambda: {})
graph_type: str = "samples"
group_by: list[str] = field(default_factory=lambda: [])
aggregate: str | None = None
show_hits: bool = False
x_axis: str | None = None
granularity: str = "Auto"
fill: str = "0"
table: str = "events"
time_column: str | None = "timestamp"
time_unit: str = "s"
def _load_database(path: Path) -> duckdb.DuckDBPyConnection:
if not path.exists():
raise FileNotFoundError(path)
ext = path.suffix.lower()
if ext == ".csv":
con = duckdb.connect()
con.execute(
f"CREATE TABLE events AS SELECT * FROM read_csv_auto('{path.as_posix
()}')"
)
elif ext in {".db", ".sqlite"}:
con = duckdb.connect()
con.execute("LOAD sqlite")
con.execute(f"ATTACH '{path.as_posix()}' AS db (TYPE SQLITE)")
tables = [
r[0]
for r in con.execute(
"SELECT name FROM sqlite_master WHERE type='table'"
).fetchall()
]
for t in tables:
con.execute(f'CREATE VIEW "{t}" AS SELECT * FROM db."{t}"')
else:
con = duckdb.connect(path)
return con
def _create_test_database() -> duckdb.DuckDBPyConnection:
"""Return a DuckDB connection with a small multi-table dataset."""
con = duckdb.connect()
con.execute(
"CREATE TABLE events (id INTEGER PRIMARY KEY, ts INTEGER, val REAL, name
TEXT, flag BOOLEAN)"
)
con.execute("INSERT INTO events VALUES (1, 1704067200, 1.5, 'alice', 1)")
con.execute("INSERT INTO events VALUES (2, 1704070800, 2.0, 'bob', 0)")
con.execute('CREATE TABLE extra (ts INTEGER, "desc" TEXT, num INTEGER)')
con.execute("INSERT INTO extra VALUES (1704067200, 'x', 1)")
con.execute("INSERT INTO extra VALUES (1704070800, 'y', 2)")
return con
_REL_RE = re.compile(
r"([+-]?\d+(?:\.\d*)?)\s*(hour|hours|day|days|week|weeks|fortnight|fortnight
s)",
re.IGNORECASE,
)
def parse_time(val: str | None) -> str | None:
"""Parse an absolute or relative time string into ``YYYY-MM-DD HH:MM:SS``.""
"
if val is None or val == "":
return None
s = val.strip()
if s.lower() == "now":
dt = datetime.now(timezone.utc)
return dt.replace(microsecond=0).strftime("%Y-%m-%d %H:%M:%S")
m = _REL_RE.fullmatch(s)
if m:
qty = float(m.group(1))
unit = m.group(2).lower()
delta: timedelta
if unit.startswith("hour"):
delta = timedelta(hours=qty)
elif unit.startswith("day"):
delta = timedelta(days=qty)
elif unit.startswith("week"):
delta = timedelta(weeks=qty)
elif unit.startswith("fortnight"):
delta = timedelta(weeks=2 * qty)
else: # pragma: no cover - defensive
raise ValueError(f"Unsupported unit: {unit}")
dt = datetime.now(timezone.utc) + delta
return dt.replace(microsecond=0).strftime("%Y-%m-%d %H:%M:%S")
dt = dtparser.parse(s)
return dt.replace(microsecond=0, tzinfo=None).strftime("%Y-%m-%d %H:%M:%S")
def _granularity_seconds(granularity: str, start: str | None, end: str | None) -
> int:
gran = granularity.lower()
mapping = {
"1 second": 1,
"5 seconds": 5,
"10 seconds": 10,
"30 seconds": 30,
"1 minute": 60,
"4 minutes": 240,
"5 minutes": 300,
"10 minutes": 600,
"15 minutes": 900,
"30 minutes": 1800,
"1 hour": 3600,
"3 hours": 10800,
"6 hours": 21600,
"1 day": 86400,
"1 week": 604800,
"30 days": 2592000,
}
if gran in mapping:
return mapping[gran]
if gran in {"auto", "fine"} and start and end:
try:
s = dtparser.parse(start)
e = dtparser.parse(end)
except Exception:
return 3600
total = max((e - s).total_seconds(), 1)
root@408933a71470:/workspace/scubaduck# sed -n '160,320p' scubaduck/server.py
total = max((e - s).total_seconds(), 1)
buckets = 100 if gran == "auto" else 500
return max(int(total // buckets), 1)
return 3600
def _time_expr(col: str, column_types: Dict[str, str] | None, unit: str) -> str:
"""Return SQL expression for column interpreted as timestamp."""
qcol = _quote(col)
if column_types is None:
return qcol
ctype = column_types.get(col, "").upper()
if not any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"]):
if any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
):
if unit == "ns":
expr = f"CAST({qcol} AS BIGINT)"
return f"make_timestamp_ns({expr})"
multiplier = {
"s": 1_000_000,
"ms": 1_000,
"us": 1,
}.get(unit, 1_000_000)
expr = (
f"CAST({qcol} * {multiplier} AS BIGINT)"
if multiplier != 1
else f"CAST({qcol} AS BIGINT)"
)
return f"make_timestamp({expr})"
return qcol
def build_query(params: QueryParams, column_types: Dict[str, str] | None = None)
-> str:
select_parts: list[str] = []
group_cols = params.group_by[:]
selected_for_order = set(params.columns) | set(params.derived_columns.keys()
)
if params.graph_type == "timeseries":
sec = _granularity_seconds(params.granularity, params.start, params.end)
x_axis = params.x_axis or params.time_column
if x_axis is None:
raise ValueError("x_axis required for timeseries")
xexpr = _time_expr(x_axis, column_types, params.time_unit)
if params.start:
bucket_expr = (
f"TIMESTAMP '{params.start}' + INTERVAL '{sec} second' * "
f"CAST(floor((epoch({xexpr}) - epoch(TIMESTAMP '{params.start}')
)/{sec}) AS BIGINT)"
)
else:
bucket_expr = (
f"TIMESTAMP 'epoch' + INTERVAL '{sec} second' * "
f"CAST(floor(epoch({xexpr})/{sec}) AS BIGINT)"
)
select_parts.append(f"{bucket_expr} AS bucket")
group_cols = ["bucket"] + group_cols
selected_for_order.add("bucket")
has_agg = bool(group_cols) or params.aggregate is not None
if has_agg:
select_cols = (
group_cols[1:] if params.graph_type == "timeseries" else group_cols
)
select_parts.extend(_quote(c) for c in select_cols)
agg = (params.aggregate or "count").lower()
selected_for_order.update(group_cols)
def agg_expr(col: str) -> str:
expr = _quote(col)
ctype = column_types.get(col, "").upper() if column_types else ""
if "BOOL" in ctype:
expr = f"CAST({_quote(col)} AS BIGINT)"
if agg.startswith("p"):
quant = float(agg[1:]) / 100
return f"quantile({expr}, {quant})"
if agg == "count distinct":
return f"count(DISTINCT {expr})"
if agg == "avg" and column_types is not None:
if "TIMESTAMP" in ctype or "DATE" in ctype or "TIME" in ctype:
return (
"TIMESTAMP 'epoch' + INTERVAL '1 second' * "
f"CAST(avg(epoch({_quote(col)})) AS BIGINT)"
)
return f"{agg}({expr})"
if agg == "count":
select_parts.append("count(*) AS Count")
selected_for_order.add("Count")
else:
for col in params.columns:
if col in group_cols:
continue
select_parts.append(f"{agg_expr(col)} AS {_quote(col)}")
selected_for_order.add(col)
if params.show_hits:
select_parts.insert(len(group_cols), "count(*) AS Hits")
selected_for_order.add("Hits")
else:
select_parts.extend(_quote(c) for c in params.columns)
selected_for_order.update(params.columns)
order_by = params.order_by if params.order_by in selected_for_order else Non
e
if has_agg and params.derived_columns:
inner_params = replace(
params,
derived_columns={},
order_by=None,
limit=None,
)
inner_sql = build_query(inner_params, column_types)
outer_select = ["t.*"] + [
f"{expr} AS {name}" for name, expr in params.derived_columns.items()
]
indented_inner = "\n".join(" " + line for line in inner_sql.splitline
s())
lines = [
f"SELECT {', '.join(outer_select)}",
"FROM (",
indented_inner,
") t",
]
if order_by:
lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
elif params.graph_type == "timeseries":
lines.append("ORDER BY bucket")
if params.limit is not None:
lines.append(f"LIMIT {params.limit}")
return "\n".join(lines)
for name, expr in params.derived_columns.items():
select_parts.append(f"{expr} AS {name}")
selected_for_order.add(name)
select_clause = ", ".join(select_parts) if select_parts else "*"
lines = [f"SELECT {select_clause}", f'FROM "{params.table}"']
where_parts: list[str] = []
if params.time_column:
time_expr = _time_expr(params.time_column, column_types, params.time_uni
t)
else:
time_expr = None
if time_expr and params.start:
where_parts.append(f"{time_expr} >= '{params.start}'")
if time_expr and params.end:
where_parts.append(f"{time_expr} <= '{params.end}'")
for f in params.filters:
op = f.op
if op in {"empty", "!empty"}:
val = "''"
else:
if f.value is None:
continue
if isinstance(f.value, list):
if not f.value:
continue
root@408933a71470:/workspace/scubaduck# sed -n '320,640p' scubaduck/server.py
continue
if op == "=":
qcol = _quote(f.column)
vals = " OR ".join(f"{qcol} = '{v}'" for v in f.value)
where_parts.append(f"({vals})")
continue
val = f"'{f.value}'" if isinstance(f.value, str) else str(f.value)
qcol = _quote(f.column)
if op == "contains":
where_parts.append(f"{qcol} ILIKE '%' || {val} || '%'")
elif op == "!contains":
where_parts.append(f"{qcol} NOT ILIKE '%' || {val} || '%'")
elif op == "empty":
where_parts.append(f"{qcol} = {val}")
elif op == "!empty":
where_parts.append(f"{qcol} != {val}")
else:
where_parts.append(f"{qcol} {op} {val}")
if where_parts:
lines.append("WHERE " + " AND ".join(where_parts))
if group_cols:
lines.append("GROUP BY " + ", ".join(_quote(c) for c in group_cols))
if order_by:
lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
elif params.graph_type == "timeseries":
lines.append("ORDER BY bucket")
if params.limit is not None:
lines.append(f"LIMIT {params.limit}")
return "\n".join(lines)
def create_app(db_file: str | Path | None = None) -> Flask:
app = Flask(__name__, static_folder="static")
if db_file is None:
env_db = os.environ.get("SCUBADUCK_DB")
if env_db:
db_file = env_db
if isinstance(db_file, str) and db_file.upper() == "TEST":
con = _create_test_database()
else:
db_path = Path(db_file or Path(__file__).with_name("sample.csv")).resolv
e()
con = _load_database(db_path)
tables = [r[0] for r in con.execute("SHOW TABLES").fetchall()]
if not tables:
raise ValueError("No tables found in database")
default_table = tables[0]
columns_cache: Dict[str, Dict[str, str]] = {}
def get_columns(table: str) -> Dict[str, str]:
if table not in columns_cache:
rows = con.execute(f'PRAGMA table_info("{table}")').fetchall()
if not rows:
raise ValueError(f"Unknown table: {table}")
columns_cache[table] = {r[1]: r[2] for r in rows}
return columns_cache[table]
sample_cache: Dict[Tuple[str, str, str], Tuple[List[str], float]] = {}
CACHE_TTL = 60.0
CACHE_LIMIT = 200
@app.route("/")
def index() -> Any: # pyright: ignore[reportUnusedFunction]
assert app.static_folder is not None
return send_from_directory(app.static_folder, "index.html")
@app.route("/js/<path:filename>")
def js(filename: str) -> Any: # pyright: ignore[reportUnusedFunction]
assert app.static_folder is not None
folder = Path(app.static_folder) / "js"
return send_from_directory(folder, filename)
@app.route("/api/tables")
def tables_endpoint() -> Any: # pyright: ignore[reportUnusedFunction]
return jsonify(tables)
@app.route("/api/columns")
def columns() -> Any: # pyright: ignore[reportUnusedFunction]
table = request.args.get("table", default_table)
rows = con.execute(f'PRAGMA table_info("{table}")').fetchall()
return jsonify([{"name": r[1], "type": r[2]} for r in rows])
def _cache_get(key: Tuple[str, str, str]) -> List[str] | None:
item = sample_cache.get(key)
if item is None:
return None
vals, ts = item
if time.time() - ts > CACHE_TTL:
del sample_cache[key]
return None
sample_cache[key] = (vals, time.time())
return vals
def _cache_set(key: Tuple[str, str, str], vals: List[str]) -> None:
sample_cache[key] = (vals, time.time())
if len(sample_cache) > CACHE_LIMIT:
oldest = min(sample_cache.items(), key=lambda kv: kv[1][1])[0]
del sample_cache[oldest]
@app.route("/api/samples")
def sample_values() -> Any: # pyright: ignore[reportUnusedFunction]
table = request.args.get("table", default_table)
column = request.args.get("column")
substr = request.args.get("q", "")
column_types = get_columns(table)
if not column or column not in column_types:
return jsonify([])
ctype = column_types[column].upper()
if "CHAR" not in ctype and "STRING" not in ctype and "VARCHAR" not in ct
ype:
return jsonify([])
key = (table, column, substr)
cached = _cache_get(key)
if cached is not None:
return jsonify(cached)
qcol = _quote(column)
rows = con.execute(
f"SELECT DISTINCT {qcol} FROM \"{table}\" WHERE CAST({qcol} AS VARCH
AR) ILIKE '%' || ? || '%' LIMIT 20",
[substr],
).fetchall()
values = [r[0] for r in rows]
...
return jsonify({"error": f"Unknown column: {col}"}), 400
if params.order_by and params.order_by not in valid_cols:
return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
if params.group_by or params.graph_type == "timeseries":
agg = (params.aggregate or "count").lower()
if agg.startswith("p") or agg == "sum":
need_numeric = True
allow_time = False
elif agg == "avg" or agg in {"min", "max"}:
need_numeric = False
allow_time = True
else:
need_numeric = False
allow_time = False
if need_numeric or allow_time:
for c in params.columns:
if c in params.group_by or c == params.x_axis:
continue
if c not in column_types:
continue
ctype = column_types.get(c, "").upper()
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
is_time = "TIMESTAMP" in ctype or "DATE" in ctype or "TIME"
in ctype
if need_numeric and not is_numeric:
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if allow_time and not (is_numeric or is_time):
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if (params.start is None or params.end is None) and (
params.x_axis or params.time_column
):
axis = params.x_axis or params.time_column
assert axis is not None
row = cast(
tuple[datetime | None, datetime | None],
con.execute(
f'SELECT min({_quote(axis)}), max({_quote(axis)}) FROM "{par
ams.table}"'
).fetchall()[0],
)
mn, mx = row
divisor = {
"s": 1,
"ms": 1000,
"us": 1_000_000,
"ns": 1_000_000_000,
}.get(params.time_unit, 1)
if isinstance(mn, (int, float)):
try:
mn = datetime.fromtimestamp(int(mn) / divisor, tz=timezone.u
tc)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mn} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if isinstance(mx, (int, float)):
try:
mx = datetime.fromtimestamp(int(mx) / divisor, tz=timezone.u
tc)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mx} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if params.start is None and mn is not None:
params.start = (
mn.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mn, str)
else mn
)
if params.end is None and mx is not None:
params.end = (
mx.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mx, str)
else mx
)
bucket_size: int | None = None
if params.graph_type == "timeseries":
bucket_size = _granularity_seconds(
params.granularity,
params.start if isinstance(params.start, str) else None,
params.end if isinstance(params.end, str) else None,
root@408933a71470:/workspace/scubaduck# grep -n "order_by" -n scubaduck/server.p
y
36: order_by: str | None = None
269: order_by = params.order_by if params.order_by in selected_for_order else
None
275: order_by=None,
289: if order_by:
290: lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
343: if order_by:
344: lines.append(f"ORDER BY {_quote(order_by)} {params.order_dir}")
455: order_by=payload.get("order_by"),
526: if params.order_by and params.order_by not in valid_cols:
527: return jsonify({"error": f"Unknown column: {params.order_by}"}),
400
I'll start reviewing from line 641 onward.
root@408933a71470:/workspace/scubaduck# sed -n '640,960p' scubaduck/server.py
params.end if isinstance(params.end, str) else None,
)
if (
params.limit is not None
and params.start is not None
and params.end is not None
):
try:
start_dt = dtparser.parse(params.start)
end_dt = dtparser.parse(params.end)
buckets = math.ceil(
(end_dt - start_dt).total_seconds() / bucket_size
)
if buckets > 1:
params.limit *= buckets
except Exception:
pass
sql = build_query(params, column_types)
try:
rows = con.execute(sql).fetchall()
except Exception as exc:
tb = traceback.format_exc()
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
app = create_app()
if __name__ == "__main__":
app.run(debug=True)
I need to activate the environment before running the tests to ensure they pass.
root@408933a71470:/workspace/scubaduck# source .venv/bin/activate
(scubaduck) root@408933a71470:/workspace/scubaduck# pytest -q
bringing up nodes...
........................................................................ [ 64%]
....................................... [100%]
111 passed in 15.49s
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see that the default Order By behavior leads to using the first DB column.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "QueryParams(" -n -r
. | head
grep: ./.venv/lib/python3.12/site-packages/playwright/driver/node: binary file m
atches
./.venv/lib/python3.12/site-packages/playwright/driver/package/lib/server/transp
ort.js:99: const logUrl = stripQueryParams(url);
./.venv/lib/python3.12/site-packages/playwright/driver/package/lib/server/transp
ort.js:169:function stripQueryParams(url) {
./scubaduck/server.py:452: params = QueryParams(
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll open the section around line 452 to look at the JSON parsing code.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '440,540p' scubaduck/
server.py
_cache_set(key, values)
return jsonify(values)
@app.route("/api/query", methods=["POST"])
def query() -> Any: # pyright: ignore[reportUnusedFunction]
payload = request.get_json(force=True)
try:
start = parse_time(payload.get("start"))
end = parse_time(payload.get("end"))
except Exception as exc:
return jsonify({"error": str(exc)}), 400
params = QueryParams(
start=start,
end=end,
order_by=payload.get("order_by"),
order_dir=payload.get("order_dir", "ASC"),
limit=payload.get("limit"),
columns=payload.get("columns", []),
derived_columns=payload.get("derived_columns", {}),
graph_type=payload.get("graph_type", "samples"),
group_by=payload.get("group_by", []),
aggregate=payload.get("aggregate"),
show_hits=payload.get("show_hits", False),
x_axis=payload.get("x_axis"),
granularity=payload.get("granularity", "Auto"),
fill=payload.get("fill", "0"),
table=payload.get("table", default_table),
time_column=payload.get("time_column", "timestamp"),
time_unit=payload.get("time_unit", "s"),
)
for f in payload.get("filters", []):
params.filters.append(Filter(f["column"], f["op"], f.get("value")))
if params.table not in tables:
return jsonify({"error": "Invalid table"}), 400
column_types = get_columns(params.table)
if params.time_column and params.time_column not in column_types:
return jsonify({"error": "Invalid time_column"}), 400
if params.time_unit not in {"s", "ms", "us", "ns"}:
return jsonify({"error": "Invalid time_unit"}), 400
if params.graph_type not in {"table", "timeseries"} and (
params.group_by or params.aggregate or params.show_hits
):
return (
jsonify(
{
"error": "group_by, aggregate and show_hits are only val
id for table or timeseries view"
}
),
400,
)
valid_cols = set(column_types.keys())
valid_cols.update(params.derived_columns.keys())
if params.graph_type == "timeseries":
if params.x_axis is None:
params.x_axis = params.time_column
if params.x_axis is None or params.x_axis not in valid_cols:
return jsonify({"error": "Invalid x_axis"}), 400
ctype = column_types.get(params.x_axis, "").upper()
is_time = any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"])
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
if not (is_time or is_numeric):
return jsonify({"error": "x_axis must be a time column"}), 400
for col in params.columns:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
for col in params.group_by:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
if params.order_by and params.order_by not in valid_cols:
return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
if params.group_by or params.graph_type == "timeseries":
agg = (params.aggregate or "count").lower()
if agg.startswith("p") or agg == "sum":
need_numeric = True
allow_time = False
elif agg == "avg" or agg in {"min", "max"}:
need_numeric = False
allow_time = True
else:
need_numeric = False
allow_time = False
if need_numeric or allow_time:
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '540,680p' scubaduck/
server.py
if need_numeric or allow_time:
for c in params.columns:
if c in params.group_by or c == params.x_axis:
continue
if c not in column_types:
continue
ctype = column_types.get(c, "").upper()
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
is_time = "TIMESTAMP" in ctype or "DATE" in ctype or "TIME"
in ctype
if need_numeric and not is_numeric:
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if allow_time and not (is_numeric or is_time):
return (
jsonify(
{
"error": f"Aggregate {agg} cannot be applied
to column {c}",
}
),
400,
)
if (params.start is None or params.end is None) and (
params.x_axis or params.time_column
):
axis = params.x_axis or params.time_column
assert axis is not None
row = cast(
tuple[datetime | None, datetime | None],
con.execute(
f'SELECT min({_quote(axis)}), max({_quote(axis)}) FROM "{par
ams.table}"'
).fetchall()[0],
)
mn, mx = row
divisor = {
"s": 1,
"ms": 1000,
"us": 1_000_000,
"ns": 1_000_000_000,
}.get(params.time_unit, 1)
if isinstance(mn, (int, float)):
try:
mn = datetime.fromtimestamp(int(mn) / divisor, tz=timezone.u
tc)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mn} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if isinstance(mx, (int, float)):
try:
mx = datetime.fromtimestamp(int(mx) / divisor, tz=timezone.u
tc)
except Exception:
return (
jsonify(
{
"error": (
f"Invalid time value {mx} for column {axis}"
f" with time_unit {params.time_unit}"
)
}
),
400,
)
if params.start is None and mn is not None:
params.start = (
mn.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mn, str)
else mn
)
if params.end is None and mx is not None:
params.end = (
mx.strftime("%Y-%m-%d %H:%M:%S") if not isinstance(mx, str)
else mx
)
bucket_size: int | None = None
if params.graph_type == "timeseries":
bucket_size = _granularity_seconds(
params.granularity,
params.start if isinstance(params.start, str) else None,
params.end if isinstance(params.end, str) else None,
)
if (
params.limit is not None
and params.start is not None
and params.end is not None
):
try:
start_dt = dtparser.parse(params.start)
end_dt = dtparser.parse(params.end)
buckets = math.ceil(
(end_dt - start_dt).total_seconds() / bucket_size
)
if buckets > 1:
params.limit *= buckets
except Exception:
pass
sql = build_query(params, column_types)
try:
rows = con.execute(sql).fetchall()
except Exception as exc:
tb = traceback.format_exc()
print(f"Query failed:\n{sql}\n{tb}")
return (
jsonify({"sql": sql, "error": str(exc), "traceback": tb}),
400,
)
result: Dict[str, Any] = {"sql": sql, "rows": rows}
if params.start is not None:
result["start"] = str(params.start)
if params.end is not None:
result["end"] = str(params.end)
if bucket_size is not None:
result["bucket_size"] = bucket_size
return jsonify(result)
return app
(scubaduck) root@408933a71470:/workspace/scubaduck#
The `order_by` parameter has no explicit default, but the test suite suggests it is expected to default to the first column of the table.
| head
grep: tests/__pycache__/web_utils.cpython-312.pyc: binary file matches
grep: tests/__pycache__/test_server_db_types.cpython-312-pytest-8.3.5.pyc: binar
y file matches
grep: tests/__pycache__/test_web.cpython-312-pytest-8.3.5.pyc: binary file match
es
grep: tests/__pycache__/test_server_timeseries.cpython-312-pytest-8.3.5.pyc: bin
ary file matches
grep: tests/__pycache__/test_web_chips.cpython-312-pytest-8.3.5.pyc: binary file
matches
grep: tests/__pycache__/test_server_errors.cpython-312-pytest-8.3.5.pyc: binary
file matches
grep: tests/__pycache__/test_web_table.cpython-312-pytest-8.3.5.pyc: binary file
matches
grep: tests/__pycache__/test_web_basics.cpython-312-pytest-8.3.5.pyc: binary fil
e matches
grep: tests/__pycache__/test_server_basic.cpython-312-pytest-8.3.5.pyc: binary f
ile matches
grep: tests/__pycache__/test_server_time.cpython-312-pytest-8.3.5.pyc: binary fi
le matches
grep: tests/__pycache__/test_web_columns.cpython-312-pytest-8.3.5.pyc: binary fi
le matches
tests/test_web_basics.py:14: order_by="user",
tests/test_web_basics.py:27:def test_order_by(page: Any, server_url: str) -> Non
e:
tests/test_web_basics.py:33: order_by="value",
tests/test_web_basics.py:47: order_by="user",
tests/test_web_basics.py:116: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:137: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:152: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:161: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:194: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:208: page.wait_for_selector("#order_by option", stat
e="attached")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I need to scroll to capture more details. Let's go ahead and do that.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" tests/tes
t_server*.py | head
tests/test_server_basic.py:15: "order_by": "timestamp",
tests/test_server_basic.py:48: "order_by": "timestamp",
tests/test_server_basic.py:99: "order_by": "timestamp",
tests/test_server_basic.py:120: "order_by": "timestamp",
tests/test_server_basic.py:153:def test_order_by_ignored_when_not_selected() ->
None:
tests/test_server_basic.py:158: "order_by": "value",
tests/test_server_db_types.py:17: "order_by": "timestamp",
tests/test_server_db_types.py:84: "order_by": "timestamp",
tests/test_server_db_types.py:113: "order_by": "timestamp",
tests/test_server_errors.py:15: "order_by": "timestamp",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll search for tests related to order_by
in the codebase.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -r tests
| grep -v __pycache__ | head -n 20
grep: tests/__pycache__/web_utils.cpython-312.pyc: binary file matches
grep: tests/__pycache__/test_server_db_types.cpython-312-pytest-8.3.5.pyc: binar
y file matches
grep: tests/__pycache__/test_web.cpython-312-pytest-8.3.5.pyc: binary file match
es
grep: tests/__pycache__/test_server_timeseries.cpython-312-pytest-8.3.5.pyc: bin
ary file matches
grep: tests/__pycache__/test_web_chips.cpython-312-pytest-8.3.5.pyc: binary file
matches
grep: tests/__pycache__/test_server_errors.cpython-312-pytest-8.3.5.pyc: binary
file matches
grep: tests/__pycache__/test_web_table.cpython-312-pytest-8.3.5.pyc: binary file
matches
grep: tests/__pycache__/test_web_basics.cpython-312-pytest-8.3.5.pyc: binary fil
e matches
grep: tests/__pycache__/test_server_basic.cpython-312-pytest-8.3.5.pyc: binary f
ile matches
grep: tests/__pycache__/test_server_time.cpython-312-pytest-8.3.5.pyc: binary fi
le matches
grep: tests/__pycache__/test_web_columns.cpython-312-pytest-8.3.5.pyc: binary fi
le matches
tests/test_web_basics.py:14: order_by="user",
tests/test_web_basics.py:27:def test_order_by(page: Any, server_url: str) -> Non
e:
tests/test_web_basics.py:33: order_by="value",
tests/test_web_basics.py:47: order_by="user",
tests/test_web_basics.py:116: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:137: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:152: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:161: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:194: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:208: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:218: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:230: order_by="user",
tests/test_web_basics.py:239: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_web_basics.py:268: "&order_by=timestamp&limit=2"
tests/test_web_basics.py:271: page.wait_for_selector("#order_by option", stat
e="attached")
tests/test_server_errors.py:15: "order_by": "timestamp",
tests/test_server_errors.py:55: "order_by": "timestamp",
tests/test_server_timeseries.py:17: "order_by": "user",
tests/test_server_timeseries.py:41: "order_by": "user",
tests/test_server_timeseries.py:140: "order_by": "user",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll find tests where order_by isn't explicitly set, as others already handle it.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "query" -r tests/tes
t_server*.py | grep -v "order_by" | head
tests/test_server_basic.py:8:def test_basic_query() -> None:
tests/test_server_basic.py:22: "/api/query", data=json.dumps(payload), co
ntent_type="application/json"
tests/test_server_basic.py:54: "/api/query", data=json.dumps(payload), co
ntent_type="application/json"
tests/test_server_basic.py:82: "/api/query", data=json.dumps(no_filter),
content_type="application/json"
tests/test_server_basic.py:85: "/api/query", data=json.dumps(empty_filter
), content_type="application/json"
tests/test_server_basic.py:105: "/api/query", data=json.dumps(payload), c
ontent_type="application/json"
tests/test_server_basic.py:130: "/api/query", data=json.dumps(contains),
content_type="application/json"
tests/test_server_basic.py:140: "/api/query", data=json.dumps(regex), con
tent_type="application/json"
tests/test_server_basic.py:148: "/api/query", data=json.dumps(not_empty),
content_type="application/json"
tests/test_server_basic.py:162: "/api/query", data=json.dumps(payload), c
ontent_type="application/json"
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll check out the test_server_basic.py file to see what’s inside.
from scubaduck import server
def test_basic_query() -> None:
    """A bounded samples query returns only rows inside the time range."""
    client = server.app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-02 00:00:00",
        "order_by": "timestamp",
        "order_dir": "ASC",
        "limit": 10,
        "columns": ["timestamp", "event", "value", "user"],
        "filters": [],
    }
    resp = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    body = resp.get_json()
    assert body
    rows = body["rows"]
    # We expect first three rows (until 2024-01-02 00:00:00)
    assert len(rows) == 3
    assert rows[0][1] == "login"
    assert rows[1][1] == "logout"
def test_js_served() -> None:
    """The chip-input JS asset is served and exposes its init function."""
    client = server.app.test_client()
    resp = client.get("/js/chip_input.js")
    assert resp.status_code == 200
    assert b"initChipInput" in resp.data
def test_filter_multi_token() -> None:
    """An '=' filter with a list value matches any of the listed users."""
    client = server.app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-02 03:00:00",
        "order_by": "timestamp",
        "limit": 10,
        "columns": ["timestamp", "event", "value", "user"],
        "filters": [{"column": "user", "op": "=", "value": ["alice", "charlie"]}],
    }
    resp = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    body = resp.get_json()
    assert body
    rows = body["rows"]
    # Should only return rows for alice and charlie
    assert len(rows) == 3
    assert rows[0][3] == "alice"
    assert rows[-1][3] == "charlie"
def test_empty_filter_is_noop() -> None:
    """A filter whose value is None must not change the result set."""
    client = server.app.test_client()
    base_payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "limit": 100,
        "columns": ["timestamp", "event", "value", "user"],
    }

    def rows_for(filters: list) -> list:
        # Helper: run one query and return just its rows.
        resp = client.post(
            "/api/query",
            data=json.dumps({**base_payload, "filters": filters}),
            content_type="application/json",
        )
        return resp.get_json()["rows"]

    without_filter = rows_for([])
    with_empty_filter = rows_for([{"column": "user", "op": "=", "value": None}])
    assert without_filter == with_empty_filter
def test_select_columns() -> None:
    """Only the requested columns come back, in the requested order."""
    client = server.app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "order_by": "timestamp",
        "limit": 10,
        "columns": ["timestamp", "user"],
        "filters": [],
    }
    resp = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    body = resp.get_json()
    assert body
    rows = body["rows"]
    assert len(rows[0]) == 2
    assert rows[0][1] == "alice"
def test_string_filter_ops() -> None:
    """String filter operators (contains, ~ regex, !empty) behave as expected."""
    app = server.app
    client = app.test_client()
    # Shared query settings; each sub-case swaps in its own "filters" list.
    base = {
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "order_by": "timestamp",
        "limit": 100,
        "columns": ["timestamp", "event", "value", "user"],
    }
    # "contains": substring match on the user column.
    contains = {
        **base,
        "filters": [{"column": "user", "op": "contains", "value": "ali"}],
    }
    rv = client.post(
        "/api/query", data=json.dumps(contains), content_type="application/json"
    )
    rows = rv.get_json()["rows"]
    assert all("ali" in r[3] for r in rows)
    # "~": regex match anchored at the start of the value.
    regex = {
        **base,
        "filters": [{"column": "user", "op": "~", "value": "^a.*"}],
    }
    rv = client.post(
        "/api/query", data=json.dumps(regex), content_type="application/json"
    )
    rows = rv.get_json()["rows"]
    assert all(r[3].startswith("a") for r in rows)
    assert len(rows) == 2
    # "!empty": value-less operator keeping rows with a non-empty user.
    not_empty = {**base, "filters": [{"column": "user", "op": "!empty"}]}
    rv = client.post(
        "/api/query", data=json.dumps(not_empty), content_type="application/json"
    )
    assert len(rv.get_json()["rows"]) == 4
def test_order_by_ignored_when_not_selected() -> None:
    """Ordering by a column that is not selected must not emit ORDER BY."""
    client = server.app.test_client()
    payload = {
        "table": "events",
        "order_by": "value",
        "columns": ["timestamp"],
    }
    resp = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    body = resp.get_json()
    assert resp.status_code == 200
    assert "ORDER BY" not in body["sql"]
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '200,400p' tests/test
_server_basic.py
def test_group_by_table() -> None:
    """Table view with group_by + aggregate returns grouped, aggregated rows."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "table",
        "order_by": "user",
        "limit": 10,
        "columns": ["value"],
        "group_by": ["user"],
        "aggregate": "Sum",
        "show_hits": True,
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    rows = rv.get_json()["rows"]
    # Row layout appears to be: group key, hit count (show_hits), Sum aggregate
    # — TODO confirm against build_query.
    assert rows[0][0] == "alice"
    assert rows[0][1] == 2
    assert rows[0][2] == 40
def test_table_avg_with_timestamp() -> None:
    """Avg aggregate applied to a timestamp column succeeds without error."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "table",
        "order_by": "user",
        "limit": 100,
        "columns": ["user", "timestamp", "value"],
        "group_by": ["user"],
        "aggregate": "Avg",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    assert "error" not in data
    rows = data["rows"]
    assert rows[0][0] == "alice"
    from dateutil import parser

    # Strip any timezone offset before comparing to a naive expected value.
    ts = parser.parse(rows[0][1]).replace(tzinfo=None)
    assert ts == parser.parse("2024-01-01 12:00:00")
def test_timeseries_basic() -> None:
    """A grouped timeseries query succeeds and returns the expected row count."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "limit": 100,
        "group_by": ["user"],
        "aggregate": "Count",
        "columns": ["value"],
        "x_axis": "timestamp",
        "granularity": "1 day",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    # Presumably 2 users x 2 daily buckets — TODO confirm against sample data.
    assert len(data["rows"]) == 4
def test_timeseries_orders_by_xaxis() -> None:
    """Timeseries rows come back sorted by the x-axis bucket timestamps."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "limit": 100,
        "columns": ["value"],
        "x_axis": "timestamp",
        "granularity": "1 day",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    rows = data["rows"]
    from dateutil import parser

    # Column 0 is the bucket timestamp; it must already be in ascending order.
    timestamps = [parser.parse(r[0]).replace(tzinfo=None) for r in rows]
    assert timestamps == sorted(timestamps)
def test_timeseries_count_no_columns() -> None:
    """A timeseries request with an empty column list still returns per-bucket values."""
    app = server.app
    client = app.test_client()
    payload: dict[str, Any] = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "granularity": "1 day",
        "columns": [],
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    rows = data["rows"]
    # Two daily buckets, each with value 2 (presumably a row count — TODO confirm).
    assert len(rows) == 2
    assert rows[0][1] == 2
    assert rows[1][1] == 2
def test_timeseries_limit_applies_to_series() -> None:
    """limit=1 caps the number of series (groups), not the number of rows."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "limit": 1,
        "order_by": "user",
        "group_by": ["user"],
        "aggregate": "Count",
        "columns": ["value"],
        "x_axis": "timestamp",
        "granularity": "1 day",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    # Two bucket rows survive, all belonging to the single retained series.
    assert len(data["rows"]) == 2
    assert all(r[1] == "alice" for r in data["rows"])
def test_timeseries_auto_and_fine_buckets() -> None:
    """Auto/Fine granularities align buckets to the range start and bucket_size."""
    app = server.app
    client = app.test_client()

    def run(gran: str) -> None:
        # One query per granularity setting; the assertions are shared.
        payload = {
            "start": "2024-01-01 00:00:00",
            "end": "2024-01-02 03:00:00",
            "graph_type": "timeseries",
            "columns": ["value"],
            "x_axis": "timestamp",
            "granularity": gran,
        }
        rv = client.post(
            "/api/query", data=json.dumps(payload), content_type="application/json"
        )
        data = rv.get_json()
        assert rv.status_code == 200
        from dateutil import parser

        start = parser.parse(cast(str, payload["start"])).replace(tzinfo=None)
        buckets = [
            parser.parse(cast(str, r[0])).replace(tzinfo=None) for r in data["rows"]
        ]
        # First bucket must start exactly at the requested range start.
        assert buckets[0] == start
        if len(buckets) > 1:
            # Consecutive buckets are a whole multiple of bucket_size apart.
            step = (buckets[1] - buckets[0]).total_seconds()
            assert step % data["bucket_size"] == 0
        # At least one bucket carries a non-zero value.
        assert any(r[1] != 0 for r in data["rows"])

    run("Auto")
    run("Fine")
def test_timeseries_string_column_error() -> None:
    """Selecting string columns in a timeseries request is accepted (HTTP 200)."""
    # NOTE(review): the name says "error" but the request is expected to
    # succeed — presumably kept from an earlier behavior; consider renaming.
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "limit": 7,
        "columns": ["timestamp", "event", "value", "user"],
        "x_axis": "timestamp",
        "granularity": "1 hour",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    assert rv.status_code == 200
def test_derived_column_basic() -> None:
    """A derived column expression is evaluated and returned with each row."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "limit": 10,
        "columns": ["timestamp"],
        # SQL expression computed per row and appended after the selected columns.
        "derived_columns": {"val2": "value * 2"},
        "filters": [],
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    assert data["rows"][0][1] == 20
def test_timeseries_derived_column() -> None:
    """Derived column values track their source column through aggregation."""
    app = server.app
    client = app.test_client()
    payload = {
        "table": "events",
        "start": "2024-01-01 00:00:00",
        "end": "2024-01-03 00:00:00",
        "graph_type": "timeseries",
        "granularity": "1 hour",
        "limit": 7,
        "columns": ["value"],
        "derived_columns": {"derived_1": "value * 2"},
        "aggregate": "Avg",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    rows = data["rows"]
    # The derived column (index 2) stays exactly twice the averaged value.
    assert all(r[2] == r[1] * 2 for r in rows)
def test_reserved_word_column() -> None:
    """Columns named with SQL reserved words (e.g. "desc") are handled correctly."""
    # Uses the "TEST" database so the "extra" table with a "desc" column exists.
    app = server.create_app("TEST")
    client = app.test_client()
    payload = {
        "table": "extra",
        "columns": ["ts", "desc"],
        "order_by": "ts",
        # Empty string disables the time column / range filtering.
        "time_column": "",
    }
    rv = client.post(
        "/api/query", data=json.dumps(payload), content_type="application/json"
    )
    data = rv.get_json()
    assert rv.status_code == 200
    assert len(data["rows"]) == 2
    assert data["rows"][0][1] == "x"
(scubaduck) root@408933a71470:/workspace/scubaduck#
Continuing to scroll down to line 400 and further to review all changes.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '400,800p' tests/test
_server_timeseries.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm reopening the file to verify if it ends around 318 lines.
(scubaduck) root@408933a71470:/workspace/scubaduck# wc -l tests/test_server_time
series.py
267 tests/test_server_timeseries.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check out the 'test_web_basics.py' file to start server_web tests.
def test_range_filters(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-02 00:00:00",
end="2024-01-02 04:00:00",
order_by="user",
limit=100,
)
assert len(data["rows"]) == 2
from dateutil import parser
timestamps = [parser.parse(row[0]).replace(tzinfo=None) for row in data["row
s"]]
assert timestamps == [
parser.parse("2024-01-02 00:00:00"),
parser.parse("2024-01-02 03:00:00"),
]
def test_order_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="value",
order_dir="DESC",
limit=100,
)
values = [row[2] for row in data["rows"]]
assert values == sorted(values, reverse=True)
def test_limit(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
limit=2,
)
assert len(data["rows"]) == 2
def test_time_column_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
options = page.locator("#time_column option").all_inner_texts()
assert "(none)" in options
assert "timestamp" in options
assert "value" in options
assert page.input_value("#time_column") == "timestamp"
def test_time_column_none_hides_range(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
select_value(page, "#time_column", "")
assert page.is_hidden("#start")
assert page.is_hidden("#end")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert len(data["rows"]) == 4
assert "start" not in data and "end" not in data
def test_time_unit_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_unit", state="attached")
opts = page.locator("#time_unit option").all_inner_texts()
assert "ms" in opts
assert page.input_value("#time_unit") == "s"
def test_time_unit_hidden_when_no_time_column(page: Any, server_url: str) -> Non
e:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
select_value(page, "#time_column", "")
assert page.is_hidden("#time_unit")
def test_table_selector_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#table option", state="attached")
disp = page.query_selector("#table + .dropdown-display")
assert disp
assert (
page.evaluate("getComputedStyle(document.querySelector('#table')).displa
y")
== "none"
)
assert page.query_selector("#table + .dropdown-display + .dropdown-menu inpu
t")
def test_x_axis_default_entry(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#graph_type", state="attached")
select_value(page, "#graph_type", "timeseries")
page.wait_for_selector("#x_axis option", state="attached")
options = page.locator("#x_axis option").all_inner_texts()
assert "(default)" in options
assert page.input_value("#x_axis") == ""
def test_simple_filter(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Add Filter")
filter_el = page.query_selector("#filters .filter:last-child")
assert filter_el
page.evaluate(
page.keyboard.type("alice")
page.keyboard.press("Enter")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert len(data["rows"]) == 2
assert all(row[3] == "alice" for row in data["rows"])
def test_default_filter_and_layout(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
count = page.evaluate("document.querySelectorAll('#filters .filter').length"
)
assert count == 1
last_is_button = page.evaluate(
"document.querySelector('#filters').lastElementChild.id === 'add_filter'
"
)
assert last_is_button
position = page.evaluate(
"getComputedStyle(document.querySelector('#filters .filter button.remove
')).position"
)
assert position == "static"
def test_filter_remove_alignment(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
diff = page.evaluate(
"() => { const r=document.querySelector('#filters .filter-row').getBound
ingClientRect(); const x=document.querySelector('#filters .filter-row button.rem
ove').getBoundingClientRect(); return Math.abs(r.right - x.right); }"
)
assert diff <= 1
def test_header_and_tabs(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
header = page.text_content("#header")
assert "sample.csv" in header
assert "events" in header
assert page.is_visible("#settings")
assert page.is_hidden("#columns")
page.click("text=Columns")
assert page.is_visible("#columns")
cols = [c.strip() for c in page.locator("#column_groups li").all_inner_texts
()]
assert "timestamp" in cols
assert "event" in cols
page.click("text=View Settings")
assert page.is_visible("#settings")
btn_color = page.evaluate(
"getComputedStyle(document.querySelector('#dive')).backgroundColor"
)
assert "rgb(0, 128, 0)" == btn_color
sidebar_overflow = page.evaluate(
"getComputedStyle(document.querySelector('#sidebar')).overflowY"
)
view_overflow = page.evaluate(
"getComputedStyle(document.querySelector('#view')).overflowY"
)
assert sidebar_overflow == "auto"
assert view_overflow == "auto"
def test_help_and_alignment(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
titles = page.evaluate(
"Array.from(document.querySelectorAll('#settings .help')).map(e => e.tit
le)"
)
assert any("start/end of the time range" in t for t in titles)
text_align = page.evaluate(
"getComputedStyle(document.querySelector('#settings label')).textAlign"
)
assert text_align == "right"
def test_relative_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
btn = page.query_selector('[data-target="start-select"]')
assert btn
btn.click()
page.click("#start-select div:text('-3 hours')")
assert page.input_value("#start") == "-3 hours"
def test_end_dropdown_now(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click('[data-target="end-select"]')
page.click("#end-select div:text('now')")
assert page.input_value("#end") == "now"
def test_invalid_time_error_shown(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="nonsense",
end="now",
order_by="user",
)
assert "error" in data
msg = page.text_content("#view")
assert "nonsense" in msg
def test_url_query_persistence(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.wait_for_function("window.lastResults !== undefined")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check for any run_query or similar functions related to querying in the code.
-r | head
grep: tests/__pycache__/web_utils.cpython-312.pyc: binary file matches
grep: tests/__pycache__/test_web_table.cpython-312-pytest-8.3.5.pyc: binary file
matches
grep: tests/__pycache__/test_web_basics.cpython-312-pytest-8.3.5.pyc: binary fil
e matches
tests/test_web_basics.py:5:from tests.web_utils import run_query, select_value
tests/test_web_basics.py:9: data = run_query(
tests/test_web_basics.py:28: data = run_query(
tests/test_web_basics.py:42: data = run_query(
tests/test_web_basics.py:225: data = run_query(
tests/web_utils.py:13:def run_query(
tests/test_web_table.py:5:from tests.web_utils import run_query
tests/test_web_table.py:9: run_query(
tests/test_web_table.py:57: data = run_query(
tests/test_web_table.py:71: run_query(
from typing import Any
def select_value(page: Any, selector: str, value: str) -> None:
    """Set a <select> element's value via the page's ``setSelectValue`` helper."""
    args = {"sel": selector, "val": value}
    page.evaluate("arg => setSelectValue(arg.sel, arg.val)", args)
def run_query(
    page: Any,
    url: str,
    *,
    start: str | None = None,
    end: str | None = None,
    order_by: str | None = None,
    order_dir: str | None = "ASC",
    limit: int | None = None,
    group_by: list[str] | None = None,
    aggregate: str | None = None,
) -> dict[str, Any]:
    """Load the app, apply the given query settings, press Dive and return
    the results recorded in ``window.lastResults``.

    Only the parameters that are not ``None`` are applied; the rest keep the
    page's defaults.
    """
    page.goto(url)
    page.wait_for_selector("#order_by option", state="attached")
    page.wait_for_selector("#order_dir", state="attached")
    page.wait_for_function("window.lastResults !== undefined")
    if start is not None:
        page.fill("#start", start)
    if end is not None:
        page.fill("#end", end)
    if order_by is not None:
        select_value(page, "#order_by", order_by)
    # The direction button toggles; a single click flips ASC -> DESC.
    if order_dir == "DESC":
        page.click("#order_dir")
    if limit is not None:
        page.fill("#limit", str(limit))
    if group_by is not None:
        # Grouping is only meaningful in the table view.
        select_value(page, "#graph_type", "table")
        page.evaluate(
            "g => { groupBy.chips = g; groupBy.renderChips(); }",
            group_by,
        )
    if aggregate is not None:
        select_value(page, "#graph_type", "table")
        select_value(page, "#aggregate", aggregate)
    if page.input_value("#graph_type") != "samples":
        # Non-sample views need the value column (and the sort column, when
        # present) ticked in the Columns tab before diving.
        page.click("text=Columns")
        page.wait_for_selector("#column_groups input", state="attached")
        if not page.is_checked("#column_groups input[value='value']"):
            page.check("#column_groups input[value='value']")
        sort_column = order_by or page.input_value("#order_by")
        checkbox = f"#column_groups input[value='{sort_column}']"
        if sort_column and not page.is_checked(checkbox):
            if page.query_selector(checkbox):
                page.check(checkbox)
        page.click("text=View Settings")
    page.evaluate("window.lastResults = undefined")
    page.click("text=Dive")
    page.wait_for_function("window.lastResults !== undefined")
    return page.evaluate("window.lastResults")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’m checking the file tests/test_web_table.py to see what's inside.
from typing import Any
from tests.web_utils import run_query
def test_table_sorting(page: Any, server_url: str) -> None:
    """Clicking a header cycles asc -> desc -> unsorted without re-querying."""
    run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="user",
        order_dir="ASC",
        limit=100,
    )
    # Header text is left-aligned.
    assert (
        page.evaluate(
            "getComputedStyle(document.querySelector('#results th')).textAlign"
        )
        == "left"
    )
    user_header = page.locator("#results th").nth(3)

    def column_values() -> list[str]:
        return page.locator("#results td:nth-child(4)").all_inner_texts()

    initial = column_values()
    assert initial == ["alice", "bob", "alice", "charlie"]
    sql_before = page.evaluate("window.lastResults.sql")

    # First click: ascending order, up arrow, highlighted header.
    user_header.click()
    assert column_values() == sorted(initial)
    assert user_header.inner_text().endswith("▲")
    header_color = page.evaluate(
        "getComputedStyle(document.querySelector('#results th:nth-child(4)')).color"
    )
    assert "0, 0, 255" in header_color
    # Sorting happens client-side: the SQL must be unchanged.
    assert page.evaluate("window.lastResults.sql") == sql_before

    # Second click: descending order, down arrow.
    user_header.click()
    assert column_values() == sorted(initial, reverse=True)
    assert user_header.inner_text().endswith("▼")

    # Third click: back to the original order, plain header.
    user_header.click()
    assert column_values() == initial
    assert user_header.inner_text() == "user"
    header_color = page.evaluate(
        "getComputedStyle(document.querySelector('#results th:nth-child(4)')).color"
    )
    assert "0, 0, 255" not in header_color
def test_table_avg_group_by(page: Any, server_url: str) -> None:
    """Grouping by user with the Avg aggregate yields one row per user."""
    result = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="timestamp",
        group_by=["user"],
        aggregate="Avg",
    )
    assert "error" not in result
    # The fixture window contains three distinct users.
    assert len(result["rows"]) == 3
def test_table_enhancements(page: Any, server_url: str) -> None:
    """Table styling: cell borders, zebra stripes, hover, selection, no overflow."""
    run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="timestamp",
        limit=10,
    )

    def row_background(row: int) -> str:
        selector = f"#results tr:nth-child({row}) td"
        return page.evaluate(
            f"getComputedStyle(document.querySelector('{selector}')).backgroundColor"
        )

    # Cells have solid borders.
    border = page.evaluate(
        "getComputedStyle(document.querySelector('#results td')).borderStyle"
    )
    assert border == "solid"
    # Adjacent rows get different backgrounds (zebra striping).
    row2_color = row_background(2)
    row3_color = row_background(3)
    assert row2_color != row3_color
    # Hovering changes the row's background.
    page.hover("#results tr:nth-child(2)")
    assert row_background(2) != row2_color
    # Clicking selects the row, applying the selection color.
    page.click("#results tr:nth-child(2)")
    assert "189, 228, 255" in row_background(2)
    # The view must not scroll horizontally.
    overflow = page.evaluate(
        "var v=document.getElementById('view'); v.scrollWidth > v.clientWidth"
    )
    assert not overflow
def test_table_single_selection(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
page.click("#results tr:nth-child(2)")
page.click("#results tr:nth-child(3)")
)
assert is_third
def test_timestamp_rendering(page: Any, server_url: str) -> None:
    """Timestamp cells render as strings JavaScript can parse as dates."""
    run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-02 00:00:00",
        order_by="timestamp",
        limit=1,
    )
    cell = page.text_content("#results td")
    # text_content() may return None; fail clearly rather than passing null
    # through to Date.parse below.
    assert cell is not None
    assert cell != "Invalid Date"
    valid = page.evaluate("v => !isNaN(Date.parse(v))", cell)
    assert valid
def test_empty_data_message(page: Any, server_url: str) -> None:
    """A time range with no rows shows the empty-data message in #view."""
    data = run_query(
        page,
        server_url,
        start="2025-01-01 00:00:00",
        end="2025-01-02 00:00:00",
        order_by="timestamp",
        limit=100,
    )
    assert data["rows"] == []
    msg = page.text_content("#view")
    # text_content() may return None; guard so the substring check below
    # cannot raise TypeError instead of a clean assertion failure.
    assert msg is not None
    assert "Empty data provided to table" in msg
def test_group_by_chip_from_url(page: Any, server_url: str) -> None:
    """A group_by URL parameter materialises as a chip in the group-by field."""
    page.goto(f"{server_url}?graph_type=table&group_by=user&order_by=user&limit=10")
    page.wait_for_selector("#group_by_field .chip", state="attached")
    chip_labels = page.evaluate(
        "Array.from(document.querySelectorAll('#group_by_field .chip')).map(c => c.firstChild.textContent)"
    )
    assert chip_labels == ["user"]
def test_group_by_autocomplete(page: Any, server_url: str) -> None:
    """Typing into the group-by chip input suggests matching column names."""
    page.goto(f"{server_url}?graph_type=table")
    page.wait_for_selector("#group_by_field", state="visible")
    chip_input = page.query_selector("#group_by_field .f-val")
    assert chip_input
    chip_input.click()
    page.keyboard.type("us")
    page.wait_for_selector("#group_by_field .chip-dropdown div")
    suggestions = page.locator("#group_by_field .chip-dropdown div").all_inner_texts()
    assert "user" in suggestions
def test_group_by_copy_icon(page: Any, server_url: str) -> None:
    """The group-by field exposes a copy icon with the ⎘ glyph."""
    page.goto(f"{server_url}?graph_type=table")
    page.wait_for_selector("#group_by_field", state="visible")
    copy_glyph = page.text_content("#group_by_field .chip-copy")
    assert copy_glyph == "⎘"
def test_group_by_input_no_border(page: Any, server_url: str) -> None:
    """The chip input inside the group-by field renders without a border."""
    page.goto(f"{server_url}?graph_type=table")
    page.wait_for_selector("#group_by_field", state="visible")
    border_style = page.evaluate(
        "getComputedStyle(document.querySelector('#group_by_field .f-val')).borderStyle"
    )
    assert border_style == "none"
def test_table_group_by_query(page: Any, server_url: str) -> None:
    """A Count aggregate grouped by user returns one row per user."""
    result = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="user",
        limit=100,
        group_by=["user"],
        aggregate="Count",
    )
    assert "error" not in result
    # Three distinct users appear in the fixture window.
    assert len(result["rows"]) == 3
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
    """Avg with no group-by collapses everything into a single aggregate row."""
    # Local import: dateutil is only needed by this test.
    from dateutil import parser

    data = run_query(
        page,
        server_url,
        aggregate="Avg",
    )
    assert len(data["rows"]) == 1
    row = data["rows"][0]
    # Hits column: four sample events contribute to the aggregate.
    assert row[0] == 4
    # Average timestamp, compared timezone-naively.
    ts = parser.parse(row[1]).replace(tzinfo=None)
    assert ts == parser.parse("2024-01-01 13:00:00")
    # Average of the value column.
    assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
    """Aggregated result headers carry the aggregate name as a suffix."""
    run_query(
        page,
        server_url,
        aggregate="Avg",
    )
    headers = page.locator("#results th").all_inner_texts()
    for expected in ("Hits", "timestamp (avg)", "value (avg)"):
        assert expected in headers
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
    """Numeric cells use white-space: nowrap so values never wrap."""
    run_query(page, server_url, limit=10)
    ws = page.evaluate(
        "getComputedStyle(document.querySelector('#results td:nth-child(3)')).whiteSpace"
    )
    assert ws == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
    """Date cells use white-space: nowrap so timestamps never wrap."""
    run_query(page, server_url, limit=10)
    ws = page.evaluate(
        "getComputedStyle(document.querySelector('#results td:nth-child(1)')).whiteSpace"
    )
    assert ws == "nowrap"
def test_derived_column_query(page: Any, server_url: str) -> None:
    """A derived column expression is computed and appended to each row."""
    page.goto(server_url)
    page.wait_for_selector("#order_by option", state="attached")
    # Add a derived column whose expression doubles the value column.
    page.click("text=Columns")
    page.click("text=Add Derived")
    expr_box = page.query_selector("#derived_list .derived textarea")
    assert expr_box
    expr_box.fill("value * 2")
    # Configure the time range and limit, then dive.
    page.click("text=View Settings")
    page.fill("#start", "2024-01-01 00:00:00")
    page.fill("#end", "2024-01-03 00:00:00")
    page.fill("#limit", "10")
    page.evaluate("window.lastResults = undefined")
    page.click("text=Dive")
    page.wait_for_function("window.lastResults !== undefined")
    results = page.evaluate("window.lastResults")
    # The derived column is the last entry of each row: 2 * 10 for the first.
    assert results["rows"][0][-1] == 20
def test_derived_column_remove(page: Any, server_url: str) -> None:
    """The remove button deletes a freshly added derived column."""
    page.goto(server_url)
    page.wait_for_selector("#order_by option", state="attached")
    page.click("text=Columns")
    page.click("text=Add Derived")
    remove_btn = page.query_selector("#derived_list .derived button.remove")
    assert remove_btn
    page.click("#derived_list .derived button.remove")
    remaining = page.evaluate("document.querySelectorAll('#derived_list .derived').length")
    assert remaining == 0
def test_sql_query_display(page: Any, server_url: str) -> None:
    """The SQL used for the query is echoed in the #sql_query element."""
    result = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-02 00:00:00",
        order_by="timestamp",
        limit=10,
    )
    displayed = page.text_content("#sql_query")
    assert displayed is not None
    # The displayed text matches the SQL reported by the server, modulo
    # surrounding whitespace.
    assert displayed.strip() == result["sql"]
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm looking through the tests, checking for any that omit order_by.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '480,960p' tests/test
_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm verifying if all run_query calls specify order_by or if some are missing it.
tests/test_web_table.py | head
249: run_query(page, server_url, limit=10)
257: run_query(page, server_url, limit=10)
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(" tests/t
est_web_table.py | nl -ba | head
1 9: run_query(
2 57: data = run_query(
3 71: run_query(
4 111: run_query(
5 130: run_query(
6 145: data = run_query(
7 197: data = run_query(
8 212: data = run_query(
9 228: run_query(
10 249: run_query(page, server_url, limit=10)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll begin by focusing on the section around line 111.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '90,120p' tests/test_
web_table.py
assert color1 != color2
page.hover("#results tr:nth-child(2)")
hover_color = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(2) td'))
.backgroundColor"
)
assert hover_color != color1
page.click("#results tr:nth-child(2)")
selected_color = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(2) td'))
.backgroundColor"
)
assert "189, 228, 255" in selected_color
overflow = page.evaluate(
"var v=document.getElementById('view'); v.scrollWidth > v.clientWidth"
)
assert not overflow
def test_table_single_selection(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
page.click("#results tr:nth-child(2)")
page.click("#results tr:nth-child(3)")
start="2025-01-01 00:00:00",
end="2025-01-02 00:00:00",
order_by="timestamp",
limit=100,
)
assert data["rows"] == []
msg = page.text_content("#view")
assert "Empty data provided to table" in msg
def test_group_by_chip_from_url(page: Any, server_url: str) -> None:
url = f"{server_url}?graph_type=table&group_by=user&order_by=user&limit=10"
page.goto(url)
page.wait_for_selector("#group_by_field .chip", state="attached")
chips = page.evaluate(
"Array.from(document.querySelectorAll('#group_by_field .chip')).map(c =>
c.firstChild.textContent)"
)
assert chips == ["user"]
def test_group_by_autocomplete(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
inp = page.query_selector("#group_by_field .f-val")
assert inp
inp.click()
page.keyboard.type("us")
page.wait_for_selector("#group_by_field .chip-dropdown div")
options = page.locator("#group_by_field .chip-dropdown div").all_inner_texts
()
assert "user" in options
def test_group_by_copy_icon(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
icon = page.text_content("#group_by_field .chip-copy")
assert icon == "⎘"
def test_group_by_input_no_border(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
border = page.evaluate(
"getComputedStyle(document.querySelector('#group_by_field .f-val')).bord
erStyle"
)
assert border == "none"
def test_table_group_by_query(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
limit=100,
group_by=["user"],
aggregate="Count",
)
assert "error" not in data
assert len(data["rows"]) == 3
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
aggregate="Avg",
)
assert len(data["rows"]) == 1
row = data["rows"][0]
assert row[0] == 4
from dateutil import parser
ts = parser.parse(row[1]).replace(tzinfo=None)
assert ts == parser.parse("2024-01-01 13:00:00")
assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(3)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I need to scroll more to capture the remaining information.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(page, ser
ver_url, limit" -n tests/test_web_table.py
249: run_query(page, server_url, limit=10)
257: run_query(page, server_url, limit=10)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check the content between lines 240 and 260.
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(3)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
)
assert whitespace == "nowrap"
def test_derived_column_query(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
expr = page.query_selector("#derived_list .derived textarea")
assert expr
expr.fill("value * 2")
page.click("text=View Settings")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-03 00:00:00")
page.fill("#limit", "10")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert data["rows"][0][-1] == 20
(scubaduck) root@408933a71470:/workspace/scubaduck#
I noticed run_query might fall back on ordering by the first column, even without order_by specified.
I’ll check the query() function to confirm how order_by is set when it's None.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -n scubad
uck/server.py | sed -n '430,540p'
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll show lines from 460 to 580 to double-check the relevant section.
server.py
return jsonify({"error": str(exc)}), 400
params = QueryParams(
start=start,
end=end,
order_by=payload.get("order_by"),
order_dir=payload.get("order_dir", "ASC"),
limit=payload.get("limit"),
columns=payload.get("columns", []),
derived_columns=payload.get("derived_columns", {}),
graph_type=payload.get("graph_type", "samples"),
group_by=payload.get("group_by", []),
aggregate=payload.get("aggregate"),
show_hits=payload.get("show_hits", False),
x_axis=payload.get("x_axis"),
granularity=payload.get("granularity", "Auto"),
fill=payload.get("fill", "0"),
table=payload.get("table", default_table),
time_column=payload.get("time_column", "timestamp"),
time_unit=payload.get("time_unit", "s"),
)
for f in payload.get("filters", []):
params.filters.append(Filter(f["column"], f["op"], f.get("value")))
if params.table not in tables:
return jsonify({"error": "Invalid table"}), 400
column_types = get_columns(params.table)
if params.time_column and params.time_column not in column_types:
return jsonify({"error": "Invalid time_column"}), 400
if params.time_unit not in {"s", "ms", "us", "ns"}:
return jsonify({"error": "Invalid time_unit"}), 400
if params.graph_type not in {"table", "timeseries"} and (
params.group_by or params.aggregate or params.show_hits
):
return (
jsonify(
{
"error": "group_by, aggregate and show_hits are only val
id for table or timeseries view"
}
),
400,
)
valid_cols = set(column_types.keys())
valid_cols.update(params.derived_columns.keys())
if params.graph_type == "timeseries":
if params.x_axis is None:
params.x_axis = params.time_column
if params.x_axis is None or params.x_axis not in valid_cols:
return jsonify({"error": "Invalid x_axis"}), 400
ctype = column_types.get(params.x_axis, "").upper()
is_time = any(t in ctype for t in ["TIMESTAMP", "DATE", "TIME"])
is_numeric = any(
t in ctype
for t in [
"INT",
"DECIMAL",
"REAL",
"DOUBLE",
"FLOAT",
"NUMERIC",
"HUGEINT",
]
)
if not (is_time or is_numeric):
return jsonify({"error": "x_axis must be a time column"}), 400
for col in params.columns:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
for col in params.group_by:
if col not in valid_cols:
return jsonify({"error": f"Unknown column: {col}"}), 400
if params.order_by and params.order_by not in valid_cols:
return jsonify({"error": f"Unknown column: {params.order_by}"}), 400
if params.group_by or params.graph_type == "timeseries":
agg = (params.aggregate or "count").lower()
(scubaduck) root@408933a71470:/workspace/scubaduck#
I noticed there's no default order_by in the code, but the UI might handle it. Let me check the static JS in scubaduck/static to confirm.
(scubaduck) root@408933a71470:/workspace/scubaduck# ls scubaduck/static
index.html js
(scubaduck) root@408933a71470:/workspace/scubaduck# ls scubaduck/static/js | hea
d
chip_input.js
sidebar.js
table_view.js
time_column.js
timeseries_chart.js
view_settings.js
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -r scubad
uck/static/js | head
scubaduck/static/js/view_settings.js:172: document.getElementById('order_by')
.value = '';
scubaduck/static/js/view_settings.js:198: const orderSelect = document.getEle
mentById('order_by');
scubaduck/static/js/view_settings.js:455: const orderCol = document.getElementB
yId('order_by').value;
scubaduck/static/js/view_settings.js:689: order_by: document.getElementById('
order_by').value,
scubaduck/static/js/view_settings.js:737: if (params.order_by) sp.set('order_by
', params.order_by);
scubaduck/static/js/view_settings.js:767: if (params.order_by) {
scubaduck/static/js/view_settings.js:768: document.getElementById('order_by')
.value = params.order_by;
scubaduck/static/js/view_settings.js:837: if (sp.has('order_by')) params.order_
by = sp.get('order_by');
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '160,220p' scubaduck/
static/js/view_settings.js
limitValues.timeseries = 7;
}
limitInput.value = limitValues[newType];
document.querySelectorAll('#column_groups input').forEach(cb => {
cb.checked = columnValues[newType].includes(cb.value);
});
if (showTS) {
document.querySelectorAll('#column_groups input').forEach(cb => {
if (isTimeColumn(cb.value) || isStringColumn(cb.value)) {
cb.checked = false;
}
});
document.getElementById('order_by').value = '';
}
updateSelectedColumns(newType);
displayType = newType;
}
function updateTimeFieldVisibility() {
const show = document.getElementById('time_column').value !== '';
document.getElementById('start').closest('.field').style.display = show
? 'flex'
: 'none';
document.getElementById('end').closest('.field').style.display = show
? 'flex'
: 'none';
document.getElementById('time_unit').style.display = show ? '' : 'none';
}
orderDirBtn.addEventListener('click', () => {
orderDir = orderDir === 'ASC' ? 'DESC' : 'ASC';
updateOrderDirButton();
});
updateOrderDirButton();
graphTypeSel.addEventListener('change', updateDisplayTypeUI);
document.getElementById('time_column').addEventListener('change', updateTimeFiel
dVisibility);
updateTimeFieldVisibility();
function loadColumns(table) {
return fetch('/api/columns?table=' + encodeURIComponent(table)).then(r => r.js
on()).then(cols => {
const orderSelect = document.getElementById('order_by');
const xAxisSelect = document.getElementById('x_axis');
const groupsEl = document.getElementById('column_groups');
const timeColumnSelect = document.getElementById('time_column');
orderSelect.innerHTML = '';
xAxisSelect.innerHTML = '';
const defOpt = document.createElement('option');
defOpt.value = '';
defOpt.textContent = '(default)';
xAxisSelect.appendChild(defOpt);
timeColumnSelect.innerHTML = '';
const noneOpt = document.createElement('option');
noneOpt.value = '';
noneOpt.textContent = '(none)';
timeColumnSelect.appendChild(noneOpt);
groupsEl.innerHTML = '';
allColumns.length = 0;
stringColumns.length = 0;
integerColumns.length = 0;
timeColumns.length = 0;
timeColumnOptions.length = 0;
baseColumns.length = 0;
baseStringColumns.length = 0;
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll take a look between lines 220 and 300 now.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '220,320p' scubaduck/
static/js/view_settings.js
baseStringColumns.length = 0;
baseIntegerColumns.length = 0;
baseTimeColumns.length = 0;
baseTimeColumnOptions.length = 0;
for (const k in columnTypes) delete columnTypes[k];
const groups = {
time: {name: 'Time', cols: [], ul: null},
integer: {name: 'Integers', cols: [], ul: null},
string: {name: 'Strings', cols: [], ul: null},
};
cols.forEach(c => {
const t = c.type.toUpperCase();
columnTypes[c.name] = c.type;
allColumns.push(c.name);
baseColumns.push(c.name);
let g = 'string';
const isNumeric = t.includes('INT') || t.includes('DECIMAL') || t.includes
('NUMERIC') || t.includes('REAL') || t.includes('DOUBLE') || t.includes('FLOAT')
|| t.includes('HUGEINT');
const isTimeType = t.includes('TIMESTAMP') || t.includes('DATE') || t.incl
udes('TIME');
if (isNumeric || isTimeType) {
timeColumnOptions.push(c.name);
baseTimeColumnOptions.push(c.name);
}
if (isTimeType) {
g = 'time';
timeColumns.push(c.name);
baseTimeColumns.push(c.name);
} else if (isNumeric) {
g = 'integer';
}
if (g === 'string') {
stringColumns.push(c.name);
baseStringColumns.push(c.name);
} else if (g === 'integer') {
integerColumns.push(c.name);
baseIntegerColumns.push(c.name);
}
groups[g].cols.push(c.name);
if (g !== 'string') {
const o = document.createElement('option');
o.value = c.name;
o.textContent = c.name;
orderSelect.appendChild(o);
}
});
timeColumns.forEach(name => {
const o = document.createElement('option');
o.value = name;
o.textContent = name;
xAxisSelect.appendChild(o);
});
timeColumnOptions.forEach(name => {
const o = document.createElement('option');
o.value = name;
o.textContent = name;
timeColumnSelect.appendChild(o);
});
xAxisSelect.value = '';
defaultTimeColumn = guessTimeColumn(cols) || '';
updateTimeFieldVisibility();
Object.keys(groups).forEach(key => {
const g = groups[key];
const div = document.createElement('div');
div.className = 'col-group';
const header = document.createElement('div');
header.className = 'col-group-header';
header.appendChild(document.createTextNode(g.name + ': '));
const links = document.createElement('span');
links.className = 'links';
const allBtn = document.createElement('a');
allBtn.href = '#';
allBtn.textContent = 'All';
const noneBtn = document.createElement('a');
noneBtn.href = '#';
noneBtn.textContent = 'None';
links.appendChild(allBtn);
links.appendChild(noneBtn);
header.appendChild(links);
div.appendChild(header);
const ul = document.createElement('ul');
g.ul = ul;
g.cols.forEach(name => {
const li = document.createElement('li');
const label = document.createElement('label');
const cb = document.createElement('input');
cb.type = 'checkbox';
cb.value = name;
cb.checked = true;
cb.addEventListener('change', updateSelectedColumns);
label.appendChild(cb);
label.appendChild(document.createTextNode(' ' + name));
li.appendChild(label);
ul.appendChild(li);
});
allBtn.addEventListener('click', e => {
e.preventDefault();
ul.querySelectorAll('input').forEach(cb => (cb.checked = true));
updateSelectedColumns();
});
noneBtn.addEventListener('click', e => {
e.preventDefault();
ul.querySelectorAll('input').forEach(cb => (cb.checked = false));
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "guessTimeColumn" -n
scubaduck/static/js -R
scubaduck/static/js/time_column.js:2:function guessTimeColumn(columns) {
scubaduck/static/js/view_settings.js:277: defaultTimeColumn = guessTimeColumn
(cols) || '';
(scubaduck) root@408933a71470:/workspace/scubaduck# cat scubaduck/static/js/time
_column.js
// Helper for choosing a default time column based on column names/types.
// Prefers a column whose name matches a time-ish heuristic (and whose type
// is time-like or numeric); otherwise falls back to the first time-typed
// column; returns null when neither exists.
function guessTimeColumn(columns) {
  const nameHints = ['timestamp','created','created_at','event_time','time','date','occurred','happened','logged'];
  let hintedName = null;
  let firstTimeTyped = null;
  for (const col of columns) {
    const type = (col.type || '').toUpperCase();
    const isNumeric = ['INT','DECIMAL','NUMERIC','REAL','DOUBLE','FLOAT','HUGEINT']
      .some(tag => type.includes(tag));
    const isTimeType = ['TIMESTAMP','DATE','TIME'].some(tag => type.includes(tag));
    const lowerName = col.name.toLowerCase();
    if (!hintedName && (isTimeType || isNumeric) &&
        nameHints.some(h => lowerName.includes(h))) {
      hintedName = col.name;
    }
    if (!firstTimeTyped && isTimeType) {
      firstTimeTyped = col.name;
    }
  }
  return hintedName || firstTimeTyped || null;
}
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check inside view_settings.js to see where order_by might be computed.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "orderSelect" -n scu
baduck/static/js/view_settings.js | head -n 40
198: const orderSelect = document.getElementById('order_by');
202: orderSelect.innerHTML = '';
261: orderSelect.appendChild(o);
344: initDropdown(orderSelect);
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '340,380p' scubaduck/
static/js/view_settings.js
groupBy = document.getElementById('group_by').closest('.field');
initChipInput(groupBy, typed =>
allColumns.filter(c => c.toLowerCase().includes(typed.toLowerCase()))
);
initDropdown(orderSelect);
initDropdown(document.getElementById('aggregate'));
});
}
let columnsInitialized = false;
fetch('/api/tables').then(r => r.json()).then(tables => {
const tableSel = document.getElementById('table');
tables.forEach(t => {
const o = document.createElement('option');
o.value = t;
o.textContent = t;
tableSel.appendChild(o);
});
initDropdown(tableSel);
const measure = document.createElement('span');
measure.style.visibility = 'hidden';
measure.style.position = 'absolute';
document.body.appendChild(measure);
let maxWidth = 0;
tables.forEach(t => {
measure.textContent = t;
const w = measure.getBoundingClientRect().width;
if (w > maxWidth) maxWidth = w;
});
measure.remove();
const disp = tableSel.parentElement.querySelector('.dropdown-display');
if (disp) disp.style.minWidth = maxWidth + 30 + 'px';
const table = parseSearch().table || tables[0];
tableSel.value = table;
tableSel.dispatchEvent(new Event('change'));
loadColumns(table).then(() => {
updateDisplayTypeUI();
addFilter();
initFromUrl();
columnsInitialized = true;
});
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll find where updateDisplayTypeUI or initFromUrl sets the order_by default.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "initFromUrl" -n scu
baduck/static/js/view_settings.js
378: initFromUrl();
859:function initFromUrl() {
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '828,880p' scubaduck/
static/js/view_settings.js
function parseSearch() {
const sp = new URLSearchParams(window.location.search);
const params = {};
if (sp.has('table')) params.table = sp.get('table');
if (sp.has('time_column')) params.time_column = sp.get('time_column');
if (sp.has('time_unit')) params.time_unit = sp.get('time_unit');
if (sp.has('start')) params.start = sp.get('start');
if (sp.has('end')) params.end = sp.get('end');
if (sp.has('order_by')) params.order_by = sp.get('order_by');
if (sp.has('order_dir')) params.order_dir = sp.get('order_dir');
if (sp.has('limit')) params.limit = parseInt(sp.get('limit'), 10);
if (sp.has('samples_columns')) params.samples_columns = sp.get('samples_column
s').split(',').filter(c => c);
if (sp.has('table_columns')) params.table_columns = sp.get('table_columns').sp
lit(',').filter(c => c);
if (sp.has('timeseries_columns')) params.timeseries_columns = sp.get('timeseri
es_columns').split(',').filter(c => c);
if (sp.has('filters')) {
try { params.filters = JSON.parse(sp.get('filters')); } catch(e) { params.fi
lters = []; }
}
if (sp.has('graph_type')) params.graph_type = sp.get('graph_type');
if (sp.has('group_by')) params.group_by = sp.get('group_by').split(',').filter
(c => c);
if (sp.has('aggregate')) params.aggregate = sp.get('aggregate');
if (sp.has('show_hits')) params.show_hits = sp.get('show_hits') === '1';
if (sp.has('x_axis')) params.x_axis = sp.get('x_axis');
if (sp.has('granularity')) params.granularity = sp.get('granularity');
if (sp.has('fill')) params.fill = sp.get('fill');
if (sp.has('derived_columns')) {
try { params.derived_columns = JSON.parse(sp.get('derived_columns')); } catc
h(e) { params.derived_columns = []; }
}
return params;
}
function initFromUrl() {
const params = parseSearch();
history.replaceState(params, '', paramsToSearch(params));
applyParams(params);
dive(false);
}
window.addEventListener('popstate', e => {
const params = e.state || parseSearch();
applyParams(params);
dive(false);
});
function setSelectValue(selector, value) {
const el = typeof selector === 'string' ? document.querySelector(selector) : s
elector;
if (el) {
const select = el.tagName === 'SELECT' ? el : el.querySelector('select');
if (select) {
select.value = value;
select.dispatchEvent(new Event('change'));
}
}
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check if the default order_by is linked to column loading or something else.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -n scubad
uck/static/js/view_settings.js | head -n 20
172: document.getElementById('order_by').value = '';
198: const orderSelect = document.getElementById('order_by');
455: const orderCol = document.getElementById('order_by').value;
689: order_by: document.getElementById('order_by').value,
737: if (params.order_by) sp.set('order_by', params.order_by);
767: if (params.order_by) {
768: document.getElementById('order_by').value = params.order_by;
837: if (sp.has('order_by')) params.order_by = sp.get('order_by');
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm browsing through the lines around 455 to review the changes.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '440,470p' scubaduck/
static/js/view_settings.js
selectedColumns = groupBy.chips.slice();
if (document.getElementById('show_hits').checked) selectedColumns.push('Hits
');
base.forEach(c => {
if (!selectedColumns.includes(c)) selectedColumns.push(c);
});
derivedColumns.forEach(dc => {
if (dc.include && !selectedColumns.includes(dc.name)) selectedColumns.push
(dc.name);
});
} else {
selectedColumns = base.slice();
derivedColumns.forEach(dc => {
if (dc.include) selectedColumns.push(dc.name);
});
}
columnValues[type] = selectedColumns.slice();
const orderCol = document.getElementById('order_by').value;
if (orderCol && !selectedColumns.includes(orderCol)) {
selectedColumns.push(orderCol);
}
updateColumnsTabCount();
}
// True when the column's declared SQL type is textual (CHAR/STRING/VARCHAR).
// Columns absent from columnTypes are treated as non-string.
function isStringColumn(name) {
  const declared = (columnTypes[name] || '').toUpperCase();
  return ['CHAR', 'STRING', 'VARCHAR'].some(tag => declared.includes(tag));
}
// True when the column's declared SQL type is an integer variant
// (INT, BIGINT, SMALLINT, ...). Unknown columns count as non-integer.
function isIntegerColumn(name) {
  const declared = columnTypes[name] || '';
  return declared.toUpperCase().indexOf('INT') !== -1;
}
static/js/view_settings.js
}
// A column is treated as time-like when its declared type mentions
// TIMESTAMP/DATE/TIME, or when the user has picked it as the active
// time column or as the timeseries x-axis.
function isTimeColumn(name) {
  const declared = (columnTypes[name] || '').toUpperCase();
  if (['TIMESTAMP', 'DATE', 'TIME'].some(tag => declared.includes(tag))) {
    return true;
  }
  const timeSel = document.getElementById('time_column').value;
  const xAxisSel = document.getElementById('x_axis').value;
  return name === timeSel || name === xAxisSel;
}
// Render a numeric value for display in result tables.
// Magnitudes above 999.999 are abbreviated with K/M/B/T suffixes (two
// decimals), magnitudes below 0.0005 collapse to '0.000', integers print
// unchanged, and everything else gets three decimal places.
// Non-numbers are coerced with Number(); NaN renders as the empty string.
function formatNumber(val) {
  const num = typeof val === 'number' ? val : Number(val);
  if (Number.isNaN(num)) return '';
  if (num === 0) return '0';
  const magnitude = Math.abs(num);
  if (magnitude > 999.999) {
    const suffixes = [
      [1e12, 'T'],
      [1e9, 'B'],
      [1e6, 'M'],
      [1e3, 'K'],
    ];
    for (const [scale, suffix] of suffixes) {
      if (magnitude >= scale) {
        return (num / scale).toFixed(2) + ' ' + suffix;
      }
    }
  }
  if (magnitude < 0.0005) return '0.000';
  return Number.isInteger(num) ? num.toString() : num.toFixed(3);
}
function addFilter() {
const container = document.createElement('div');
container.className = 'filter';
container.innerHTML = `
<div class="filter-row">
<select class="f-col"></select>
<select class="f-op"></select>
<button type="button" class="remove" onclick="this.closest('.filter').remo
ve()">✖</button>
</div>
<div class="chip-box">
<div class="chip-input">
<input class="f-val" type="text">
<button type="button" class="chip-copy">⎘</button>
</div>
<div class="chip-dropdown"></div>
</div>
`;
const colSel = container.querySelector('.f-col');
colSel.innerHTML = allColumns.map(c => `<option value="${c}">${c}</option>`).j
oin('');
initDropdown(colSel);
function populateOps() {
const opSel = container.querySelector('.f-op');
const col = colSel.value;
const ops = isStringColumn(col)
? [
(scubaduck) root@408933a71470:/workspace/scubaduck#
I didn't find a default order_by. I'll search for defaultOrderCol in the code.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "default" -n scubadu
ck/static/js/view_settings.js | grep -i order
(scubaduck) root@408933a71470:/workspace/scubaduck#
I think the first column might be selected by default due to query results being unsorted.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order" -n scubaduck
/static/js/view_settings.js | grep -i default | head
(scubaduck) root@408933a71470:/workspace/scubaduck#
I need to find tests that aren't specifying 'order_by' and update them.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -r tests/
*.py | wc -l
72
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll look for occurrences of "order_by" in the code, considering the tests' behavior.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(page, ser
ver_url" -n -r tests | head
tests/test_web_table.py:249: run_query(page, server_url, limit=10)
tests/test_web_table.py:257: run_query(page, server_url, limit=10)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I noticed only two lines. Let's expand to show the complete output.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(" -n test
s/test_web_table.py
9: run_query(
57: data = run_query(
71: run_query(
111: run_query(
130: run_query(
145: data = run_query(
197: data = run_query(
212: data = run_query(
228: run_query(
249: run_query(page, server_url, limit=10)
257: run_query(page, server_url, limit=10)
295: data = run_query(
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '290,320p' tests/test
_web_table.py
count = page.evaluate("document.querySelectorAll('#derived_list .derived').l
ength")
assert count == 0
def test_sql_query_display(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-02 00:00:00",
order_by="timestamp",
limit=10,
)
sql = data["sql"]
displayed = page.text_content("#sql_query")
assert displayed is not None
assert displayed.strip() == sql
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’m confirming the parameters being passed to the run_query call at line 9.
from __future__ import annotations
from typing import Any
from tests.web_utils import run_query
def test_table_sorting(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
order_dir="ASC",
limit=100,
)
# header alignment
align = page.evaluate(
"getComputedStyle(document.querySelector('#results th')).textAlign"
)
assert align == "left"
header = page.locator("#results th").nth(3)
def values() -> list[str]:
return page.locator("#results td:nth-child(4)").all_inner_texts()
orig_rows = values()
assert orig_rows == ["alice", "bob", "alice", "charlie"]
first_sql = page.evaluate("window.lastResults.sql")
header.click()
assert values() == sorted(orig_rows)
assert header.inner_text().endswith("▲")
color = page.evaluate(
"getComputedStyle(document.querySelector('#results th:nth-child(4)')).co
lor"
)
assert "0, 0, 255" in color
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll check the run_query at lines 57 and 71 to see if order_by is included.
eb_table.py
)
assert "0, 0, 255" not in color
def test_table_avg_group_by(page: Any, server_url: str) -> None:
    """Table view: an Avg aggregate with a ``user`` group-by returns one
    row per distinct user in the sampled window."""
    data = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="timestamp",
        group_by=["user"],
        aggregate="Avg",
    )
    # The server reports failures via an "error" key; absence means success.
    assert "error" not in data
    # Sample data has three distinct users (alice, bob, charlie), so the
    # grouped result should contain exactly three rows.
    assert len(data["rows"]) == 3
def test_table_enhancements(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
border = page.evaluate(
"getComputedStyle(document.querySelector('#results td')).borderStyle"
"getComputedStyle(document.querySelector('#results td')).borderStyle"
)
assert border == "solid"
color1 = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(2) td'))
.backgroundColor"
)
color2 = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(3) td'))
.backgroundColor"
)
assert color1 != color2
page.hover("#results tr:nth-child(2)")
hover_color = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(2) td'))
.backgroundColor"
)
assert hover_color != color1
page.click("#results tr:nth-child(2)")
selected_color = page.evaluate(
"getComputedStyle(document.querySelector('#results tr:nth-child(2) td'))
.backgroundColor"
)
assert "189, 228, 255" in selected_color
overflow = page.evaluate(
"var v=document.getElementById('view'); v.scrollWidth > v.clientWidth"
)
assert not overflow
def test_table_single_selection(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
page.click("#results tr:nth-child(2)")
page.click("#results tr:nth-child(3)")
count = page.evaluate("document.querySelectorAll('#results tr.selected').len
gth")
assert count == 1
is_third = page.evaluate(
"document.querySelector('#results tr:nth-child(3)').classList.contains('
selected')"
)
assert is_third
def test_timestamp_rendering(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-02 00:00:00",
order_by="timestamp",
limit=1,
)
cell = page.text_content("#results td")
assert cell != "Invalid Date"
valid = page.evaluate("v => !isNaN(Date.parse(v))", cell)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’m moving down to capture more of the changes listed.
def test_empty_data_message(page: Any, server_url: str) -> None:
    """A time window past the sample data yields no rows and shows an
    explicit empty-data notice in the result pane."""
    data = run_query(
        page,
        server_url,
        start="2025-01-01 00:00:00",
        end="2025-01-02 00:00:00",
        order_by="timestamp",
        limit=100,
    )
    # The 2025 window is beyond the 2024 sample data -> empty result set.
    assert data["rows"] == []
    # The UI must surface the empty state rather than rendering a blank table.
    msg = page.text_content("#view")
    assert "Empty data provided to table" in msg
def test_group_by_chip_from_url(page: Any, server_url: str) -> None:
url = f"{server_url}?graph_type=table&group_by=user&order_by=user&limit=10"
page.goto(url)
page.wait_for_selector("#group_by_field .chip", state="attached")
chips = page.evaluate(
"Array.from(document.querySelectorAll('#group_by_field .chip')).map(c =>
c.firstChild.textContent)"
)
assert chips == ["user"]
def test_group_by_autocomplete(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
inp = page.query_selector("#group_by_field .f-val")
assert inp
inp.click()
page.keyboard.type("us")
page.wait_for_selector("#group_by_field .chip-dropdown div")
options = page.locator("#group_by_field .chip-dropdown div").all_inner_texts
()
assert "user" in options
def test_group_by_copy_icon(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
icon = page.text_content("#group_by_field .chip-copy")
assert icon == "⎘"
def test_group_by_input_no_border(page: Any, server_url: str) -> None:
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
border = page.evaluate(
"getComputedStyle(document.querySelector('#group_by_field .f-val')).bord
erStyle"
)
assert border == "none"
def test_table_group_by_query(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll highlight lines 249 and 257 with the run_query
function call.
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(3)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_derived_column_query(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
expr = page.query_selector("#derived_list .derived textarea")
assert expr
expr.fill("value * 2")
page.click("text=View Settings")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll continue past line 272 to locate the relevant content.
page.click("text=View Settings")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-03 00:00:00")
page.fill("#limit", "10")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert data["rows"][0][-1] == 20
def test_derived_column_remove(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
assert page.query_selector("#derived_list .derived button.remove")
page.click("#derived_list .derived button.remove")
count = page.evaluate("document.querySelectorAll('#derived_list .derived').l
ength")
assert count == 0
def test_sql_query_display(page: Any, server_url: str) -> None:
    """The SQL generated for a query is displayed verbatim in the UI."""
    data = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-02 00:00:00",
        order_by="timestamp",
        limit=10,
    )
    # The server echoes the SQL it executed alongside the rows.
    sql = data["sql"]
    displayed = page.text_content("#sql_query")
    assert displayed is not None
    # Allow surrounding whitespace in the element, but require the exact SQL.
    assert displayed.strip() == sql
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see some run_query calls without an explicit order_by—let's check for others.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(page" -n
tests/test_web_basics.py | head
(scubaduck) root@408933a71470:/workspace/scubaduck#
I think run_query might be imported but not used. Let's take a closer look.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(" tests/t
est_web_basics.py | nl -ba | head
1 9: data = run_query(
2 28: data = run_query(
3 42: data = run_query(
4 225: data = run_query(
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll take a look at these lines now.
from typing import Any
from tests.web_utils import run_query, select_value
def test_range_filters(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-02 00:00:00",
end="2024-01-02 04:00:00",
order_by="user",
limit=100,
)
assert len(data["rows"]) == 2
from dateutil import parser
timestamps = [parser.parse(row[0]).replace(tzinfo=None) for row in data["row
s"]]
assert timestamps == [
parser.parse("2024-01-02 00:00:00"),
parser.parse("2024-01-02 03:00:00"),
]
def test_order_by(page: Any, server_url: str) -> None:
    """Ordering by ``value`` descending returns rows sorted accordingly."""
    data = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="value",
        order_dir="DESC",
        limit=100,
    )
    # row[2] holds the "value" column here; confirm a non-increasing sequence.
    values = [row[2] for row in data["rows"]]
    assert values == sorted(values, reverse=True)
def test_limit(page: Any, server_url: str) -> None:
    """The ``limit`` parameter caps the number of returned rows."""
    data = run_query(
        page,
        server_url,
        start="2024-01-01 00:00:00",
        end="2024-01-03 00:00:00",
        order_by="user",
        limit=2,
    )
    # The window contains more than two events, so the cap must bind exactly.
    assert len(data["rows"]) == 2
def test_time_column_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
options = page.locator("#time_column option").all_inner_texts()
assert "(none)" in options
assert "timestamp" in options
assert "value" in options
assert page.input_value("#time_column") == "timestamp"
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’m reviewing content around line 42 and checking for relevant details.
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
limit=2,
)
assert len(data["rows"]) == 2
def test_time_column_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
options = page.locator("#time_column option").all_inner_texts()
assert "(none)" in options
assert "timestamp" in options
assert "value" in options
assert page.input_value("#time_column") == "timestamp"
def test_time_column_none_hides_range(page: Any, server_url: str) -> None:
    """Selecting no time column hides the start/end inputs and disables
    time filtering for the query."""
    page.goto(server_url)
    page.wait_for_selector("#time_column option", state="attached")
    # "" is the value of the "(none)" option in the time column dropdown.
    select_value(page, "#time_column", "")
    assert page.is_hidden("#start")
    assert page.is_hidden("#end")
    # Clear the result marker so wait_for_function observes the new query only.
    page.evaluate("window.lastResults = undefined")
    page.click("text=Dive")
    page.wait_for_function("window.lastResults !== undefined")
    data = page.evaluate("window.lastResults")
    # With no time column, all four sample rows come back unfiltered and the
    # response omits the start/end range entirely.
    assert len(data["rows"]) == 4
    assert "start" not in data and "end" not in data
def test_time_unit_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#time_unit", state="attached")
opts = page.locator("#time_unit option").all_inner_texts()
assert "ms" in opts
assert page.input_value("#time_unit") == "s"
def test_time_unit_hidden_when_no_time_column(page: Any, server_url: str) -> Non
e:
page.goto(server_url)
page.wait_for_selector("#time_column option", state="attached")
select_value(page, "#time_column", "")
assert page.is_hidden("#time_unit")
page.wait_for_selector("#table option", state="attached")
disp = page.query_selector("#table + .dropdown-display")
assert disp
assert (
page.evaluate("getComputedStyle(document.querySelector('#table')).displa
y")
== "none"
)
assert page.query_selector("#table + .dropdown-display + .dropdown-menu inpu
t")
def test_x_axis_default_entry(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#graph_type", state="attached")
select_value(page, "#graph_type", "timeseries")
page.wait_for_selector("#x_axis option", state="attached")
options = page.locator("#x_axis option").all_inner_texts()
assert "(default)" in options
assert page.input_value("#x_axis") == ""
def test_simple_filter(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Add Filter")
filter_el = page.query_selector("#filters .filter:last-child")
assert filter_el
page.evaluate(
"arg => setSelectValue(arg.el.querySelector('.f-col'), arg.val)",
{"el": filter_el, "val": "user"},
)
val_input = filter_el.query_selector(".f-val")
val_input.click()
page.keyboard.type("alice")
page.keyboard.press("Enter")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert len(data["rows"]) == 2
assert all(row[3] == "alice" for row in data["rows"])
def test_default_filter_and_layout(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
count = page.evaluate("document.querySelectorAll('#filters .filter').length"
)
assert count == 1
last_is_button = page.evaluate(
"document.querySelector('#filters').lastElementChild.id === 'add_filter'
"
)
assert last_is_button
position = page.evaluate(
"getComputedStyle(document.querySelector('#filters .filter button.remove
')).position"
)
assert position == "static"
def test_filter_remove_alignment(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
diff = page.evaluate(
"() => { const r=document.querySelector('#filters .filter-row').getBound
ingClientRect(); const x=document.querySelector('#filters .filter-row button.rem
ove').getBoundingClientRect(); return Math.abs(r.right - x.right); }"
)
assert diff <= 1
def test_header_and_tabs(page: Any, server_url: str) -> None:
page.goto(server_url)
header = page.text_content("#header")
assert "sample.csv" in header
assert "events" in header
assert page.is_visible("#settings")
assert page.is_hidden("#columns")
page.click("text=Columns")
assert page.is_visible("#columns")
cols = [c.strip() for c in page.locator("#column_groups li").all_inner_texts
()]
assert "timestamp" in cols
assert "event" in cols
page.click("text=View Settings")
assert page.is_visible("#settings")
btn_color = page.evaluate(
"getComputedStyle(document.querySelector('#dive')).backgroundColor"
)
assert "rgb(0, 128, 0)" == btn_color
sidebar_overflow = page.evaluate(
"getComputedStyle(document.querySelector('#sidebar')).overflowY"
)
view_overflow = page.evaluate(
"getComputedStyle(document.querySelector('#view')).overflowY"
)
assert sidebar_overflow == "auto"
assert view_overflow == "auto"
def test_help_and_alignment(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
titles = page.evaluate(
"Array.from(document.querySelectorAll('#settings .help')).map(e => e.tit
le)"
)
assert any("start/end of the time range" in t for t in titles)
text_align = page.evaluate(
"getComputedStyle(document.querySelector('#settings label')).textAlign"
)
assert text_align == "right"
def test_relative_dropdown(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
btn = page.query_selector('[data-target="start-select"]')
assert btn
btn.click()
page.click("#start-select div:text('-3 hours')")
assert page.input_value("#start") == "-3 hours"
def test_end_dropdown_now(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click('[data-target="end-select"]')
page.click("#end-select div:text('now')")
assert page.input_value("#end") == "now"
def test_invalid_time_error_shown(page: Any, server_url: str) -> None:
    """An unparseable start time yields an error that is shown to the user."""
    data = run_query(
        page,
        server_url,
        start="nonsense",
        end="now",
        order_by="user",
    )
    # The server rejects the bogus start value...
    assert "error" in data
    # ...and the rendered error text echoes the offending input.
    msg = page.text_content("#view")
    assert "nonsense" in msg
def test_url_query_persistence(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.wait_for_function("window.lastResults !== undefined")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm looking through lines around 224 and nearby to make sure I capture everything.
def test_invalid_time_error_shown(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="nonsense",
end="now",
order_by="user",
)
assert "error" in data
msg = page.text_content("#view")
assert "nonsense" in msg
def test_url_query_persistence(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.wait_for_function("window.lastResults !== undefined")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-02 00:00:00")
page.fill("#limit", "1")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
first_url = page.url
first_rows = page.evaluate("window.lastResults.rows.length")
page.fill("#limit", "2")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
second_url = page.url
second_rows = page.evaluate("window.lastResults.rows.length")
assert second_rows != first_rows
_web_basics.py
assert second_rows != first_rows
assert first_url != second_url
page.go_back()
page.wait_for_function("window.lastResults !== undefined")
assert page.url == first_url
assert page.evaluate("window.lastResults.rows.length") == first_rows
def test_load_from_url(page: Any, server_url: str) -> None:
url = (
f"{server_url}?start=2024-01-01%2000:00:00&end=2024-01-02%2000:00:00"
"&order_by=timestamp&limit=2"
)
page.goto(url)
page.wait_for_selector("#order_by option", state="attached")
page.wait_for_function("window.lastResults !== undefined")
assert page.input_value("#start") == "2024-01-01 00:00:00"
assert page.input_value("#end") == "2024-01-02 00:00:00"
assert page.input_value("#limit") == "2"
assert page.evaluate("window.lastResults.rows.length") == 2
(scubaduck) root@408933a71470:/workspace/scubaduck#
Tests rely on the first non-string column for default order_by. I'll update them accordingly.
I'll review tests for run_query calls without an explicit order_by and update them.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(page, ser
ver_url" -n -r tests | grep -v order_by
tests/test_web_table.py:249: run_query(page, server_url, limit=10)
tests/test_web_table.py:257: run_query(page, server_url, limit=10)
(scubaduck) root@408933a71470:/workspace/scubaduck#
It looks like there are only two run_query calls missing order_by; I'll check for similar cases.
sts/test_server*.py | grep -v tests/test_server_basic.py | head
tests/test_server_db_types.py:17: "order_by": "timestamp",
tests/test_server_db_types.py:84: "order_by": "timestamp",
tests/test_server_db_types.py:113: "order_by": "timestamp",
tests/test_server_errors.py:15: "order_by": "timestamp",
tests/test_server_errors.py:55: "order_by": "timestamp",
tests/test_server_time.py:21: "order_by": "created",
tests/test_server_time.py:42: "order_by": "created",
tests/test_server_time.py:88: "order_by": "created",
tests/test_server_time.py:120: "order_by": "visit_time",
tests/test_server_time.py:152: "order_by": "timestamp",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll look for instances in server tests missing the "order_by" parameter.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -L "\"order_by\"" tests
/test_server*.py
tests/test_server.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
Looks like tests/test_server.py is the only one missing "order_by". Let's check it out.
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '1,160p' tests/test_s
erver.py
"""Legacy test module.
Tests have been split into multiple files:
- test_server_basic.py
- test_server_db_types.py
- test_server_time.py
- test_server_errors.py
- test_server_timeseries.py
"""
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll search for ?order_by= in URLs to check direct API requests.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "order_by" -n tests/
test_web_basics.py | grep -v run_query | head
14: order_by="user",
27:def test_order_by(page: Any, server_url: str) -> None:
33: order_by="value",
47: order_by="user",
116: page.wait_for_selector("#order_by option", state="attached")
137: page.wait_for_selector("#order_by option", state="attached")
152: page.wait_for_selector("#order_by option", state="attached")
161: page.wait_for_selector("#order_by option", state="attached")
194: page.wait_for_selector("#order_by option", state="attached")
208: page.wait_for_selector("#order_by option", state="attached")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see that there might be other instances of order_by in tests outside run_query.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "goto(server_url" -n
tests/test_web_table.py
240: page.goto(server_url)
265: page.goto(server_url)
284: page.goto(server_url)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll review about 240 lines for the necessary changes.
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(3)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_derived_column_query(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
expr = page.query_selector("#derived_list .derived textarea")
assert expr
expr.fill("value * 2")
page.click("text=View Settings")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-03 00:00:00")
page.fill("#limit", "10")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert data["rows"][0][-1] == 20
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll look for tests and files where order_by is missing.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(.*order_b
y" -n tests -n | head
grep: tests: Is a directory
(scubaduck) root@408933a71470:/workspace/scubaduck#
The command is wrong due to quoting. I'll grep for "run_query(" without the ordering parts.
s | grep -v order_by
tests/test_web_basics.py:9: data = run_query(
tests/test_web_basics.py:28: data = run_query(
tests/test_web_basics.py:42: data = run_query(
tests/test_web_basics.py:225: data = run_query(
tests/web_utils.py:13:def run_query(
tests/test_web_table.py:9: run_query(
tests/test_web_table.py:57: data = run_query(
tests/test_web_table.py:71: run_query(
tests/test_web_table.py:111: run_query(
tests/test_web_table.py:130: run_query(
tests/test_web_table.py:145: data = run_query(
tests/test_web_table.py:197: data = run_query(
tests/test_web_table.py:212: data = run_query(
tests/test_web_table.py:228: run_query(
tests/test_web_table.py:249: run_query(page, server_url, limit=10)
tests/test_web_table.py:257: run_query(page, server_url, limit=10)
tests/test_web_table.py:295: data = run_query(
(scubaduck) root@408933a71470:/workspace/scubaduck#
The grep command hasn't fully excluded lines with "order_by" due to multi-line parameters.
_web_table.py
page.goto(f"{server_url}?graph_type=table")
page.wait_for_selector("#group_by_field", state="visible")
border = page.evaluate(
"getComputedStyle(document.querySelector('#group_by_field .f-val')).bord
erStyle"
)
assert border == "none"
def test_table_group_by_query(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
limit=100,
group_by=["user"],
aggregate="Count",
)
assert "error" not in data
assert len(data["rows"]) == 3
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
aggregate="Avg",
)
assert len(data["rows"]) == 1
row = data["rows"][0]
assert row[0] == 4
from dateutil import parser
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm paging through the content, moving beyond line 220 to capture the necessary info.
_web_table.py
from dateutil import parser
ts = parser.parse(row[1]).replace(tzinfo=None)
assert ts == parser.parse("2024-01-01 13:00:00")
assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm checking the call to run_query in test_table_headers_show_aggregate, still unsure about other arguments.
border = page.evaluate(
"getComputedStyle(document.querySelector('#group_by_field .f-val')).bord
erStyle"
)
assert border == "none"
def test_table_group_by_query(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="user",
limit=100,
group_by=["user"],
aggregate="Count",
)
assert "error" not in data
assert len(data["rows"]) == 3
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
aggregate="Avg",
)
assert len(data["rows"]) == 1
row = data["rows"][0]
assert row[0] == 4
from dateutil import parser
ts = parser.parse(row[1]).replace(tzinfo=None)
assert ts == parser.parse("2024-01-01 13:00:00")
assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'm checking the run_query instances in the code and listing those without an order_by argument.
def test_table_single_selection(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
page.click("#results tr:nth-child(2)")
page.click("#results tr:nth-child(3)")
count = page.evaluate("document.querySelectorAll('#results tr.selected').len
gth")
assert count == 1
is_third = page.evaluate(
"document.querySelector('#results tr:nth-child(3)').classList.contains('
selected')"
)
assert is_third
def test_timestamp_rendering(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-02 00:00:00",
order_by="timestamp",
limit=1,
)
cell = page.text_content("#results td")
assert cell != "Invalid Date"
valid = page.evaluate("v => !isNaN(Date.parse(v))", cell)
assert valid
def test_empty_data_message(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2025-01-01 00:00:00",
end="2025-01-02 00:00:00",
order_by="timestamp",
(scubaduck) root@408933a71470:/workspace/scubaduck#
The only run_query without 'order_by' appears to be at table_avg_no_group_by with aggregate="Avg".
-n tests/test_web_table.py
64: aggregate="Avg",
215: aggregate="Avg",
231: aggregate="Avg",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll review the code near line 64 for any changes.
eb_table.py
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
group_by=["user"],
aggregate="Avg",
)
assert "error" not in data
assert len(data["rows"]) == 3
def test_table_enhancements(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-03 00:00:00",
order_by="timestamp",
limit=10,
)
border = page.evaluate(
"getComputedStyle(document.querySelector('#results td')).borderStyle"
(scubaduck) root@408933a71470:/workspace/scubaduck#
I noticed the first run_query orders by timestamp. Let's also check the one with aggregate=Avg.
_web_table.py
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
aggregate="Avg",
)
assert len(data["rows"]) == 1
row = data["rows"][0]
assert row[0] == 4
from dateutil import parser
ts = parser.parse(row[1]).replace(tzinfo=None)
assert ts == parser.parse("2024-01-01 13:00:00")
assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see repeated run_query calls for aggregate "Avg" without order_by, though some have limits. Checking for order_by usage at other lines.
_web_table.py
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
(scubaduck) root@408933a71470:/workspace/scubaduck#
Five tests lack order_by. I'll inspect test_web_table.py and test_web_basics for potential issues.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "run_query(" -n test
s/test_web_basics.py
9: data = run_query(
28: data = run_query(
42: data = run_query(
225: data = run_query(
(scubaduck) root@408933a71470:/workspace/scubaduck#
It looks like the order_by is specified after line 9. Let's verify what's at line 225.
page.click("#end-select div:text('now')")
assert page.input_value("#end") == "now"
def test_invalid_time_error_shown(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="nonsense",
end="now",
order_by="user",
)
assert "error" in data
msg = page.text_content("#view")
assert "nonsense" in msg
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll modify the tests to specify order_by="timestamp" (or "ts") where needed.
I'll update tests to set order_by="timestamp"
after page.goto and wait.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "TEST" -n tests | he
ad
grep: tests: Is a directory
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll look for references to create_app('TEST') or the 'extra' table.
grep: tests/__pycache__/test_server_timeseries.cpython-312-pytest-8.3.5.pyc: bin
ary file matches
grep: tests/__pycache__/test_multi_table_web.cpython-312-pytest-8.3.5.pyc: binar
y file matches
tests/test_server_timeseries.py:256: "table": "extra",
tests/test_multi_table_web.py:29: select_value(page, "#table", "extra")
tests/test_multi_table_web.py:36: assert table_param == "extra"
tests/test_multi_table_web.py:44: select_value(page, "#table", "extra")
tests/test_multi_table_web.py:50: assert page.input_value("#table") == "extra
"
tests/test_multi_table_web.py:52: assert disp is not None and disp.strip() ==
"extra"
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check out the tests/test_multi_table_web.py file now.
from collections.abc import Iterator
from typing import Any
import pytest
from werkzeug.serving import make_server
from scubaduck.server import create_app
from tests.web_utils import select_value
@pytest.fixture()
def multi_table_server_url() -> Iterator[str]:
app = create_app("TEST")
httpd = make_server("127.0.0.1", 0, app)
port = httpd.server_port
thread = threading.Thread(target=httpd.serve_forever)
thread.start()
try:
yield f"http://127.0.0.1:{port}"
finally:
httpd.shutdown()
thread.join()
def test_table_param_updates_on_dive(page: Any, multi_table_server_url: str) ->
None:
page.goto(multi_table_server_url + "?table=events")
page.wait_for_selector("#table option", state="attached")
select_value(page, "#table", "extra")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
table_param = page.evaluate(
"new URLSearchParams(window.location.search).get('table')"
)
assert table_param == "extra"
def test_table_dropdown_persists_on_refresh(
page: Any, multi_table_server_url: str
) -> None:
page.goto(multi_table_server_url + "?table=events")
page.wait_for_selector("#table option", state="attached")
select_value(page, "#table", "extra")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
page.reload()
page.wait_for_selector("#table option", state="attached")
assert page.input_value("#table") == "extra"
disp = page.text_content("#table + .dropdown-display")
assert disp is not None and disp.strip() == "extra"
(scubaduck) root@408933a71470:/workspace/scubaduck#
I noticed there's no run_query. I'll search for it in other files to double-check.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -R "run_query(" tests |
grep -v test_web_table.py | grep -v test_web_basics.py
tests/web_utils.py:def run_query(
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll look at tests like test_server_timeseries to confirm if order_by is specified.
test_server_timeseries.py | head
17: "order_by": "user",
41: "order_by": "user",
140: "order_by": "user",
258: "order_by": "ts",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll add order_by="timestamp" to the run_query calls and to test_derived_column_query. Let's implement!
_web_table.py
assert len(data["rows"]) == 3
def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
aggregate="Avg",
)
assert len(data["rows"]) == 1
row = data["rows"][0]
assert row[0] == 4
from dateutil import parser
ts = parser.parse(row[1]).replace(tzinfo=None)
assert ts == parser.parse("2024-01-01 13:00:00")
assert row[2] == 25
def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
run_query(
page,
server_url,
aggregate="Avg",
)
headers = page.locator("#results th").all_inner_texts()
assert "Hits" in headers
assert "timestamp (avg)" in headers
assert "value (avg)" in headers
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll add the “order_by” parameter with “timestamp” for both run_query calls.
def test_format_number_function(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
vals = page.evaluate(
"() => [formatNumber(815210), formatNumber(999.999), formatNumber(0.0004
), formatNumber(0)]"
)
assert vals == ["815.21 K", "999.999", "0.000", "0"]
def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(3)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_date_cell_nowrap(page: Any, server_url: str) -> None:
run_query(page, server_url, limit=10)
whitespace = page.evaluate(
"getComputedStyle(document.querySelector('#results td:nth-child(1)')).wh
iteSpace"
)
assert whitespace == "nowrap"
def test_derived_column_query(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll add the order_by parameter and patch lines starting at 266 for derived_column_query.
page.click("text=Columns")
page.click("text=Add Derived")
expr = page.query_selector("#derived_list .derived textarea")
assert expr
expr.fill("value * 2")
page.click("text=View Settings")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-03 00:00:00")
page.fill("#limit", "10")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert data["rows"][0][-1] == 20
def test_derived_column_remove(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
assert page.query_selector("#derived_list .derived button.remove")
page.click("#derived_list .derived button.remove")
count = page.evaluate("document.querySelectorAll('#derived_list .derived').l
ength")
assert count == 0
def test_sql_query_display(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-02 00:00:00",
order_by="timestamp",
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll add select_value(page, "#order_by", "timestamp") to test_derived_column_query and ensure the proper waits are in place.
> *** Update File: tests/test_web_table.py
> @@
> -def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
> - data = run_query(
> - page,
> - server_url,
> - aggregate="Avg",
> - )
> +def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
> + data = run_query(
> + page,
> + server_url,
> + aggregate="Avg",
> + order_by="timestamp",
> + )
> @@
> -def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
> - run_query(
> - page,
> - server_url,
> - aggregate="Avg",
> - )
> +def test_table_headers_show_aggregate(page: Any, server_url: str) -> None:
> + run_query(
> + page,
> + server_url,
> + aggregate="Avg",
> + order_by="timestamp",
> + )
> *** End Patch
> PATCH
Success. Updated the following files:
M tests/test_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll work on adjusting the numeric_cell_nowrap and date_cell_nowrap.
(scubaduck) root@408933a71470:/workspace/scubaduck# apply_patch<<'PATCH'
> *** Begin Patch
> *** Update File: tests/test_web_table.py
> @@
> def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
> - run_query(page, server_url, limit=10)
> + run_query(page, server_url, order_by="timestamp", limit=10)
> @@
> def test_date_cell_nowrap(page: Any, server_url: str) -> None:
> - run_query(page, server_url, limit=10)
> + run_query(page, server_url, order_by="timestamp", limit=10)
> *** End Patch
> PATCH
Success. Updated the following files:
M tests/test_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll update the derived_column_query to apply the order_by field.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "derived_column_quer
y" -n tests/test_web_table.py
266:def test_derived_column_query(page: Any, server_url: str) -> None:
(scubaduck) root@408933a71470:/workspace/scubaduck# sed -n '266,292p' tests/test
_web_table.py
def test_derived_column_query(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
expr = page.query_selector("#derived_list .derived textarea")
assert expr
expr.fill("value * 2")
page.click("text=View Settings")
page.fill("#start", "2024-01-01 00:00:00")
page.fill("#end", "2024-01-03 00:00:00")
page.fill("#limit", "10")
page.evaluate("window.lastResults = undefined")
page.click("text=Dive")
page.wait_for_function("window.lastResults !== undefined")
data = page.evaluate("window.lastResults")
assert data["rows"][0][-1] == 20
def test_derived_column_remove(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
assert page.query_selector("#derived_list .derived button.remove")
page.click("#derived_list .derived button.remove")
count = page.evaluate("document.querySelectorAll('#derived_list .derived').l
ength")
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll add the select_value call next.
(scubaduck) root@408933a71470:/workspace/scubaduck# apply_patch<<'PATCH'
> *** Begin Patch
> *** Update File: tests/test_web_table.py
> @@
> def test_derived_column_query(page: Any, server_url: str) -> None:
> page.goto(server_url)
> page.wait_for_selector("#order_by option", state="attached")
> + select_value(page, "#order_by", "timestamp")
> page.click("text=Columns")
> *** End Patch
> PATCH
Success. Updated the following files:
M tests/test_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see "run_query" already imported. Let's check if "select_value" is missing from imports.
b_table.py
from __future__ import annotations
from typing import Any
from tests.web_utils import run_query
def test_table_sorting(page: Any, server_url: str) -> None:
run_query(
page,
(scubaduck) root@408933a71470:/workspace/scubaduck#
I realized I need to import select_value at the top since it's now in use.
(scubaduck) root@408933a71470:/workspace/scubaduck# apply_patch<<'PATCH'
> *** Begin Patch
> *** Update File: tests/test_web_table.py
> @@
> -from tests.web_utils import run_query
> +from tests.web_utils import run_query, select_value
> *** End Patch
> PATCH
Success. Updated the following files:
M tests/test_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck#
I modified test_table_headers_show_aggregate, but need to confirm the other tests around line 308.
(scubaduck) root@408933a71470:/workspace/scubaduck# grep -n "derived_column_remo
ve" -n tests/test_web_table.py
286:def test_derived_column_remove(page: Any, server_url: str) -> None:
_web_table.py
def test_derived_column_remove(page: Any, server_url: str) -> None:
page.goto(server_url)
page.wait_for_selector("#order_by option", state="attached")
page.click("text=Columns")
page.click("text=Add Derived")
assert page.query_selector("#derived_list .derived button.remove")
page.click("#derived_list .derived button.remove")
count = page.evaluate("document.querySelectorAll('#derived_list .derived').l
ength")
assert count == 0
def test_sql_query_display(page: Any, server_url: str) -> None:
data = run_query(
page,
server_url,
start="2024-01-01 00:00:00",
end="2024-01-02 00:00:00",
order_by="timestamp",
limit=10,
)
sql = data["sql"]
displayed = page.text_content("#sql_query")
assert displayed is not None
assert displayed.strip() == sql
(scubaduck) root@408933a71470:/workspace/scubaduck#
I see no query is run, so no Dive needed. I'll proceed with formatting and checks.
(scubaduck) root@408933a71470:/workspace/scubaduck# ruff format tests/test_web_t
able.py
1 file left unchanged
(scubaduck) root@408933a71470:/workspace/scubaduck# ruff check tests/test_web_ta
ble.py
All checks passed!
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll run pyright to check for type issues in the code.
(scubaduck) root@408933a71470:/workspace/scubaduck# pyright
0 errors, 0 warnings, 0 informations
bringing up nodes...
........................................................................ [ 64%]
....................................... [100%]
111 passed in 15.13s
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll review the git status now to ensure everything's in order.
(scubaduck) root@408933a71470:/workspace/scubaduck# git status --short
M tests/test_web_table.py
(scubaduck) root@408933a71470:/workspace/scubaduck# git add tests/test_web_table
.py
(scubaduck) root@408933a71470:/workspace/scubaduck# git commit -m "test: specify
order_by explicitly" && git status --short
[work 3958351] test: specify order_by explicitly
1 file changed, 6 insertions(+), 3 deletions(-)
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll check the git status to review any changes before moving forward.
(scubaduck) root@408933a71470:/workspace/scubaduck# git status --short
(scubaduck) root@408933a71470:/workspace/scubaduck#
I’ll gather the citation lines from the changed files for the PR message.
(scubaduck) root@408933a71470:/workspace/scubaduck# nl -ba tests/test_web_table.
py | sed -n '1,120p'
1 from __future__ import annotations
2
3 from typing import Any
4
5 from tests.web_utils import run_query, select_value
6
7
8 def test_table_sorting(page: Any, server_url: str) -> None:
9 run_query(
10 page,
11 server_url,
12 start="2024-01-01 00:00:00",
13 end="2024-01-03 00:00:00",
14 order_by="user",
15 order_dir="ASC",
16 limit=100,
17 )
18 # header alignment
19 align = page.evaluate(
20 "getComputedStyle(document.querySelector('#results th')).textAli
gn"
21 )
22 assert align == "left"
23
24 header = page.locator("#results th").nth(3)
25
26 def values() -> list[str]:
27 return page.locator("#results td:nth-child(4)").all_inner_texts(
)
28
29 orig_rows = values()
30 assert orig_rows == ["alice", "bob", "alice", "charlie"]
31
32 first_sql = page.evaluate("window.lastResults.sql")
33
34 header.click()
35 assert values() == sorted(orig_rows)
36 assert header.inner_text().endswith("▲")
37 color = page.evaluate(
38 "getComputedStyle(document.querySelector('#results th:nth-child(
4)')).color"
39 )
40 assert "0, 0, 255" in color
41 assert page.evaluate("window.lastResults.sql") == first_sql
42
43 header.click()
44 assert values() == sorted(orig_rows, reverse=True)
45 assert header.inner_text().endswith("▼")
46
47 header.click()
48 assert values() == orig_rows
49 assert header.inner_text() == "user"
50 color = page.evaluate(
51 "getComputedStyle(document.querySelector('#results th:nth-child(
4)')).color"
52 )
53 assert "0, 0, 255" not in color
54
55
56 def test_table_avg_group_by(page: Any, server_url: str) -> None:
57 data = run_query(
58 page,
59 server_url,
60 start="2024-01-01 00:00:00",
61 end="2024-01-03 00:00:00",
62 order_by="timestamp",
63 group_by=["user"],
64 aggregate="Avg",
65 )
66 assert "error" not in data
67 assert len(data["rows"]) == 3
68
69
70 def test_table_enhancements(page: Any, server_url: str) -> None:
71 run_query(
72 page,
73 server_url,
74 start="2024-01-01 00:00:00",
75 end="2024-01-03 00:00:00",
76 order_by="timestamp",
77 limit=10,
78 )
79 border = page.evaluate(
80 "getComputedStyle(document.querySelector('#results td')).borderS
tyle"
81 )
82 assert border == "solid"
83
84 color1 = page.evaluate(
85 "getComputedStyle(document.querySelector('#results tr:nth-child(
2) td')).backgroundColor"
86 )
87 color2 = page.evaluate(
88 "getComputedStyle(document.querySelector('#results tr:nth-child(
3) td')).backgroundColor"
89 )
90 assert color1 != color2
91
92 page.hover("#results tr:nth-child(2)")
93 hover_color = page.evaluate(
94 "getComputedStyle(document.querySelector('#results tr:nth-child(
2) td')).backgroundColor"
95 )
96 assert hover_color != color1
97
98 page.click("#results tr:nth-child(2)")
99 selected_color = page.evaluate(
100 "getComputedStyle(document.querySelector('#results tr:nth-child(
2) td')).backgroundColor"
101 )
102 assert "189, 228, 255" in selected_color
103
104 overflow = page.evaluate(
105 "var v=document.getElementById('view'); v.scrollWidth > v.client
Width"
106 )
107 assert not overflow
108
109
110 def test_table_single_selection(page: Any, server_url: str) -> None:
111 run_query(
112 page,
113 server_url,
114 start="2024-01-01 00:00:00",
115 end="2024-01-03 00:00:00",
116 order_by="timestamp",
117 limit=10,
118 )
119 page.click("#results tr:nth-child(2)")
120 page.click("#results tr:nth-child(3)")
(scubaduck) root@408933a71470:/workspace/scubaduck# nl -ba tests/test_web_table.
py | sed -n '204,312p'
204 group_by=["user"],
205 aggregate="Count",
206 )
207 assert "error" not in data
208 assert len(data["rows"]) == 3
209
210
211 def test_table_avg_no_group_by(page: Any, server_url: str) -> None:
212 data = run_query(
213 page,
214 server_url,
215 aggregate="Avg",
216 order_by="timestamp",
217 )
218 assert len(data["rows"]) == 1
219 row = data["rows"][0]
220 assert row[0] == 4
221 from dateutil import parser
222
223 ts = parser.parse(row[1]).replace(tzinfo=None)
224 assert ts == parser.parse("2024-01-01 13:00:00")
225 assert row[2] == 25
226
227
228 def test_table_headers_show_aggregate(page: Any, server_url: str) -> Non
e:
229 run_query(
230 page,
231 server_url,
232 aggregate="Avg",
233 order_by="timestamp",
234 )
235 headers = page.locator("#results th").all_inner_texts()
236 assert "Hits" in headers
237 assert "timestamp (avg)" in headers
238 assert "value (avg)" in headers
239
240
241 def test_format_number_function(page: Any, server_url: str) -> None:
242 page.goto(server_url)
243 page.wait_for_selector("#order_by option", state="attached")
244 vals = page.evaluate(
245 "() => [formatNumber(815210), formatNumber(999.999), formatNumbe
r(0.0004), formatNumber(0)]"
246 )
247 assert vals == ["815.21 K", "999.999", "0.000", "0"]
248
249
250 def test_numeric_cell_nowrap(page: Any, server_url: str) -> None:
251 run_query(page, server_url, order_by="timestamp", limit=10)
252 whitespace = page.evaluate(
253 "getComputedStyle(document.querySelector('#results td:nth-child(
3)')).whiteSpace"
254 )
255 assert whitespace == "nowrap"
256
257
258 def test_date_cell_nowrap(page: Any, server_url: str) -> None:
259 run_query(page, server_url, order_by="timestamp", limit=10)
260 whitespace = page.evaluate(
261 "getComputedStyle(document.querySelector('#results td:nth-child(
1)')).whiteSpace"
262 )
263 assert whitespace == "nowrap"
264
265
266 def test_derived_column_query(page: Any, server_url: str) -> None:
267 page.goto(server_url)
268 page.wait_for_selector("#order_by option", state="attached")
269 select_value(page, "#order_by", "timestamp")
270 page.click("text=Columns")
271 page.click("text=Add Derived")
272 expr = page.query_selector("#derived_list .derived textarea")
273 assert expr
274 expr.fill("value * 2")
275 page.click("text=View Settings")
276 page.fill("#start", "2024-01-01 00:00:00")
277 page.fill("#end", "2024-01-03 00:00:00")
278 page.fill("#limit", "10")
279 page.evaluate("window.lastResults = undefined")
280 page.click("text=Dive")
281 page.wait_for_function("window.lastResults !== undefined")
282 data = page.evaluate("window.lastResults")
283 assert data["rows"][0][-1] == 20
284
285
286 def test_derived_column_remove(page: Any, server_url: str) -> None:
287 page.goto(server_url)
288 page.wait_for_selector("#order_by option", state="attached")
289 page.click("text=Columns")
290 page.click("text=Add Derived")
291 assert page.query_selector("#derived_list .derived button.remove")
292 page.click("#derived_list .derived button.remove")
293 count = page.evaluate("document.querySelectorAll('#derived_list .der
ived').length")
294 assert count == 0
295
296
297 def test_sql_query_display(page: Any, server_url: str) -> None:
298 data = run_query(
299 page,
300 server_url,
301 start="2024-01-01 00:00:00",
302 end="2024-01-02 00:00:00",
303 order_by="timestamp",
304 limit=10,
305 )
306 sql = data["sql"]
307 displayed = page.text_content("#sql_query")
308 assert displayed is not None
309 assert displayed.strip() == sql
(scubaduck) root@408933a71470:/workspace/scubaduck#
I'll link the changed lines and begin forming the commit message.