#!/usr/bin/env python3
"""
Encord Scene validator (docs-style InputScene)

Validates the *semantic* correctness of Encord "InputScene" JSON as documented here:
https://docs.encord.com/platform-documentation/Index/add-files/add-pcd-data

What it checks (offline):
- JSON shape: scenes[] -> scene -> (content OR direct streams)
- Each stream object has a valid `type`
- `events` structure is correct per stream type
- Cross-references:
  - image streams: `camera` must reference a camera_parameters stream
  - streams with `frameOfReference`: must reference a frame_of_reference stream
- Timestamp sanity (numeric, non-null; monotonic per stream)
- Camera extrinsics shape (no pose wrapper; position/rotation dicts)
- FoR pose shape (pose.position + pose.rotation quaternion dict)
- URI sanity (string, basic extension checks)

Usage:

Summary only (fastest, minimal output):

  python encord-scene-validator.py my-scene.json --summary-only

Emit issues as JSONL (best for log pipelines / big runs):

  python encord-scene-validator.py my-scene.json --emit-jsonl > issues.jsonl
"""

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import math
import re
import sys
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, List

NUM = (int, float)

def is_number(x: Any) -> bool:
    """Return True when *x* is a finite real number usable as a float.

    Rejected values:
    - anything that is not an ``int`` or ``float``;
    - booleans (``bool`` is a subclass of ``int``, so JSON ``true``/``false``
      would otherwise be accepted as 1/0);
    - NaN and +/-infinity;
    - integers too large to be represented as a float (callers convert
      accepted values with ``float(...)``, which would raise for them).
    """
    if isinstance(x, bool) or not isinstance(x, NUM):
        return False
    try:
        return math.isfinite(x)
    except OverflowError:
        # Integer that does not fit into a float.
        return False

def is_xyz(d: Any) -> bool:
    """Return True when *d* is a dict whose ``x``, ``y`` and ``z`` entries all pass ``is_number``."""
    if not isinstance(d, dict):
        return False
    for axis in ("x", "y", "z"):
        if axis not in d or not is_number(d[axis]):
            return False
    return True

def is_xyzw(d: Any) -> bool:
    """Return True when *d* is a dict whose ``x``, ``y``, ``z`` and ``w`` entries all pass ``is_number``."""
    if not isinstance(d, dict):
        return False
    for component in ("x", "y", "z", "w"):
        if component not in d or not is_number(d[component]):
            return False
    return True

def guess_ext(uri: str) -> str:
    """Return the lower-cased extension of the file named by *uri*, or ``""``.

    Only the last path segment is inspected. The query string, the fragment
    and a leading ``scheme://authority`` are ignored, so dots inside them
    (signed-URL parameters, host names, e-mail addresses) are never mistaken
    for a file extension. The extension must consist of ASCII letters/digits.
    """
    # Drop "?query" / "#fragment" first so their content never matters.
    path = re.split(r"[?#]", uri, maxsplit=1)[0]

    scheme_end = path.find("://")
    if scheme_end != -1:
        # Skip "scheme://authority"; without a following "/" there is no path at all.
        _authority, slash, path = path[scheme_end + 3:].partition("/")
        if not slash:
            return ""

    filename = path.rsplit("/", 1)[-1]
    m = re.search(r"\.([A-Za-z0-9]+)$", filename)
    return m.group(1).lower() if m else ""

def normalize_scene_content(scene_obj: Dict[str, Any]) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Extract the stream mapping from a scene object.

    Two docs patterns are accepted:
      A) scene: { worldConvention, cameraConvention, content: {...streams...} }
      B) scene: { <streams...> } (no content wrapper)

    Returns a ``(streams, problem)`` pair. ``problem`` is None on success and a
    short message (with an empty ``streams`` dict) when the shape is wrong.
    """
    if not isinstance(scene_obj, dict):
        return {}, "scene is not an object"

    if "content" not in scene_obj:
        # Pattern B: every key except the convention/config ones is a stream.
        non_stream_keys = ("worldConvention", "cameraConvention", "config")
        streams = {}
        for key, value in scene_obj.items():
            if key not in non_stream_keys:
                streams[key] = value
        return streams, None

    # Pattern A: the wrapper itself is returned (not a copy).
    wrapped = scene_obj.get("content")
    if isinstance(wrapped, dict):
        return wrapped, None
    return {}, "scene.content is not an object"

@dataclass(frozen=True)
class Issue:
    """A single validation finding (immutable: the dataclass is frozen)."""

    level: str   # "ERROR" or "WARN"
    code: str    # stable machine-readable code
    msg: str     # human-readable description of the problem
    scene_index: int  # index into `scenes`; -1 is used for payload-level issues
    stream: Optional[str] = None      # stream key the issue belongs to, when applicable
    event_index: Optional[int] = None  # index into the stream's `events`, when applicable
    field: Optional[str] = None       # offending field name (may be dotted, e.g. "pose.position")

class FastSceneValidator:
    """
    Offline-only Encord docs-style scene validator optimized for scale.
    Adds optional "projection-correct" checks to catch subtle rendering issues
    like radius-indicator / overlay inconsistencies caused by missing calibration FoRs.
    """

    def __init__(
        self,
        *,
        strict: bool,
        projection_correct: bool,
        max_examples_per_code: int,
        emit_jsonl: bool,
        summary_only: bool,
    ) -> None:
        self.strict = strict
        self.projection_correct = projection_correct
        self.max_examples_per_code = max_examples_per_code
        self.emit_jsonl = emit_jsonl and not summary_only

        self.counts = Counter()                  # code -> count
        self.level_counts = Counter()            # ERROR/WARN -> count
        self.examples: Dict[str, list[Issue]] = defaultdict(list)

    def report(self, issue: Issue) -> None:
        self.level_counts[issue.level] += 1
        self.counts[issue.code] += 1

        if len(self.examples[issue.code]) < self.max_examples_per_code:
            self.examples[issue.code].append(issue)

        if self.emit_jsonl:
            out = {
                "level": issue.level,
                "code": issue.code,
                "msg": issue.msg,
                "scene_index": issue.scene_index,
            }
            if issue.stream is not None: out["stream"] = issue.stream
            if issue.event_index is not None: out["event_index"] = issue.event_index
            if issue.field is not None: out["field"] = issue.field
            print(json.dumps(out, ensure_ascii=False))

    def validate(self, payload: Any) -> int:
        if not isinstance(payload, dict):
            self.report(Issue("ERROR", "TOP_NOT_OBJECT", "Top-level JSON must be an object", -1))
            return 2

        scenes = payload.get("scenes")
        if not isinstance(scenes, list) or not scenes:
            self.report(Issue("ERROR", "SCENES_MISSING", "`scenes` must be a non-empty list", -1))
            return 2

        for si, scene_entry in enumerate(scenes):
            if not isinstance(scene_entry, dict):
                self.report(Issue("ERROR", "SCENE_NOT_OBJECT", "Scene entry must be an object", si))
                continue

            scene_obj = scene_entry.get("scene")
            if not isinstance(scene_obj, dict):
                self.report(Issue("ERROR", "SCENE_FIELD_MISSING", "`scene` must be an object", si))
                continue

            content, warn = normalize_scene_content(scene_obj)
            if warn:
                self.report(Issue("WARN", "SCENE_CONTENT_SHAPE", warn, si))

            if not content:
                self.report(Issue("ERROR", "NO_STREAMS", "No streams found in scene", si))
                continue

            self._validate_scene_content(si, content)

        if self.level_counts["ERROR"] > 0:
            return 1
        if self.strict and self.level_counts["WARN"] > 0:
            return 1
        return 0

    # -------------------------
    # Scene-level helpers
    # -------------------------

    def _collect_first_timestamp(self, stream_obj: Dict[str, Any]) -> Optional[float]:
        events = stream_obj.get("events")
        if not isinstance(events, list):
            return None
        for e in events:
            if isinstance(e, dict) and is_number(e.get("timestamp")):
                return float(e["timestamp"])
        return None

    def _collect_all_timestamps(self, stream_obj: Dict[str, Any]) -> List[float]:
        out: List[float] = []
        events = stream_obj.get("events")
        if not isinstance(events, list):
            return out
        for e in events:
            if isinstance(e, dict) and is_number(e.get("timestamp")):
                out.append(float(e["timestamp"]))
        return out

    def _is_calibration_name(self, name: str) -> bool:
        return name.endswith("-calibration")


    def _expected_calibration_for(self, stream_name: str, stream_type: str) -> Optional[str]:
        """Return the conventional '*-calibration' FoR name for a given stream, when applicable."""
        if stream_type == "camera_parameters" and stream_name.endswith("-camera"):
            return stream_name[:-7] + "-calibration"  # replace '-camera'
        if stream_type == "point_cloud":
            return stream_name + "-calibration"
        return None

    def _for_chain_reaches_ego(self, start_for: str, types: Dict[str, str], content: Dict[str, Any]) -> bool:
        """
        Checks whether a FoR stream reaches ego_vehicle by following parentForId chain.
        Accepts direct parent or multi-hop.
        """
        seen = set()
        cur = start_for
        while True:
            if cur in seen:
                return False
            seen.add(cur)

            if cur == "ego_vehicle":
                return True

            obj = content.get(cur)
            if not isinstance(obj, dict) or types.get(cur) != "frame_of_reference":
                return False
            parent = obj.get("parentForId")
            if parent is None:
                return False
            if not isinstance(parent, str):
                return False
            cur = parent

    def _projection_checks(self, si: int, types: Dict[str, str], content: Dict[str, Any]) -> None:
        """        Enforces additional constraints for correct overlays / radius indicator behavior.

        Name-agnostic calibration checks:
        - Works for any stream keys (no reliance on LIDAR_TOP / CAM_* naming conventions)
        - Requires sensor streams (point_cloud, camera_parameters) to reference a valid FoR
        - Encourages per-sensor FoRs (warn if many sensors share the same FoR)
        - Ensures FoRs chain up to ego_vehicle (when present)
        - Ensures FoR timestamps cover the first sensor timestamps
        """
        ego_exists = types.get("ego_vehicle") == "frame_of_reference"
        if not ego_exists:
            self.report(Issue(
                "WARN",
                "EGO_MISSING",
                "projection-correct mode: ego_vehicle FoR missing; overlays may behave unexpectedly",
                si,
            ))

        # Identify first sensor timestamps (point_cloud & image)
        min_pc_ts: Optional[float] = None
        min_img_ts: Optional[float] = None

        for name, t in types.items():
            obj = content.get(name)
            if not isinstance(obj, dict):
                continue
            if t == "point_cloud":
                ts0 = self._collect_first_timestamp(obj)
                if ts0 is not None:
                    min_pc_ts = ts0 if min_pc_ts is None else min(min_pc_ts, ts0)
            elif t == "image":
                ts0 = self._collect_first_timestamp(obj)
                if ts0 is not None:
                    min_img_ts = ts0 if min_img_ts is None else min(min_img_ts, ts0)

        # Map FoR usage by sensor streams (point_cloud + camera_parameters)
        for_users: Dict[str, list[str]] = defaultdict(list)

        for name, t in types.items():
            obj = content.get(name)
            if not isinstance(obj, dict):
                continue

            if t in ("point_cloud", "camera_parameters"):
                fo = obj.get("frameOfReference")
                if not isinstance(fo, str) or not fo.strip():
                    self.report(Issue(
                        "ERROR",
                        "PROJ_FOR_REQUIRED",
                        f"projection-correct: stream '{name}' must define frameOfReference",
                        si,
                        stream=name,
                        field="frameOfReference",
                    ))
                    continue

                if fo not in types:
                    self.report(Issue(
                        "ERROR",
                        "FOR_REF_MISSING",
                        f"frameOfReference '{fo}' must reference an existing stream",
                        si,
                        stream=name,
                        field="frameOfReference",
                    ))
                    continue

                if types.get(fo) != "frame_of_reference":
                    self.report(Issue(
                        "ERROR",
                        "FOR_REF_WRONG_TYPE",
                        f"frameOfReference '{fo}' is not frame_of_reference",
                        si,
                        stream=name,
                        field="frameOfReference",
                    ))
                    continue

                for_users[fo].append(name)

                # Strong warning if someone points sensors directly at ego_vehicle
                if fo == "ego_vehicle":
                    self.report(Issue(
                        "WARN",
                        "SENSOR_FOR_IS_EGO",
                        f"projection-correct: stream '{name}' uses frameOfReference 'ego_vehicle'. Typically each sensor has its own calibration FoR parented to ego_vehicle.",
                        si,
                        stream=name,
                        field="frameOfReference",
                    ))

                # FoR must reach ego_vehicle (or at least have a coherent root)
                if ego_exists:
                    if not self._for_chain_reaches_ego(fo, types, content):
                        self.report(Issue(
                            "WARN",
                            "PROJ_FOR_NOT_CHAINED_TO_EGO",
                            f"projection-correct: FoR '{fo}' for stream '{name}' does not parent up to ego_vehicle; transforms/overlays may be wrong.",
                            si,
                            stream=name,
                            field="frameOfReference",
                        ))

                # Calibration coverage: FoR should have an event at/before first sensor timestamp
                fo_obj = content.get(fo)
                fo_ts = self._collect_all_timestamps(fo_obj if isinstance(fo_obj, dict) else {})
                if fo_ts:
                    if t == "point_cloud" and min_pc_ts is not None and min(fo_ts) > min_pc_ts:
                        self.report(Issue(
                            "WARN",
                            "PROJ_FOR_TIME_COVERAGE",
                            f"projection-correct: FoR '{fo}' events start after first point_cloud timestamp; early frames may use missing transforms.",
                            si,
                            stream=name,
                        ))
                    if t == "camera_parameters" and min_img_ts is not None and min(fo_ts) > min_img_ts:
                        self.report(Issue(
                            "WARN",
                            "PROJ_FOR_TIME_COVERAGE",
                            f"projection-correct: FoR '{fo}' events start after first image timestamp; early frames may use missing transforms.",
                            si,
                            stream=name,
                        ))
                else:
                    self.report(Issue(
                        "WARN",
                        "FOR_NO_TIMESTAMPS",
                        f"projection-correct: FoR '{fo}' has no numeric timestamps; transform selection may be unreliable.",
                        si,
                        stream=fo,
                    ))

            # Optional: if an image stream has frameOfReference and it points somewhere odd, warn
            if t == "image":
                fo = obj.get("frameOfReference")
                if isinstance(fo, str) and fo in types and types.get(fo) == "frame_of_reference":
                    if fo == "ego_vehicle":
                        self.report(Issue(
                            "WARN",
                            "IMG_FOR_IS_EGO",
                            f"projection-correct: image stream '{name}' frameOfReference is 'ego_vehicle'. This is unusual; typically images omit frameOfReference or use a camera-specific FoR.",
                            si,
                            stream=name,
                            field="frameOfReference",
                        ))

        # If multiple sensors share the same FoR, flag it (often a calibration wiring mistake)
        for fo, users in for_users.items():
            if len(users) > 1 and fo != "ego_vehicle":
                self.report(Issue(
                    "WARN",
                    "FOR_SHARED_BY_SENSORS",
                    f"projection-correct: FoR '{fo}' is referenced by multiple sensor streams ({', '.join(users)}). If overlays look wrong in some views, ensure each sensor has its own calibration FoR.",
                    si,
                    stream=fo,
                ))

    # -------------------------
    # Core validation
    # -------------------------

    def _validate_scene_content(self, si: int, content: Dict[str, Any]) -> None:
        # Collect types first (for refs)
        types: Dict[str, str] = {}
        for name, obj in content.items():
            if not isinstance(obj, dict):
                self.report(Issue("ERROR", "STREAM_NOT_OBJECT", "Stream must be an object", si, stream=name))
                continue
            t = obj.get("type")
            if not isinstance(t, str):
                self.report(Issue("ERROR", "STREAM_NO_TYPE", "Stream missing discriminator `type`", si, stream=name, field="type"))
                continue
            types[name] = t

        # Validate each stream
        for name, obj in content.items():
            if not isinstance(obj, dict):
                continue
            t = types.get(name)
            if not t:
                continue

            if t == "point_cloud":
                self._validate_point_cloud(si, name, obj, types, content)
            elif t == "image":
                self._validate_image(si, name, obj, types, content)
            elif t == "camera_parameters":
                self._validate_camera_parameters(si, name, obj, types)
            elif t == "frame_of_reference":
                self._validate_for(si, name, obj, types)
            else:
                self.report(Issue("WARN", "UNKNOWN_TYPE", f"Unknown stream type '{t}'", si, stream=name, field="type"))

        # Extra projection correctness checks
        if self.projection_correct:
            self._projection_checks(si, types, content)

    def _validate_events_uri_ts(self, si: int, stream: str, events: Any, *, require_uri: bool) -> None:
        if not isinstance(events, list) or not events:
            self.report(Issue("ERROR", "EVENTS_EMPTY", "events must be a non-empty list", si, stream=stream, field="events"))
            return

        last_ts: Optional[float] = None
        for ei, e in enumerate(events):
            if not isinstance(e, dict):
                self.report(Issue("ERROR", "EVENT_NOT_OBJECT", "event must be an object", si, stream=stream, event_index=ei))
                continue

            if require_uri:
                uri = e.get("uri")
                if not isinstance(uri, str) or not uri.strip():
                    self.report(Issue("ERROR", "EVENT_URI_BAD", "event.uri must be a non-empty string", si, stream=stream, event_index=ei, field="uri"))

            if "timestamp" in e:
                ts = e.get("timestamp")
                if ts is None:
                    self.report(Issue("ERROR", "TS_NULL", "timestamp must not be null", si, stream=stream, event_index=ei, field="timestamp"))
                elif not is_number(ts):
                    self.report(Issue("ERROR", "TS_NOT_NUMBER", "timestamp must be a number", si, stream=stream, event_index=ei, field="timestamp"))
                else:
                    tsf = float(ts)
                    if last_ts is not None and tsf < last_ts:
                        self.report(Issue("WARN", "TS_NOT_MONO", "timestamps not monotonic", si, stream=stream))
                    last_ts = tsf

    def _validate_point_cloud(self, si: int, name: str, s: Dict[str, Any], types: Dict[str, str], content: Dict[str, Any]) -> None:
        fo = s.get("frameOfReference")
        if fo is not None:
            if not isinstance(fo, str) or fo not in types:
                self.report(Issue("ERROR", "FOR_REF_MISSING", "frameOfReference must reference an existing stream", si, stream=name, field="frameOfReference"))
            elif types[fo] != "frame_of_reference":
                self.report(Issue("ERROR", "FOR_REF_WRONG_TYPE", f"frameOfReference '{fo}' is not frame_of_reference", si, stream=name, field="frameOfReference"))
            else:
                # Helpful warning if calibration exists but you bound to ego_vehicle
                if fo == "ego_vehicle":
                    any_calib = any(k.endswith("-calibration") and v == "frame_of_reference" for k, v in types.items())
                    if any_calib:
                        self.report(Issue(
                            "WARN",
                            "PC_BOUND_TO_EGO_WITH_CALIB",
                            "point_cloud is bound directly to ego_vehicle while '*-calibration' FoRs exist; overlays (radius indicator) may be incorrect. Prefer binding to sensor calibration FoR.",
                            si,
                            stream=name,
                            field="frameOfReference",
                        ))

        events = s.get("events")
        self._validate_events_uri_ts(si, name, events, require_uri=True)

        if isinstance(events, list):
            for ei, e in enumerate(events):
                if isinstance(e, dict) and isinstance(e.get("uri"), str):
                    ext = guess_ext(e["uri"])
                    if ext and ext not in {"pcd", "ply", "las", "laz"}:
                        self.report(Issue("WARN", "PC_EXT_ODD", f"Unexpected point cloud extension '.{ext}'", si, stream=name, event_index=ei, field="uri"))

    def _validate_image(self, si: int, name: str, s: Dict[str, Any], types: Dict[str, str], content: Dict[str, Any]) -> None:
        cam = s.get("camera")
        if not isinstance(cam, str) or cam not in types:
            self.report(Issue("ERROR", "IMG_CAMERA_MISSING", "image stream must have `camera` referencing a camera_parameters stream", si, stream=name, field="camera"))
        elif types[cam] != "camera_parameters":
            self.report(Issue("ERROR", "IMG_CAMERA_WRONG_TYPE", f"camera '{cam}' is not camera_parameters", si, stream=name, field="camera"))

        fo = s.get("frameOfReference")
        if fo is not None:
            if not isinstance(fo, str) or fo not in types:
                self.report(Issue("ERROR", "FOR_REF_MISSING", "frameOfReference must reference an existing stream", si, stream=name, field="frameOfReference"))
            elif types[fo] != "frame_of_reference":
                self.report(Issue("ERROR", "FOR_REF_WRONG_TYPE", f"frameOfReference '{fo}' is not frame_of_reference", si, stream=name, field="frameOfReference"))

        events = s.get("events")
        self._validate_events_uri_ts(si, name, events, require_uri=True)

        if isinstance(events, list):
            for ei, e in enumerate(events):
                if isinstance(e, dict) and isinstance(e.get("uri"), str):
                    ext = guess_ext(e["uri"])
                    if ext and ext not in {"jpg", "jpeg", "png", "webp"}:
                        self.report(Issue("WARN", "IMG_EXT_ODD", f"Unexpected image extension '.{ext}'", si, stream=name, event_index=ei, field="uri"))

    def _validate_camera_parameters(self, si: int, name: str, s: Dict[str, Any], types: Dict[str, str]) -> None:
        events = s.get("events")
        if not isinstance(events, list) or not events:
            self.report(Issue("ERROR", "CAM_EVENTS_EMPTY", "camera_parameters.events must be non-empty list", si, stream=name, field="events"))
            return

        # camera_parameters can reference FoR (important for correct projections)
        fo = s.get("frameOfReference")
        if fo is not None:
            if not isinstance(fo, str) or fo not in types:
                self.report(Issue("ERROR", "FOR_REF_MISSING", "camera_parameters.frameOfReference must reference an existing stream", si, stream=name, field="frameOfReference"))
            elif types[fo] != "frame_of_reference":
                self.report(Issue("ERROR", "FOR_REF_WRONG_TYPE", f"camera_parameters.frameOfReference '{fo}' is not frame_of_reference", si, stream=name, field="frameOfReference"))

        for ei, e in enumerate(events):
            if not isinstance(e, dict):
                self.report(Issue("ERROR", "EVENT_NOT_OBJECT", "event must be an object", si, stream=name, event_index=ei))
                continue

            if "extrinsics" in e:
                extr = e.get("extrinsics")
                if not isinstance(extr, dict):
                    self.report(Issue("ERROR", "EXTR_NOT_OBJECT", "extrinsics must be an object", si, stream=name, event_index=ei, field="extrinsics"))
                    continue
                if "pose" in extr:
                    self.report(Issue("ERROR", "EXTR_HAS_POSE", "extrinsics must not have pose wrapper (use position/rotation directly)", si, stream=name, event_index=ei, field="extrinsics"))
                    continue

                if not is_xyz(extr.get("position")):
                    self.report(Issue("ERROR", "EXTR_POS_BAD", "extrinsics.position must be {x,y,z} numbers", si, stream=name, event_index=ei, field="extrinsics.position"))
                if not is_xyzw(extr.get("rotation")):
                    self.report(Issue("ERROR", "EXTR_ROT_BAD", "extrinsics.rotation must be quaternion {x,y,z,w} numbers", si, stream=name, event_index=ei, field="extrinsics.rotation"))

    def _validate_for(self, si: int, name: str, s: Dict[str, Any], types: Dict[str, str]) -> None:
        fid = s.get("id")
        if not isinstance(fid, str) or not fid.strip():
            self.report(Issue("ERROR", "FOR_ID_BAD", "frame_of_reference must have non-empty id", si, stream=name, field="id"))

        parent = s.get("parentForId")
        if parent is not None:
            if not isinstance(parent, str) or parent not in types:
                self.report(Issue("ERROR", "FOR_PARENT_BAD", "parentForId must reference an existing FoR stream or be null", si, stream=name, field="parentForId"))
            elif types[parent] != "frame_of_reference":
                self.report(Issue("ERROR", "FOR_PARENT_WRONG_TYPE", f"parentForId '{parent}' is not frame_of_reference", si, stream=name, field="parentForId"))

        events = s.get("events")
        if not isinstance(events, list) or not events:
            self.report(Issue("ERROR", "FOR_EVENTS_EMPTY", "frame_of_reference.events must be non-empty list", si, stream=name, field="events"))
            return

        last_ts: Optional[float] = None
        for ei, e in enumerate(events):
            if not isinstance(e, dict):
                self.report(Issue("ERROR", "EVENT_NOT_OBJECT", "event must be an object", si, stream=name, event_index=ei))
                continue
            ts = e.get("timestamp")
            if ts is None:
                self.report(Issue("ERROR", "TS_NULL", "timestamp must not be null", si, stream=name, event_index=ei, field="timestamp"))
            elif not is_number(ts):
                self.report(Issue("ERROR", "TS_NOT_NUMBER", "timestamp must be a number", si, stream=name, event_index=ei, field="timestamp"))
            else:
                tsf = float(ts)
                if last_ts is not None and tsf < last_ts:
                    self.report(Issue("WARN", "TS_NOT_MONO", "timestamps not monotonic", si, stream=name))
                last_ts = tsf

            pose = e.get("pose")
            if not isinstance(pose, dict):
                self.report(Issue("ERROR", "POSE_NOT_OBJECT", "pose must be an object", si, stream=name, event_index=ei, field="pose"))
                continue
            if not is_xyz(pose.get("position")):
                self.report(Issue("ERROR", "POSE_POS_BAD", "pose.position must be {x,y,z} numbers", si, stream=name, event_index=ei, field="pose.position"))
            if not is_xyzw(pose.get("rotation")):
                self.report(Issue("ERROR", "POSE_ROT_BAD", "pose.rotation must be quaternion {x,y,z,w} numbers", si, stream=name, event_index=ei, field="pose.rotation"))
            else:
                rot = pose.get("rotation")
                # Quaternion should be (close to) unit length. Non-unit quaternions often indicate convention/scale bugs.
                norm = math.sqrt(rot["x"]**2 + rot["y"]**2 + rot["z"]**2 + rot["w"]**2)
                if norm == 0 or abs(norm - 1.0) > 1e-3:
                    self.report(Issue("WARN", "POSE_QUAT_NOT_UNIT", f"pose.rotation quaternion norm is {norm:.6f} (expected ~1.0)", si, stream=name, event_index=ei, field="pose.rotation"))

def main() -> int:
    """Command-line entry point: validate one scene JSON file and print a summary.

    Per-issue JSONL lines (when requested) go to stdout; the summary and any
    input error go to stderr.

    Returns:
        The validator's status (0 = clean, 1 = issues found, 2 = unusable
        top-level structure), or 2 when the file cannot be read or parsed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("json_path", type=str)
    ap.add_argument("--strict", action="store_true", help="treat warnings as errors")
    ap.add_argument("--projection-correct", action="store_true",
                    help="enforce extra FoR/calibration rules needed for correct overlays (e.g., radius indicator)")
    ap.add_argument("--max-examples", type=int, default=3, help="store up to N example issues per code in summary")
    ap.add_argument("--emit-jsonl", action="store_true", help="emit each issue as JSONL line to stdout (good for scale)")
    ap.add_argument("--summary-only", action="store_true", help="do not emit per-issue lines; only print summary")
    args = ap.parse_args()

    path = Path(args.json_path)
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        print(f"error: cannot read {path}: {exc}", file=sys.stderr)
        return 2
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f"error: {path} is not valid UTF-8 JSON: {exc}", file=sys.stderr)
        return 2

    v = FastSceneValidator(
        strict=args.strict,
        projection_correct=args.projection_correct,
        max_examples_per_code=args.max_examples,
        emit_jsonl=args.emit_jsonl,
        summary_only=args.summary_only,
    )
    rc = v.validate(payload)

    # Summary to stderr (keeps stdout clean for JSONL pipelines)
    print("\n=== SUMMARY ===", file=sys.stderr)
    print(f"errors={v.level_counts['ERROR']} warnings={v.level_counts['WARN']}", file=sys.stderr)
    for code, count in v.counts.most_common():
        print(f"{code}: {count}", file=sys.stderr)
        for ex in v.examples[code]:
            where = f"scene={ex.scene_index}"
            if ex.stream is not None: where += f" stream={ex.stream}"
            if ex.event_index is not None: where += f" event={ex.event_index}"
            if ex.field is not None: where += f" field={ex.field}"
            print(f"  - {where}: {ex.msg}", file=sys.stderr)

    return rc

# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())