Source code for cellucid.server

"""
Cellucid Data Server

A lightweight HTTP server for serving pre-exported cellucid datasets.
Supports both local and remote access patterns:

1. Local mode: Run on your machine, open browser locally
2. SSH tunnel mode: Run on remote server, access via SSH port forwarding
3. Jupyter mode: Run alongside Jupyter, embed in notebooks

Usage:
    from cellucid import serve
    serve("/path/to/data", port=8765)

Or via CLI:
    cellucid serve /path/to/data --port 8765

The server provides:
- Static file serving for dataset files
- CORS headers for cross-origin access (needed for web viewer)
- Health check endpoint for connection validation

For serving AnnData directly (without pre-export), use:
    from cellucid import serve_anndata
    serve_anndata("/path/to/data.h5ad")
"""

from __future__ import annotations

import json
import logging
import threading
import webbrowser
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path
from urllib.parse import unquote, urlparse

logger = logging.getLogger("cellucid.server")

# Import shared configuration from _server_base to avoid duplication
from ._server_base import (
    CORSMixin,
    DEFAULT_PORT,
    DEFAULT_HOST,
    ensure_port_available,
    print_step,
    print_detail,
    print_success,
    print_server_banner,
)


class CORSRequestHandler(CORSMixin, SimpleHTTPRequestHandler):
    """HTTP handler with CORS support for serving dataset files."""

    allow_caching = True  # Static files can be cached

    def __init__(
        self,
        *args,
        data_dir: Path,
        server_info: dict,
        web_proxy: bool = False,
        **kwargs,
    ):
        self.data_dir = data_dir
        self.server_info = server_info
        self.web_proxy = bool(web_proxy)
        # Must call super().__init__ last because it calls do_GET immediately
        super().__init__(*args, directory=str(data_dir), **kwargs)

    def end_headers(self):
        """Add CORS headers to every response."""
        self.add_cors_headers()
        super().end_headers()

    def do_POST(self):
        """Handle POST requests (events from frontend)."""
        if self.handle_event_post():
            return
        if self.handle_session_bundle_post():
            return
        # No other POST endpoints - return 404
        self.send_error_response(404, f"POST not supported for path: {self.path}")

    def do_GET(self):
        """Handle GET requests with special endpoints."""
        parsed = urlparse(self.path)
        path = unquote(parsed.path)

        # Proxy the hosted viewer UI assets so the viewer runs on the same
        # origin as the dataset server (avoids mixed-content).
        if self.web_proxy and self.handle_web_proxy_get(path):
            return

        # Root path - redirect to viewer
        if path == "/" or path == "/index.html":
            # Always serve the viewer UI from this server via the hosted-asset proxy.
            if self.handle_web_proxy_get("/index.html"):
                return
            self.send_error_response(503, "Cellucid viewer UI unavailable")
            return

        # Health check endpoint
        if path == "/_cellucid/health":
            self.send_json({
                "status": "ok",
                "type": "exported",
                "version": self.server_info.get("version", "unknown"),
            })
            return

        # Server info endpoint
        if path == "/_cellucid/info":
            self.send_json(self.server_info)
            return

        # Datasets list endpoint
        if path == "/_cellucid/datasets":
            datasets = self._list_datasets()
            self.send_json({"datasets": datasets})
            return

        # Regular file serving
        super().do_GET()

    def _list_datasets(self) -> list[dict]:
        """List available datasets in the data directory."""
        datasets = []

        # Check if data_dir itself is a dataset
        if self._is_dataset_dir(self.data_dir):
            ds_id, ds_name = self._get_dataset_identity_fields(self.data_dir)
            datasets.append({
                "id": ds_id,
                "path": "/",
                "name": ds_name,
            })
        else:
            # Look for subdirectories that are datasets
            for subdir in self.data_dir.iterdir():
                if subdir.is_dir() and self._is_dataset_dir(subdir):
                    ds_id, ds_name = self._get_dataset_identity_fields(subdir)
                    datasets.append({
                        "id": ds_id,
                        "path": f"/{subdir.name}/",
                        "name": ds_name,
                    })

        return datasets

    def _is_dataset_dir(self, path: Path) -> bool:
        """Check if a directory is a valid cellucid dataset."""
        # Dev-phase strictness: dataset_identity.json is required by the frontend.
        if not (path / "dataset_identity.json").exists():
            return False
        # Must have obs_manifest.json
        if not (path / "obs_manifest.json").exists():
            return False
        # Must have at least one points file
        for dim in ["1d", "2d", "3d", "4d"]:
            if (path / f"points_{dim}.bin").exists():
                return True
            if (path / f"points_{dim}.bin.gz").exists():
                return True
        return False

    def _get_dataset_identity_fields(self, path: Path) -> tuple[str, str]:
        """Return (dataset_id, dataset_name) for a dataset directory."""
        identity_file = path / "dataset_identity.json"
        if identity_file.exists():
            try:
                with open(identity_file) as f:
                    identity = json.load(f)
                    dataset_id = identity.get("id", path.name)
                    dataset_name = identity.get("name", path.name)
                    return str(dataset_id or path.name), str(dataset_name or path.name)
            except Exception:
                pass
        return path.name, path.name

    def log_message(self, format: str, *args):
        """Override to use Python logging instead of stderr."""
        logger.debug("%s - %s", self.address_string(), format % args)


[docs] class CellucidServer: """ Cellucid data server for serving datasets over HTTP. Supports multiple deployment modes: - Local: Direct browser access on localhost - SSH tunnel: Access via port forwarding from remote server - Jupyter: Embedded in notebook environment Example: server = CellucidServer("/path/to/data") server.start() # Blocking # Or non-blocking: server.start_background() # ... do other things ... server.stop() """
[docs] def __init__( self, data_dir: str | Path, port: int = DEFAULT_PORT, host: str = DEFAULT_HOST, open_browser: bool = False, quiet: bool = False, ): """ Initialize the server. Args: data_dir: Path to the dataset directory (single dataset or multi-dataset) port: Port to serve on (default: 8765) host: Host to bind to (default: 127.0.0.1 for localhost only) open_browser: Whether to open the browser on start quiet: Suppress info messages """ self.data_dir = Path(data_dir).resolve() self.port = port self.host = host self.open_browser = open_browser self.quiet = quiet # Local web assets and the legacy hosted-viewer mode are intentionally disabled in dev. # We always serve the UI from this server via the hosted-asset proxy to avoid mixed-content. self.web_proxy = True # Step 1: Validate dataset if not quiet: print_step(1, 3, "Validating dataset") print_detail("Path", str(self.data_dir)) if not self.data_dir.exists(): raise FileNotFoundError(f"Data directory not found: {self.data_dir}") if not quiet: print_success("Dataset valid") # Step 2: Load dataset info if not quiet: print_step(2, 3, "Loading dataset info") self._print_dataset_info() print_success("Dataset loaded") self._server: HTTPServer | None = None self._thread: threading.Thread | None = None self._running = False # Version from package try: from . import __version__ except ImportError: __version__ = "0.0.0" self.server_info = { "version": __version__, "host": self.host, "port": self.port, "mode": "standalone", }
def _print_dataset_info(self): """Print information about the dataset.""" # Try to load dataset identity for more info identity_file = self.data_dir / "dataset_identity.json" if identity_file.exists(): try: with open(identity_file) as f: identity = json.load(f) stats = identity.get("stats", {}) if "n_cells" in stats: print_detail("Cells", f"{stats['n_cells']:,}") if "n_genes" in stats: print_detail("Genes", f"{stats['n_genes']:,}") if "has_connectivity" in stats: print_detail("Connectivity", "yes" if stats["has_connectivity"] else "no") except Exception: pass @property def url(self) -> str: """Get the server URL.""" return f"http://{self.host}:{self.port}" @property def viewer_url(self) -> str: """Get the full URL to open the viewer with this server's data.""" return f"{self.url}/"
[docs] def start(self, blocking: bool = True): """ Start the server. Args: blocking: If True, block until interrupted. If False, start in background. """ if self._running: logger.warning("Server is already running") return # Step 3: Start server if not self.quiet: print_step(3, 3, "Starting server") # Pre-warm the hosted-asset proxy cache so first browser load has a # visible progress indicator and doesn't surprise users later with # missing lazily-loaded assets. if self.web_proxy and not self.quiet: try: from .web_cache import ensure_web_ui_cached print_detail("Viewer UI cache", "prefetching (one-time per web build)") ensure_web_ui_cached(force=False, show_progress=True) print_success("Viewer UI cached") except Exception as e: print_detail("Viewer UI cache", f"prefetch failed: {e}") # Ensure port is available (finds new one if needed) self.port = ensure_port_available(self.host, self.port, self.quiet) self.server_info["port"] = self.port # Create handler with data directory handler = partial( CORSRequestHandler, data_dir=self.data_dir, server_info=self.server_info, web_proxy=self.web_proxy, ) self._server = HTTPServer((self.host, self.port), handler) self._running = True if not self.quiet: print_success("Server ready") print_server_banner(self.url, self.viewer_url) if self.open_browser: webbrowser.open(self.viewer_url) if blocking: try: self._server.serve_forever() except KeyboardInterrupt: if not self.quiet: print("\nShutting down server...") # Don't call shutdown() here - it would deadlock since we're in the same thread # Just close the server directly self._running = False if self._server: self._server.server_close() self._server = None if not self.quiet: print("Server stopped") else: self._thread = threading.Thread(target=self._server.serve_forever, daemon=True) self._thread.start()
[docs] def start_background(self): """Start the server in a background thread.""" self.start(blocking=False)
[docs] def stop(self): """Stop the server.""" self._running = False if self._server: # shutdown() tells serve_forever() to stop, but we also need # server_close() to release the socket immediately self._server.shutdown() self._server.server_close() self._server = None if not self.quiet: print("Server stopped")
[docs] def is_running(self) -> bool: """Check if the server is running.""" return self._running
[docs] def wait(self): """Wait for the server to stop (blocks until Ctrl+C or stop()).""" if self._thread: try: while self._running: self._thread.join(timeout=1) except KeyboardInterrupt: self.stop()
[docs] def serve( data_dir: str | Path, port: int = DEFAULT_PORT, host: str = DEFAULT_HOST, open_browser: bool = True, quiet: bool = False, ): """ Serve a cellucid dataset directory. This is the main entry point for serving data. It starts an HTTP server that serves the dataset files with CORS headers enabled. Args: data_dir: Path to the dataset directory port: Port to serve on (default: 8765) host: Host to bind to (default: 127.0.0.1) open_browser: Whether to open the viewer in browser (default: True) quiet: Suppress info messages Example: >>> from cellucid import serve >>> serve("/path/to/my_dataset") # For remote server access via SSH: >>> serve("/path/to/data", host="0.0.0.0") # Then on local machine: ssh -L 8765:localhost:8765 remote-server """ server = CellucidServer( data_dir=data_dir, port=port, host=host, open_browser=open_browser, quiet=quiet, ) server.start()