Source code for myst_nb.core.execute

"""Module for executing notebooks."""
from __future__ import annotations

from contextlib import nullcontext, suppress
from datetime import datetime
import os
from pathlib import Path, PurePosixPath
from tempfile import TemporaryDirectory

from jupyter_cache import get_cache
from jupyter_cache.base import CacheBundleIn
from jupyter_cache.cache.db import NbProjectRecord
from jupyter_cache.executors.utils import single_nb_execution
from nbformat import NotebookNode
from typing_extensions import TypedDict

from myst_nb.core.config import NbParserConfig
from myst_nb.core.loggers import LoggerType


class ExecutionResult(TypedDict):
    """Result of executing a notebook."""

    mtime: float
    """POSIX timestamp of the execution time"""
    runtime: float | None
    """runtime in seconds"""
    method: str
    """method used to execute the notebook"""
    succeeded: bool
    """True if the notebook executed successfully"""
    error: str | None
    """error type if the notebook failed to execute"""
    traceback: str | None
    """traceback if the notebook failed"""
class ExecutionError(Exception):
    """Raised when notebook execution fails and `execution_raise_on_error` is true.

    The exception message is the source of the notebook that failed; the
    underlying execution error is attached as the exception's cause.
    """
def _run_notebook(
    notebook: NotebookNode,
    path: Path | None,
    nb_config: NbParserConfig,
    logger: LoggerType,
):
    """Execute the notebook inside the configured working directory.

    :param notebook: The notebook to execute.
    :param path: Path of the source file; its parent directory is used as the
        working directory when ``execution_in_temp`` is false (callers must
        ensure it is not ``None`` in that case).
    :param nb_config: The configuration for the notebook parser.
    :param logger: The logger to use.
    :returns: The result object returned by ``single_nb_execution``.
    """
    if nb_config.execution_in_temp:
        cwd_context = TemporaryDirectory()
    else:
        cwd_context = nullcontext(str(path.parent))  # type: ignore

    # execute in the context of the current working directory
    with cwd_context as cwd:
        cwd = os.path.abspath(cwd)
        logger.info(
            "Executing notebook using "
            + ("temporary" if nb_config.execution_in_temp else "local")
            + " CWD"
        )
        result = single_nb_execution(
            notebook,
            cwd=cwd,
            allow_errors=nb_config.execution_allow_errors,
            timeout=nb_config.execution_timeout,
            meta_override=True,  # TODO still support this?
        )
    return result


def _report_failure(
    result, source: str, nb_config: NbParserConfig, logger: LoggerType
) -> None:
    """Raise or log a warning for a failed execution, depending on the config.

    :raises ExecutionError: if ``execution_raise_on_error`` is true.
    """
    if nb_config.execution_raise_on_error:
        raise ExecutionError(str(source)) from result.err
    msg = f"Executing notebook failed: {result.err.__class__.__name__}"
    if nb_config.execution_show_tb:
        msg += f"\n{result.exc_string}"
    logger.warning(msg, subtype="exec")


def _execution_metadata(result, method: str) -> ExecutionResult:
    """Build the execution metadata for a notebook that was just executed."""
    failed = result.err is not None
    return {
        "mtime": datetime.now().timestamp(),
        "runtime": result.time,
        "method": method,
        "succeeded": not failed,
        "error": f"{result.err.__class__.__name__}" if failed else None,
        "traceback": result.exc_string if failed else None,
    }


def execute_notebook(
    notebook: NotebookNode,
    source: str,
    nb_config: NbParserConfig,
    logger: LoggerType,
    read_fmt: None | dict = None,
) -> tuple[NotebookNode, ExecutionResult | None]:
    """Update a notebook's outputs using the given configuration.

    This function may execute the notebook if necessary, to update its outputs,
    or populate from a cache.

    :param notebook: The notebook to update.
    :param source: Path to or description of the input source being processed.
    :param nb_config: The configuration for the notebook parser.
    :param logger: The logger to use.
    :param read_fmt: The format of the input source (to parse to jupyter cache)

    :returns: The updated notebook, and the (optional) execution metadata.

    :raises ValueError: if ``source`` is not an existing file and either
        ``execution_in_temp`` is false (modes 'auto'/'force'), or the mode is
        'cache' and no cached notebook matches.
    :raises ExecutionError: if execution fails and
        ``execution_raise_on_error`` is true.
    """
    # TODO should any of the logging messages be debug instead of info?

    # path should only be None when using docutils programmatically,
    # e.g. source="<string>"
    try:
        candidate = Path(source)
        path = candidate if candidate.is_file() else None
    except OSError:
        path = None  # occurs on Windows for `source="<string>"`

    exec_metadata: ExecutionResult | None = None

    # check if the notebook is excluded from execution by pattern
    if path is not None and nb_config.execution_excludepatterns:
        posix_path = PurePosixPath(path.as_posix())
        for pattern in nb_config.execution_excludepatterns:
            if posix_path.match(pattern):
                logger.info(f"Excluded from execution by pattern: {pattern!r}")
                return notebook, exec_metadata

    # 'auto' mode only executes the notebook if it is missing at least one output
    if nb_config.execution_mode == "auto" and all(
        cell.outputs for cell in notebook.cells if cell["cell_type"] == "code"
    ):
        logger.info("Skipped execution in 'auto' mode (all outputs present)")
        return notebook, exec_metadata

    if nb_config.execution_mode in ("auto", "force"):
        # a local (non-temporary) CWD is derived from the source file's folder
        if not nb_config.execution_in_temp and path is None:
            raise ValueError(
                "source must exist as file, if execution_in_temp=False: "
                f"{source}"
            )

        result = _run_notebook(notebook, path, nb_config, logger)

        if result.err is not None:
            _report_failure(result, source, nb_config, logger)
        else:
            logger.info(f"Executed notebook in {result.time:.2f} seconds")

        exec_metadata = _execution_metadata(result, nb_config.execution_mode)

    elif nb_config.execution_mode == "cache":
        # setup the cache
        cache = get_cache(nb_config.execution_cache_path or ".jupyter_cache")
        # TODO config on what notebook/cell metadata to hash/merge

        # attempt to match the notebook to one in the cache
        cache_record = None
        with suppress(KeyError):
            cache_record = cache.match_cache_notebook(notebook)

        # use the cached notebook if it exists
        if cache_record is not None:
            logger.info(f"Using cached notebook: ID={cache_record.pk}")
            _, notebook = cache.merge_match_into_notebook(notebook)
            exec_metadata = {
                "mtime": cache_record.created.timestamp(),
                "runtime": cache_record.data.get("execution_seconds", None),
                "method": nb_config.execution_mode,
                "succeeded": True,
                "error": None,
                "traceback": None,
            }
            return notebook, exec_metadata

        if path is None:
            raise ValueError(
                "source must exist as file, if execution_mode is 'cache': "
                f"{source}"
            )

        # attempt to execute the notebook
        if read_fmt is not None:
            stage_record = cache.add_nb_to_project(str(path), read_data=read_fmt)
        else:
            stage_record = cache.add_nb_to_project(str(path))
        # TODO do in try/except, in case of db write errors
        NbProjectRecord.remove_tracebacks([stage_record.pk], cache.db)

        result = _run_notebook(notebook, path, nb_config, logger)

        # handle success / failure cases
        # TODO do in try/except to be careful (in case of database write errors?
        if result.err is not None:
            _report_failure(result, source, nb_config, logger)
            NbProjectRecord.set_traceback(stage_record.uri, result.exc_string, cache.db)
        else:
            logger.info(f"Executed notebook in {result.time:.2f} seconds")
            cache_record = cache.cache_notebook_bundle(
                CacheBundleIn(
                    notebook, stage_record.uri, data={"execution_seconds": result.time}
                ),
                check_validity=False,
                overwrite=True,
            )
            logger.info(f"Cached executed notebook: ID={cache_record.pk}")

        exec_metadata = _execution_metadata(result, nb_config.execution_mode)

    return notebook, exec_metadata