"""Module for executing notebooks."""
from __future__ import annotations
from contextlib import nullcontext, suppress
from datetime import datetime
import os
from pathlib import Path, PurePosixPath
from tempfile import TemporaryDirectory
from jupyter_cache import get_cache
from jupyter_cache.base import CacheBundleIn
from jupyter_cache.cache.db import NbProjectRecord
from jupyter_cache.executors.utils import single_nb_execution
from nbformat import NotebookNode
from typing_extensions import TypedDict
from myst_nb.core.config import NbParserConfig
from myst_nb.core.loggers import LoggerType
class ExecutionResult(TypedDict):
    """Metadata describing the outcome of executing a notebook."""

    mtime: float
    """POSIX timestamp recorded for the execution."""
    runtime: float | None
    """Duration of the execution in seconds (``None`` when not available)."""
    method: str
    """Name of the execution method that produced the outputs."""
    succeeded: bool
    """Whether the notebook executed without error."""
    error: str | None
    """Type name of the error raised on failure, otherwise ``None``."""
    traceback: str | None
    """Traceback text captured on failure, otherwise ``None``."""
class ExecutionError(Exception):
    """Raised when notebook execution fails and `execution_raise_on_error` is true."""
def execute_notebook(
    notebook: NotebookNode,
    source: str,
    nb_config: NbParserConfig,
    logger: LoggerType,
    read_fmt: None | dict = None,
) -> tuple[NotebookNode, ExecutionResult | None]:
    """Update a notebook's outputs using the given configuration.

    This function may execute the notebook if necessary, to update its outputs,
    or populate from a cache.

    :param notebook: The notebook to update.
    :param source: Path to or description of the input source being processed.
    :param nb_config: The configuration for the notebook parser.
    :param logger: The logger to use.
    :param read_fmt: The format of the input source (to parse to jupyter cache)

    :returns: The updated notebook, and the (optional) execution metadata.
        The metadata is ``None`` when nothing was executed or loaded from the
        cache (excluded by pattern, skipped in 'auto' mode, or any other mode).

    :raises ValueError: If ``source`` is not an existing file and one is
        required (local CWD execution, or a cache miss in 'cache' mode).
    :raises ExecutionError: If execution fails and
        ``nb_config.execution_raise_on_error`` is true.
    """
    # TODO should any of the logging messages be debug instead of info?

    # path should only be None when using docutils programmatically,
    # e.g. source="<string>"
    try:
        path = Path(source) if Path(source).is_file() else None
    except OSError:
        path = None  # occurs on Windows for `source="<string>"`

    exec_metadata: ExecutionResult | None = None

    # check if the notebook is excluded from execution by pattern
    if path is not None and nb_config.execution_excludepatterns:
        posix_path = PurePosixPath(path.as_posix())
        for pattern in nb_config.execution_excludepatterns:
            if posix_path.match(pattern):
                logger.info(f"Excluded from execution by pattern: {pattern!r}")
                return notebook, exec_metadata

    mode = nb_config.execution_mode

    # 'auto' mode only executes the notebook if it is missing at least one output
    if mode == "auto" and not _has_missing_outputs(notebook):
        logger.info("Skipped execution in 'auto' mode (all outputs present)")
        return notebook, exec_metadata

    if mode in ("auto", "force"):
        # setup the execution current working directory
        if nb_config.execution_in_temp:
            cwd_context = TemporaryDirectory()
        else:
            if path is None:
                raise ValueError(
                    f"source must exist as file, if execution_in_temp=False: {source}"
                )
            cwd_context = nullcontext(str(path.parent))  # type: ignore

        result = _run_notebook(notebook, cwd_context, nb_config, logger)

        if result.err is not None:
            _report_failure(result, source, nb_config, logger)
        else:
            logger.info(f"Executed notebook in {result.time:.2f} seconds")

        exec_metadata = _execution_metadata(result, mode)

    elif mode == "cache":
        # setup the cache
        cache = get_cache(nb_config.execution_cache_path or ".jupyter_cache")
        # TODO config on what notebook/cell metadata to hash/merge

        # attempt to match the notebook to one in the cache;
        # a KeyError from the lookup is treated as "no match"
        cache_record = None
        with suppress(KeyError):
            cache_record = cache.match_cache_notebook(notebook)

        # use the cached notebook if it exists
        if cache_record is not None:
            logger.info(f"Using cached notebook: ID={cache_record.pk}")
            _, notebook = cache.merge_match_into_notebook(notebook)
            exec_metadata = {
                "mtime": cache_record.created.timestamp(),
                "runtime": cache_record.data.get("execution_seconds", None),
                "method": mode,
                "succeeded": True,
                "error": None,
                "traceback": None,
            }
            return notebook, exec_metadata

        # a cache miss requires a real file, since it is registered by path
        if path is None:
            raise ValueError(
                f"source must exist as file, if execution_mode is 'cache': {source}"
            )

        # attempt to execute the notebook
        if read_fmt is not None:
            stage_record = cache.add_nb_to_project(str(path), read_data=read_fmt)
        else:
            stage_record = cache.add_nb_to_project(str(path))
        # TODO do in try/except, in case of db write errors
        NbProjectRecord.remove_tracebacks([stage_record.pk], cache.db)

        cwd_context = (
            TemporaryDirectory()  # type: ignore
            if nb_config.execution_in_temp
            else nullcontext(str(path.parent))
        )
        result = _run_notebook(notebook, cwd_context, nb_config, logger)

        # handle success / failure cases
        # TODO do in try/except to be careful (in case of database write errors?
        if result.err is not None:
            # NOTE: when `execution_raise_on_error` is set this raises, so the
            # traceback is only stored for the non-raising case (as before)
            _report_failure(result, source, nb_config, logger)
            NbProjectRecord.set_traceback(stage_record.uri, result.exc_string, cache.db)
        else:
            logger.info(f"Executed notebook in {result.time:.2f} seconds")
            cache_record = cache.cache_notebook_bundle(
                CacheBundleIn(
                    notebook, stage_record.uri, data={"execution_seconds": result.time}
                ),
                check_validity=False,
                overwrite=True,
            )
            logger.info(f"Cached executed notebook: ID={cache_record.pk}")

        exec_metadata = _execution_metadata(result, mode)

    return notebook, exec_metadata


def _has_missing_outputs(notebook: NotebookNode) -> bool:
    """Return True if at least one code cell of the notebook has no outputs."""
    return any(
        not cell.outputs for cell in notebook.cells if cell["cell_type"] == "code"
    )


def _run_notebook(notebook: NotebookNode, cwd_context, nb_config, logger):
    """Execute the notebook with the CWD yielded by ``cwd_context``.

    :param cwd_context: Context manager yielding the working directory path
        (a ``TemporaryDirectory`` or a ``nullcontext`` around a local path).
    :returns: The result object of ``single_nb_execution``
        (read by the caller for its ``err``, ``time`` and ``exc_string``).
    """
    with cwd_context as cwd:
        cwd = os.path.abspath(cwd)
        logger.info(
            "Executing notebook using "
            + ("temporary" if nb_config.execution_in_temp else "local")
            + " CWD"
        )
        # presumably updates `notebook` in place: the caller returns the same
        # object afterwards - TODO confirm against jupyter_cache
        return single_nb_execution(
            notebook,
            cwd=cwd,
            allow_errors=nb_config.execution_allow_errors,
            timeout=nb_config.execution_timeout,
            meta_override=True,  # TODO still support this?
        )


def _report_failure(result, source: str, nb_config, logger) -> None:
    """Raise or log a warning for a failed execution, depending on the config.

    :raises ExecutionError: If ``nb_config.execution_raise_on_error`` is true.
    """
    if nb_config.execution_raise_on_error:
        raise ExecutionError(str(source)) from result.err
    msg = f"Executing notebook failed: {result.err.__class__.__name__}"
    if nb_config.execution_show_tb:
        msg += f"\n{result.exc_string}"
    logger.warning(msg, subtype="exec")


def _execution_metadata(result, method: str) -> ExecutionResult:
    """Build the execution metadata for a notebook that was just executed."""
    # same `is None` test as used for the failure reporting, so that the log
    # messages and the metadata can never disagree
    failed = result.err is not None
    return {
        "mtime": datetime.now().timestamp(),
        "runtime": result.time,
        "method": method,
        "succeeded": not failed,
        "error": result.err.__class__.__name__ if failed else None,
        "traceback": result.exc_string if failed else None,
    }