Source code for myst_nb.render_outputs

"""A Sphinx post-transform, to convert notebook outpus to AST nodes."""
import os
from abc import ABC, abstractmethod
from typing import List, Optional
from unittest import mock

import nbconvert
from docutils import nodes
from docutils.parsers.rst import directives
from importlib_metadata import entry_points
from jupyter_sphinx.ast import JupyterWidgetViewNode, strip_latex_delimiters
from jupyter_sphinx.utils import sphinx_abs_dir
from myst_parser.docutils_renderer import make_document
from myst_parser.main import MdParserConfig, default_parser
from nbformat import NotebookNode
from sphinx.environment import BuildEnvironment
from sphinx.environment.collectors.asset import ImageCollector
from sphinx.errors import SphinxError
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging

from .nodes import CellOutputBundleNode

# Module-level logger, created through the ``logging`` helper imported from ``sphinx.util``.
LOGGER = logging.getLogger(__name__)

WIDGET_VIEW_MIMETYPE = "application/vnd.jupyter.widget-view+json"


def get_default_render_priority(builder: str) -> Optional[List[str]]:
    """Return the default MIME type render priority for a Sphinx builder.

    :param builder: name of the Sphinx builder.
    :returns: the MIME types, highest priority first, or ``None`` when no
        default is defined for ``builder``.
    """
    html_like_builders = (
        "html",
        "readthedocs",
        "singlehtml",
        "dirhtml",
        "linkcheck",
        "readthedocsdirhtml",
        "readthedocssinglehtml",
        "readthedocssinglehtmllocalmedia",
        "epub",
    )
    html_mime_types = (
        WIDGET_VIEW_MIMETYPE,
        "application/javascript",
        "text/html",
        "image/svg+xml",
        "image/png",
        "image/jpeg",
        "text/markdown",
        "text/latex",
        "text/plain",
    )
    # TODO: add support for "image/svg+xml"
    latex_mime_types = (
        "application/pdf",
        "image/png",
        "image/jpeg",
        "text/latex",
        "text/markdown",
        "text/plain",
    )

    # every HTML-like builder shares the same priority tuple
    priority = dict.fromkeys(html_like_builders, html_mime_types)
    priority["latex"] = latex_mime_types
    return priority.get(builder)


class MystNbEntryPointError(SphinxError):
    """Error raised when a ``myst_nb.mime_render`` entry point cannot be used."""

    category = "MyST NB Renderer Load"
def load_renderer(name: str) -> "CellOutputRendererBase":
    """Load a renderer class, given a name within the ``myst_nb.mime_render``
    entry point group.

    :param name: the entry point name to look up in the group.
    :returns: the loaded object, which is a subclass of
        ``CellOutputRendererBase``.
    :raises MystNbEntryPointError: if no entry point with this name exists,
        or if the loaded object is not a subclass of ``CellOutputRendererBase``.
    """
    all_eps = entry_points()
    if hasattr(all_eps, "select"):
        # importlib_metadata >= 3.6 or importlib.metadata in python >=3.10
        eps = all_eps.select(group="myst_nb.mime_render", name=name)
        found = name in eps.names
    else:
        eps = {ep.name: ep for ep in all_eps.get("myst_nb.mime_render", [])}
        found = name in eps

    if not found:
        raise MystNbEntryPointError(
            f"No Entry Point found for myst_nb.mime_render:{name}"
        )

    klass = eps[name].load()
    # ``issubclass`` raises ``TypeError`` when its first argument is not a class
    # (e.g. the entry point targets a function or an instance), so check that
    # first and report the problem through the dedicated error type.
    if not (isinstance(klass, type) and issubclass(klass, CellOutputRendererBase)):
        raise MystNbEntryPointError(
            f"Entry Point for myst_nb.mime_render:{name} "
            f"is not a subclass of `CellOutputRendererBase`: {klass}"
        )
    return klass
class CellOutputsToNodes(SphinxPostTransform):
    """Use the builder context to transform a CellOutputNode into Sphinx nodes."""

    # process very early, before CitationReferenceTransform (5), ReferencesResolver (10)
    # https://www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx.application.Sphinx.add_transform
    default_priority = 4

    def run(self):
        """Replace each ``CellOutputBundleNode`` with rendered nodes, then run
        the image collector over image nodes that have no ``candidates`` yet.
        """
        abs_dir = sphinx_abs_dir(self.env)
        renderer_classes = {}  # cache of renderer name -> loaded renderer class

        for bundle in self.document.traverse(CellOutputBundleNode):
            renderer_name = bundle.renderer
            if renderer_name not in renderer_classes:
                renderer_classes[renderer_name] = load_renderer(renderer_name)
            renderer = renderer_classes[renderer_name](self.document, bundle, abs_dir)
            rendered = renderer.cell_output_to_nodes(self.env.nb_render_priority)
            bundle.replace_self(rendered)

        # Image collect extra nodes from cell outputs that we need to process
        # this normally gets called as a `doctree-read` event
        for image in self.document.traverse(nodes.image):
            # If the image node has `candidates` then it's already been processed
            # as in-line markdown, so skip it
            if "candidates" in image:
                continue
            collector = ImageCollector()
            # use the node docname, where possible, to deal with single document builds
            if image.source:
                docname = self.app.env.path2doc(image.source)
            else:
                docname = self.app.env.docname
            with mock.patch.dict(self.app.env.temp_data, {"docname": docname}):
                collector.process_doc(self.app, image)
class CellOutputRendererBase(ABC):
    """An abstract base class for rendering Notebook outputs to docutils nodes.

    Subclasses should implement the ``render`` method.
    """

    def __init__(
        self, document: nodes.document, node: CellOutputBundleNode, sphinx_dir: str
    ):
        """
        :param document: the document being transformed; its
            ``settings.env`` is stored as ``self.env``.
        :param node: the output bundle node whose outputs are rendered.
        :param sphinx_dir: Sphinx "absolute path" to the output folder,
            so it is a relative path to the source folder prefixed with ``/``.
        """
        self.document = document
        self.env = document.settings.env  # type: BuildEnvironment
        self.node = node
        self.sphinx_dir = sphinx_dir

    def cell_output_to_nodes(self, data_priority: List[str]) -> List[nodes.Node]:
        """Convert the outputs held by ``self.node`` to doctree nodes.

        :param data_priority: media type by priority.
        :returns: list of docutils nodes
        """
        collected = []
        for idx, output in enumerate(self.node.outputs):
            kind = output["output_type"]
            if kind == "stream":
                # any stream that is not stderr is rendered as stdout
                render_key = "stderr" if output["name"] == "stderr" else "stdout"
            elif kind == "error":
                render_key = "traceback"
            elif kind in ("display_data", "execute_result"):
                try:
                    # First mime_type by priority that occurs in output.
                    render_key = next(
                        mime for mime in data_priority if mime in output["data"]
                    )
                except StopIteration:
                    # TODO this is incompatible with glue outputs
                    # perhaps have sphinx config to turn on/off this error reporting?
                    # and/or only warn if "scrapbook" not in output.metadata
                    # (then enable tests/test_render_outputs.py::test_unknown_mimetype)
                    # LOGGER.warning(
                    #     "MyST-NB: output contains no MIME type in priority list: %s",
                    #     list(output["data"].keys()),
                    #     location=location,
                    # )
                    continue
            else:
                # other output types produce no nodes
                continue
            collected.extend(self.render(render_key, output, idx))
        return collected

    def add_source_and_line(self, *nodes: List[nodes.Node]):
        """Add the source and line recursively to all nodes."""
        source, line = self.node.source, self.node.line
        for root in nodes:
            root.source = source
            root.line = line
            for descendant in root.traverse():
                descendant.source = source
                descendant.line = line

    def make_warning(self, error_msg: str) -> nodes.system_message:
        """Report ``error_msg`` at warning level through the document reporter
        (which may raise an exception, if appropriate),
        and return the resulting system_message node.
        """
        message = f"output render: {error_msg}"
        return self.document.reporter.warning(message, line=self.node.line)

    def make_error(self, error_msg: str) -> nodes.system_message:
        """Report ``error_msg`` at error level through the document reporter
        (which may raise an exception, if appropriate),
        and return the resulting system_message node.
        """
        message = f"output render: {error_msg}"
        return self.document.reporter.error(message, line=self.node.line)

    def make_severe(self, error_msg: str) -> nodes.system_message:
        """Report ``error_msg`` at severe level through the document reporter
        (which may raise an exception, if appropriate),
        and return the resulting system_message node.
        """
        message = f"output render: {error_msg}"
        return self.document.reporter.severe(message, line=self.node.line)

    def add_name(self, node: nodes.Node, name: str):
        """Append name to node['names'].

        Also normalize the name string and register it as explicit target.

        :returns: the normalized name.
        """
        normalized = nodes.fully_normalize_name(name)
        if "name" in node:
            del node["name"]
        node["names"].append(normalized)
        self.document.note_explicit_target(node, node)
        return normalized

    def parse_markdown(
        self, text: str, parent: Optional[nodes.Node] = None
    ) -> List[nodes.Node]:
        """Parse text as CommonMark, in a new document.

        :param text: the Markdown source to parse.
        :param parent: node to render into; a new container
            (given this output's source and line) is created when omitted.
        :returns: the children of the parent node after rendering.
        """
        parser = default_parser(MdParserConfig(commonmark_only=True))

        # setup parent node
        if parent is None:
            parent = nodes.container()
            self.add_source_and_line(parent)
        parser.options["current_node"] = parent

        # setup containing document
        container_doc = make_document(self.node.source)
        container_doc.settings = self.document.settings
        container_doc.reporter = self.document.reporter
        parser.options["document"] = container_doc

        # use the node docname, where possible, to deal with single document builds
        docname_patch = {"docname": self.env.path2doc(self.node.source)}
        with mock.patch.dict(self.env.temp_data, docname_patch):
            parser.render(text)

        # TODO is there any transforms we should retroactively carry out?
        return parent.children

    @abstractmethod
    def render(
        self, mime_type: str, output: NotebookNode, index: int
    ) -> List[nodes.Node]:
        """Take a MIME bundle and MIME type, and return zero or more nodes."""
        pass
class CellOutputRenderer(CellOutputRendererBase):
    """Render notebook outputs to block-level docutils nodes.

    Non-image outputs are dispatched through ``self._render_map``;
    MIME types starting with ``image`` go through ``create_render_image``.
    """

    def __init__(
        self, document: nodes.document, node: CellOutputBundleNode, sphinx_dir: str
    ):
        """
        :param sphinx_dir: Sphinx "absolute path" to the output folder,
            so it is a relative path to the source folder prefixed with ``/``.
        """
        super().__init__(document, node, sphinx_dir)
        # render key (stream name or MIME type) -> bound render method
        self._render_map = {
            "stderr": self.render_stderr,
            "stdout": self.render_stdout,
            "traceback": self.render_traceback,
            "text/plain": self.render_text_plain,
            "text/markdown": self.render_text_markdown,
            "text/html": self.render_text_html,
            "text/latex": self.render_text_latex,
            "application/javascript": self.render_application_javascript,
            WIDGET_VIEW_MIMETYPE: self.render_widget,
        }

    def render(
        self, mime_type: str, output: NotebookNode, index: int
    ) -> List[nodes.Node]:
        """Take a MIME bundle and MIME type, and return zero or more nodes.

        A warning is logged and an empty list returned when no renderer
        exists for ``mime_type``.
        """
        if mime_type.startswith("image"):
            render_func = self.create_render_image(mime_type)
        elif mime_type in self._render_map:
            render_func = self._render_map[mime_type]
        else:
            LOGGER.warning(
                "MyST-NB: No renderer found for output MIME: %s",
                mime_type,
                location=(self.node.source, self.node.line),
            )
            return []
        # named ``rendered`` so the docutils ``nodes`` module is not shadowed
        rendered = render_func(output, index)
        self.add_source_and_line(*rendered)
        return rendered

    def render_stderr(self, output: NotebookNode, index: int):
        """Output a container with an unhighlighted literal block.

        The ``nb_output_stderr`` config value selects the handling:
        ``show`` renders silently, ``warn``/``error``/``severe`` report a
        message and render, ``remove-warn`` reports a warning and renders
        nothing, ``remove`` renders nothing.  A ``remove-stderr`` cell tag
        also suppresses the rendered output.
        """
        text = output["text"]
        stderr_mode = self.env.config.nb_output_stderr
        message = f"stderr was found in the cell outputs: {text}"

        if stderr_mode == "remove-warn":
            self.make_warning(message)
            return []
        if stderr_mode == "warn":
            self.make_warning(message)
        elif stderr_mode == "error":
            self.make_error(message)
        elif stderr_mode == "severe":
            self.make_severe(message)

        if (
            "remove-stderr" in self.node.metadata.get("tags", [])
            or stderr_mode == "remove"
        ):
            return []

        node = nodes.literal_block(
            text=text,
            rawsource=text,
            language=self.env.config.nb_render_text_lexer,
            classes=["output", "stderr"],
        )
        return [node]

    def render_stdout(self, output: NotebookNode, index: int):
        """Render a stdout stream as a literal block, unless the cell carries
        the ``remove-stdout`` tag.
        """
        if "remove-stdout" in self.node.metadata.get("tags", []):
            return []
        text = output["text"]
        return [
            nodes.literal_block(
                text=text,
                rawsource=text,
                language=self.env.config.nb_render_text_lexer,
                classes=["output", "stream"],
            )
        ]

    def render_traceback(self, output: NotebookNode, index: int):
        """Render an error traceback, with ANSI codes stripped, as a literal block."""
        traceback = "\n".join(output["traceback"])
        text = nbconvert.filters.strip_ansi(traceback)
        return [
            nodes.literal_block(
                text=text,
                rawsource=text,
                language="ipythontb",
                classes=["output", "traceback"],
            )
        ]

    def render_text_markdown(self, output: NotebookNode, index: int):
        """Parse the ``text/markdown`` data and return the resulting nodes."""
        text = output["data"]["text/markdown"]
        return self.parse_markdown(text)

    def render_text_html(self, output: NotebookNode, index: int):
        """Wrap the ``text/html`` data in a raw HTML node."""
        text = output["data"]["text/html"]
        return [nodes.raw(text=text, format="html", classes=["output", "text_html"])]

    def render_text_latex(self, output: NotebookNode, index: int):
        """Render the ``text/latex`` data as a math block."""
        text = output["data"]["text/latex"]
        # flag the current document in the math domain's ``has_equations`` data
        self.env.get_domain("math").data["has_equations"][self.env.docname] = True
        return [
            nodes.math_block(
                text=strip_latex_delimiters(text),
                nowrap=False,
                number=None,
                classes=["output", "text_latex"],
            )
        ]

    def render_text_plain(self, output: NotebookNode, index: int):
        """Render the ``text/plain`` data as a literal block."""
        text = output["data"]["text/plain"]
        return [
            nodes.literal_block(
                text=text,
                rawsource=text,
                language=self.env.config.nb_render_text_lexer,
                classes=["output", "text_plain"],
            )
        ]

    def render_application_javascript(self, output: NotebookNode, index: int):
        """Embed the ``application/javascript`` data in a raw HTML ``<script>``."""
        data = output["data"]["application/javascript"]
        return [
            nodes.raw(
                text=f'<script type="application/javascript">{data}</script>',
                format="html",
            )
        ]

    def render_widget(self, output: NotebookNode, index: int):
        """Render widget view data as a ``JupyterWidgetViewNode``."""
        data = output["data"][WIDGET_VIEW_MIMETYPE]
        return [JupyterWidgetViewNode(view_spec=data)]

    def create_render_image(self, mime_type: str):
        """Return a render function for image outputs of the given MIME type.

        The returned callable takes ``(output, index)`` and returns a list
        holding an image node, a figure node (when the cell metadata defines a
        figure caption), or an error ``system_message`` (when an image
        attribute in the cell metadata is invalid).
        """

        def _render_image(output: NotebookNode, index: int):
            # Sphinx treats absolute paths as being rooted at the source
            # directory, so make a relative path, which Sphinx treats
            # as being relative to the current working directory.
            filepath = output.metadata["filenames"][mime_type]
            filename = os.path.basename(filepath)

            # checks if file dir path is inside a subdir of dir
            filedir = os.path.dirname(filepath)
            subpaths = filedir.split(self.sphinx_dir)
            final_dir = self.sphinx_dir
            if len(subpaths) > 1:
                final_dir += subpaths[1]
            uri = os.path.join(final_dir, filename)

            # TODO I'm not quite sure why, but as soon as you give it a width,
            # it becomes clickable?! (i.e. will open the image in the browser)
            image_node = nodes.image(uri=uri)

            render_meta = self.node.metadata.get(self.env.config.nb_render_key, {})

            myst_meta_img = render_meta.get("image", {})
            # each supported attribute is validated by its directive option spec
            for key, spec in [
                ("classes", directives.class_option),
                ("alt", directives.unchanged),
                ("height", directives.length_or_unitless),
                ("width", directives.length_or_percentage_or_unitless),
                ("scale", directives.percentage),
                ("align", align),
            ]:
                if key not in myst_meta_img:
                    continue
                value = myst_meta_img[key]
                try:
                    image_node[key] = spec(value)
                except (ValueError, TypeError) as error:
                    error_msg = (
                        "Invalid image attribute: "
                        "(key: '{}'; value: {})\n{}".format(key, value, error)
                    )
                    return [self.make_error(error_msg)]

            myst_meta_fig = render_meta.get("figure", {})
            if "caption" not in myst_meta_fig:
                return [image_node]

            figure_node = nodes.figure("", image_node)
            caption = nodes.caption(myst_meta_fig["caption"], "")
            figure_node += caption
            # TODO only contents of one paragraph? (and second should be a legend)
            self.parse_markdown(myst_meta_fig["caption"], caption)

            if "name" in myst_meta_fig:
                name = myst_meta_fig["name"]
                self.add_source_and_line(figure_node)
                self.add_name(figure_node, name)
                # The target should have already been processed by now, with
                # sphinx.transforms.references.SphinxDomains, which calls
                # sphinx.domains.std.StandardDomain.process_doc,
                # so we have to replicate that here
                std = self.env.get_domain("std")
                # Keep the mapping itself (not an ``.items()`` view) so the
                # ``finally`` clause puts the original dict back on the document.
                saved_nametypes = self.document.nametypes
                self.document.nametypes = {name: True}
                try:
                    std.process_doc(self.env, self.env.docname, self.document)
                finally:
                    self.document.nametypes = saved_nametypes

            return [figure_node]

        return _render_image
def align(argument):
    """Validate an image ``align`` value via ``directives.choice``,
    allowing ``left``, ``center`` or ``right``.
    """
    allowed = ("left", "center", "right")
    return directives.choice(argument, allowed)


class CellOutputRendererInline(CellOutputRenderer):
    """Replaces literal/math blocks with non-block versions"""

    def render_stderr(self, output: NotebookNode, index: int):
        """Output a container with an unhighlighted literal"""
        literal = nodes.literal(
            text=output["text"],
            rawsource="",  # disables Pygment highlighting
            language="none",
            classes=["stderr"],
        )
        return [literal]

    def render_stdout(self, output: NotebookNode, index: int):
        """Output a container with an unhighlighted literal"""
        literal = nodes.literal(
            text=output["text"],
            rawsource="",  # disables Pygment highlighting
            language="none",
            classes=["output", "stream"],
        )
        return [literal]

    def render_traceback(self, output: NotebookNode, index: int):
        """Output the traceback, with ANSI codes stripped, as an inline literal."""
        joined = "\n".join(output["traceback"])
        cleaned = nbconvert.filters.strip_ansi(joined)
        literal = nodes.literal(
            text=cleaned,
            rawsource=cleaned,
            language="ipythontb",
            classes=["output", "traceback"],
        )
        return [literal]

    def render_text_latex(self, output: NotebookNode, index: int):
        """Output the ``text/latex`` data as an inline math node."""
        latex = output["data"]["text/latex"]
        # flag the current document in the math domain's ``has_equations`` data
        self.env.get_domain("math").data["has_equations"][self.env.docname] = True
        math_node = nodes.math(
            text=strip_latex_delimiters(latex),
            nowrap=False,
            number=None,
            classes=["output", "text_latex"],
        )
        return [math_node]

    def render_text_plain(self, output: NotebookNode, index: int):
        """Output the ``text/plain`` data as an inline literal."""
        plain = output["data"]["text/plain"]
        literal = nodes.literal(
            text=plain,
            rawsource=plain,
            language="none",
            classes=["output", "text_plain"],
        )
        return [literal]