Source code for myst_nb.render_outputs

"""A Sphinx post-transform, to convert notebook outpus to AST nodes."""
from abc import ABC, abstractmethod
import os
from typing import List, Optional
from unittest import mock

from importlib_metadata import entry_points
from docutils import nodes
from docutils.parsers.rst import directives
from sphinx.environment import BuildEnvironment
from sphinx.environment.collectors.asset import ImageCollector
from sphinx.errors import SphinxError
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging

import nbconvert
from nbformat import NotebookNode

from jupyter_sphinx.ast import strip_latex_delimiters, JupyterWidgetViewNode
from jupyter_sphinx.utils import sphinx_abs_dir

from myst_parser.main import default_parser, MdParserConfig
from myst_parser.docutils_renderer import make_document

from .nodes import CellOutputBundleNode

LOGGER = logging.getLogger(__name__)

WIDGET_VIEW_MIMETYPE = "application/vnd.jupyter.widget-view+json"


def get_default_render_priority(builder: str) -> Optional[List[str]]:
    priority = {
        builder: (
            WIDGET_VIEW_MIMETYPE,
            "application/javascript",
            "text/html",
            "image/svg+xml",
            "image/png",
            "image/jpeg",
            "text/markdown",
            "text/latex",
            "text/plain",
        )
        for builder in (
            "html",
            "readthedocs",
            "singlehtml",
            "dirhtml",
            "linkcheck",
            "readthedocsdirhtml",
            "readthedocssinglehtml",
            "readthedocssinglehtmllocalmedia",
            "epub",
        )
    }
    # TODO: add support for "image/svg+xml"
    priority["latex"] = (
        "application/pdf",
        "image/png",
        "image/jpeg",
        "text/latex",
        "text/markdown",
        "text/plain",
    )
    return priority.get(builder, None)


[docs]class MystNbEntryPointError(SphinxError): category = "MyST NB Renderer Load"
[docs]def load_renderer(name) -> "CellOutputRendererBase": """Load a renderer, given a name within the ``myst_nb.mime_render`` entry point group """ ep_list = set(ep for ep in entry_points()["myst_nb.mime_render"] if ep.name == name) if len(ep_list) == 1: klass = ep_list.pop().load() if not issubclass(klass, CellOutputRendererBase): raise MystNbEntryPointError( f"Entry Point for myst_nb.mime_render:{name} " f"is not a subclass of `CellOutputRendererBase`: {klass}" ) return klass elif not ep_list: raise MystNbEntryPointError( f"No Entry Point found for myst_nb.mime_render:{name}" ) raise MystNbEntryPointError( f"Multiple Entry Points found for myst_nb.mime_render:{name}: {ep_list}" )
[docs]class CellOutputsToNodes(SphinxPostTransform): """Use the builder context to transform a CellOutputNode into Sphinx nodes.""" # process very early, before CitationReferenceTransform (5), ReferencesResolver (10) # https://www.sphinx-doc.org/en/master/extdev/appapi.html#sphinx.application.Sphinx.add_transform default_priority = 4
[docs] def run(self): abs_dir = sphinx_abs_dir(self.env) renderers = {} # cache renderers for node in self.document.traverse(CellOutputBundleNode): try: renderer_cls = renderers[node.renderer] except KeyError: renderer_cls = load_renderer(node.renderer) renderers[node.renderer] = renderer_cls renderer = renderer_cls(self.document, node, abs_dir) output_nodes = renderer.cell_output_to_nodes(self.env.nb_render_priority) node.replace_self(output_nodes) # Image collect extra nodes from cell outputs that we need to process # this normally gets called as a `doctree-read` event for node in self.document.traverse(nodes.image): # If the image node has `candidates` then it's already been processed # as in-line markdown, so skip it if "candidates" in node: continue col = ImageCollector() # use the node docname, where possible, to deal with single document builds docname = ( self.app.env.path2doc(node.source) if node.source else self.app.env.docname ) with mock.patch.dict(self.app.env.temp_data, {"docname": docname}): col.process_doc(self.app, node)
[docs]class CellOutputRendererBase(ABC): """An abstract base class for rendering Notebook outputs to docutils nodes. Subclasses should implement the ``render`` method. """
[docs] def __init__( self, document: nodes.document, node: CellOutputBundleNode, sphinx_dir: str ): """ :param sphinx_dir: Sphinx "absolute path" to the output folder, so it is a relative path to the source folder prefixed with ``/``. """ self.document = document self.env = document.settings.env # type: BuildEnvironment self.node = node self.sphinx_dir = sphinx_dir
[docs] def cell_output_to_nodes(self, data_priority: List[str]) -> List[nodes.Node]: """Convert a jupyter cell with outputs and filenames to doctree nodes. :param outputs: a list of outputs from a Jupyter cell :param data_priority: media type by priority. :returns: list of docutils nodes """ output_nodes = [] for idx, output in enumerate(self.node.outputs): output_type = output["output_type"] if output_type == "stream": if output["name"] == "stderr": output_nodes.extend(self.render("stderr", output, idx)) else: output_nodes.extend(self.render("stdout", output, idx)) elif output_type == "error": output_nodes.extend(self.render("traceback", output, idx)) elif output_type in ("display_data", "execute_result"): try: # First mime_type by priority that occurs in output. mime_type = next(x for x in data_priority if x in output["data"]) except StopIteration: # TODO this is incompatible with glue outputs # perhaps have sphinx config to turn on/off this error reporting? # and/or only warn if "scrapbook" not in output.metadata # (then enable tests/test_render_outputs.py::test_unknown_mimetype) # LOGGER.warning( # "MyST-NB: output contains no MIME type in priority list: %s", # list(output["data"].keys()), # location=location, # ) continue output_nodes.extend(self.render(mime_type, output, idx)) return output_nodes
[docs] def add_source_and_line(self, *nodes: List[nodes.Node]): """Add the source and line recursively to all nodes.""" location = self.node.source, self.node.line for node in nodes: node.source, node.line = location for child in node.traverse(): child.source, child.line = location
[docs] def make_warning(self, error_msg: str) -> nodes.system_message: """Raise an exception or generate a warning if appropriate, and return a system_message node""" return self.document.reporter.warning( "output render: {}".format(error_msg), line=self.node.line, )
[docs] def make_error(self, error_msg: str) -> nodes.system_message: """Raise an exception or generate a warning if appropriate, and return a system_message node""" return self.document.reporter.error( "output render: {}".format(error_msg), line=self.node.line, )
[docs] def make_severe(self, error_msg: str) -> nodes.system_message: """Raise an exception or generate a warning if appropriate, and return a system_message node""" return self.document.reporter.severe( "output render: {}".format(error_msg), line=self.node.line, )
[docs] def add_name(self, node: nodes.Node, name: str): """Append name to node['names']. Also normalize the name string and register it as explicit target. """ name = nodes.fully_normalize_name(name) if "name" in node: del node["name"] node["names"].append(name) self.document.note_explicit_target(node, node) return name
[docs] def parse_markdown( self, text: str, parent: Optional[nodes.Node] = None ) -> List[nodes.Node]: """Parse text as CommonMark, in a new document.""" parser = default_parser(MdParserConfig(commonmark_only=True)) # setup parent node if parent is None: parent = nodes.container() self.add_source_and_line(parent) parser.options["current_node"] = parent # setup containing document new_doc = make_document(self.node.source) new_doc.settings = self.document.settings new_doc.reporter = self.document.reporter parser.options["document"] = new_doc # use the node docname, where possible, to deal with single document builds with mock.patch.dict( self.env.temp_data, {"docname": self.env.path2doc(self.node.source)} ): parser.render(text) # TODO is there any transforms we should retroactively carry out? return parent.children
[docs] @abstractmethod def render( self, mime_type: str, output: NotebookNode, index: int ) -> List[nodes.Node]: """Take a MIME bundle and MIME type, and return zero or more nodes.""" pass
[docs]class CellOutputRenderer(CellOutputRendererBase): def __init__( self, document: nodes.document, node: CellOutputBundleNode, sphinx_dir: str ): """ :param sphinx_dir: Sphinx "absolute path" to the output folder, so it is a relative path to the source folder prefixed with ``/``. """ super().__init__(document, node, sphinx_dir) self._render_map = { "stderr": self.render_stderr, "stdout": self.render_stdout, "traceback": self.render_traceback, "text/plain": self.render_text_plain, "text/markdown": self.render_text_markdown, "text/html": self.render_text_html, "text/latex": self.render_text_latex, "application/javascript": self.render_application_javascript, WIDGET_VIEW_MIMETYPE: self.render_widget, }
[docs] def render( self, mime_type: str, output: NotebookNode, index: int ) -> List[nodes.Node]: """Take a MIME bundle and MIME type, and return zero or more nodes.""" if mime_type.startswith("image"): nodes = self.create_render_image(mime_type)(output, index) self.add_source_and_line(*nodes) return nodes if mime_type in self._render_map: nodes = self._render_map[mime_type](output, index) self.add_source_and_line(*nodes) return nodes LOGGER.warning( "MyST-NB: No renderer found for output MIME: %s", mime_type, location=(self.node.source, self.node.line), ) return []
[docs] def render_stderr(self, output: NotebookNode, index: int): """Output a container with an unhighlighted literal block.""" text = output["text"] if self.env.config.nb_output_stderr == "show": pass elif self.env.config.nb_output_stderr == "remove-warn": self.make_warning(f"stderr was found in the cell outputs: {text}") return [] elif self.env.config.nb_output_stderr == "warn": self.make_warning(f"stderr was found in the cell outputs: {text}") elif self.env.config.nb_output_stderr == "error": self.make_error(f"stderr was found in the cell outputs: {text}") elif self.env.config.nb_output_stderr == "severe": self.make_severe(f"stderr was found in the cell outputs: {text}") if ( "remove-stderr" in self.node.metadata.get("tags", []) or self.env.config.nb_output_stderr == "remove" ): return [] node = nodes.literal_block( text=output["text"], rawsource=output["text"], language=self.env.config.nb_render_text_lexer, classes=["output", "stderr"], ) return [node]
[docs] def render_stdout(self, output: NotebookNode, index: int): if "remove-stdout" in self.node.metadata.get("tags", []): return [] return [ nodes.literal_block( text=output["text"], rawsource=output["text"], language=self.env.config.nb_render_text_lexer, classes=["output", "stream"], ) ]
[docs] def render_traceback(self, output: NotebookNode, index: int): traceback = "\n".join(output["traceback"]) text = nbconvert.filters.strip_ansi(traceback) return [ nodes.literal_block( text=text, rawsource=text, language="ipythontb", classes=["output", "traceback"], ) ]
[docs] def render_text_markdown(self, output: NotebookNode, index: int): text = output["data"]["text/markdown"] return self.parse_markdown(text)
[docs] def render_text_html(self, output: NotebookNode, index: int): text = output["data"]["text/html"] return [nodes.raw(text=text, format="html", classes=["output", "text_html"])]
[docs] def render_text_latex(self, output: NotebookNode, index: int): text = output["data"]["text/latex"] self.env.get_domain("math").data["has_equations"][self.env.docname] = True return [ nodes.math_block( text=strip_latex_delimiters(text), nowrap=False, number=None, classes=["output", "text_latex"], ) ]
[docs] def render_text_plain(self, output: NotebookNode, index: int): text = output["data"]["text/plain"] return [ nodes.literal_block( text=text, rawsource=text, language=self.env.config.nb_render_text_lexer, classes=["output", "text_plain"], ) ]
[docs] def render_application_javascript(self, output: NotebookNode, index: int): data = output["data"]["application/javascript"] return [ nodes.raw( text='<script type="{mime_type}">{data}</script>'.format( mime_type="application/javascript", data=data ), format="html", ) ]
[docs] def render_widget(self, output: NotebookNode, index: int): data = output["data"][WIDGET_VIEW_MIMETYPE] return [JupyterWidgetViewNode(view_spec=data)]
[docs] def create_render_image(self, mime_type: str): def _render_image(output: NotebookNode, index: int): # Sphinx treats absolute paths as being rooted at the source # directory, so make a relative path, which Sphinx treats # as being relative to the current working directory. filename = os.path.basename(output.metadata["filenames"][mime_type]) # checks if file dir path is inside a subdir of dir filedir = os.path.dirname(output.metadata["filenames"][mime_type]) subpaths = filedir.split(self.sphinx_dir) final_dir = self.sphinx_dir if subpaths and len(subpaths) > 1: subpath = subpaths[1] final_dir += subpath uri = os.path.join(final_dir, filename) # TODO I'm not quite sure why, but as soon as you give it a width, # it becomes clickable?! (i.e. will open the image in the browser) image_node = nodes.image(uri=uri) myst_meta_img = self.node.metadata.get( self.env.config.nb_render_key, {} ).get("image", {}) for key, spec in [ ("classes", directives.class_option), ("alt", directives.unchanged), ("height", directives.length_or_unitless), ("width", directives.length_or_percentage_or_unitless), ("scale", directives.percentage), ("align", align), ]: if key in myst_meta_img: value = myst_meta_img[key] try: image_node[key] = spec(value) except (ValueError, TypeError) as error: error_msg = ( "Invalid image attribute: " "(key: '{}'; value: {})\n{}".format(key, value, error) ) return [self.make_error(error_msg)] myst_meta_fig = self.node.metadata.get( self.env.config.nb_render_key, {} ).get("figure", {}) if "caption" not in myst_meta_fig: return [image_node] figure_node = nodes.figure("", image_node) caption = nodes.caption(myst_meta_fig["caption"], "") figure_node += caption # TODO only contents of one paragraph? (and second should be a legend) self.parse_markdown(myst_meta_fig["caption"], caption) if "name" in myst_meta_fig: name = myst_meta_fig["name"] self.add_source_and_line(figure_node) self.add_name(figure_node, name) # The target should have already been processed by now, with # sphinx.transforms.references.SphinxDomains, which calls # sphinx.domains.std.StandardDomain.process_doc, # so we have to replicate that here std = self.env.get_domain("std") nametypes = self.document.nametypes.items() self.document.nametypes = {name: True} try: std.process_doc(self.env, self.env.docname, self.document) finally: self.document.nametypes = nametypes return [figure_node] return _render_image
def align(argument): return directives.choice(argument, ("left", "center", "right")) class CellOutputRendererInline(CellOutputRenderer): """Replaces literal/math blocks with non-block versions""" def render_stderr(self, output: NotebookNode, index: int): """Output a container with an unhighlighted literal""" return [ nodes.literal( text=output["text"], rawsource="", # disables Pygment highlighting language="none", classes=["stderr"], ) ] def render_stdout(self, output: NotebookNode, index: int): """Output a container with an unhighlighted literal""" return [ nodes.literal( text=output["text"], rawsource="", # disables Pygment highlighting language="none", classes=["output", "stream"], ) ] def render_traceback(self, output: NotebookNode, index: int): traceback = "\n".join(output["traceback"]) text = nbconvert.filters.strip_ansi(traceback) return [ nodes.literal( text=text, rawsource=text, language="ipythontb", classes=["output", "traceback"], ) ] def render_text_latex(self, output: NotebookNode, index: int): data = output["data"]["text/latex"] self.env.get_domain("math").data["has_equations"][self.env.docname] = True return [ nodes.math( text=strip_latex_delimiters(data), nowrap=False, number=None, classes=["output", "text_latex"], ) ] def render_text_plain(self, output: NotebookNode, index: int): data = output["data"]["text/plain"] return [ nodes.literal( text=data, rawsource=data, language="none", classes=["output", "text_plain"], ) ]