# Source code for myst_nb.docutils_

"""The docutils parser implementation for myst-nb."""

from __future__ import annotations

from dataclasses import dataclass, field
from functools import lru_cache, partial
from importlib import resources as import_resources
import os
from typing import Any

from docutils import nodes
from docutils.core import default_description, publish_cmdline
from docutils.parsers.rst.directives import _directives
from docutils.parsers.rst.roles import _roles
from markdown_it.token import Token
from markdown_it.tree import SyntaxTreeNode
from myst_parser.config.main import MdParserConfig, merge_file_level
from myst_parser.mdit_to_docutils.base import DocutilsRenderer, token_line
from myst_parser.parsers.docutils_ import Parser as MystParser
from myst_parser.parsers.docutils_ import create_myst_config, create_myst_settings_spec
from myst_parser.parsers.mdit import create_md_parser
import nbformat
from nbformat import NotebookNode
from pygments.formatters import get_formatter_by_name

from myst_nb import static
from myst_nb.core.config import NbParserConfig
from myst_nb.core.execute import create_client
from myst_nb.core.loggers import DocutilsDocLogger  # DEFAULT_LOG_TYPE,
from myst_nb.core.nb_to_tokens import nb_node_to_dict, notebook_to_tokens
from myst_nb.core.read import (
    NbReader,
    UnexpectedCellDirective,
    read_myst_markdown_notebook,
    standard_nb_read,
)
from myst_nb.core.render import (
    MditRenderMixin,
    MimeData,
    NbElementRenderer,
    create_figure_context,
    get_mime_priority,
    load_renderer,
)
from myst_nb.ext.eval import load_eval_docutils
from myst_nb.ext.glue import load_glue_docutils
from myst_nb.warnings_ import MystNBWarnings, create_warning

# Names of every ``NbParserConfig`` field whose metadata holds a truthy
# ``docutils_exclude`` entry. The names are collected through a set, so they
# are unique but the order of the resulting list is not defined.
DOCUTILS_EXCLUDED_ARGS = [
    *{
        config_field.name
        for config_field in NbParserConfig.get_fields()
        if config_field.metadata.get("docutils_exclude")
    }
]


@dataclass
class DocutilsApp:
    """Plain container collecting role and directive registrations.

    An instance is created by ``get_nb_roles_directives``, which fills
    ``directives`` itself and hands the instance to the eval/glue loaders.
    Each instance starts with its own, independent empty dictionaries.
    """

    # role name -> role implementation
    roles: dict[str, Any] = field(default_factory=dict)
    # directive name -> directive implementation
    directives: dict[str, Any] = field(default_factory=dict)


@lru_cache(maxsize=1)
def get_nb_roles_directives() -> DocutilsApp:
    """Build the collection of notebook-specific roles and directives.

    The result is cached, so every call returns the same ``DocutilsApp``
    instance.

    :return: the app holding the ``code-cell`` and ``raw-cell`` directives
        (both mapped to ``UnexpectedCellDirective``) plus whatever the eval
        and glue loaders add to it
    """
    registry = DocutilsApp()
    # both cell directives map to the same handler class
    for cell_directive in ("code-cell", "raw-cell"):
        registry.directives[cell_directive] = UnexpectedCellDirective
    # let each extension contribute its own entries, eval first, then glue
    for load_extension in (load_eval_docutils, load_glue_docutils):
        load_extension(registry)
    return registry



class Parser(MystParser):
    """Docutils parser for Jupyter Notebooks, containing MyST Markdown."""

    supported: tuple[str, ...] = ("mystnb", "ipynb")
    """Aliases this parser supports."""

    settings_spec = (
        "MyST-NB options",
        None,
        create_myst_settings_spec(NbParserConfig, "nb_"),
        *MystParser.settings_spec,
    )
    """Runtime settings specification."""

    config_section = "myst-nb parser"

    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text, with the notebook roles/directives registered.

        The roles and directives from ``get_nb_roles_directives`` are placed in
        the docutils registries only for the duration of the parse. Afterwards
        every touched registry entry is put back to what it was before the
        call: names that did not exist are removed, and names that were already
        registered (for example by an enclosing ``parse`` call) get their
        previous value back instead of being dropped.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        app = get_nb_roles_directives()
        registrations = ((_directives, app.directives), (_roles, app.roles))

        # snapshot the current registry state, before overriding anything
        missing = object()
        previous = [
            (registry, name, registry.get(name, missing))
            for registry, entries in registrations
            for name in entries
        ]

        for registry, entries in registrations:
            registry.update(entries)
        try:
            return self._parse(inputstring, document)
        finally:
            for registry, name, old_value in previous:
                if old_value is missing:
                    registry.pop(name, None)
                else:
                    registry[name] = old_value

    def _parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.

        The input is read as a notebook, converted to markdown-it tokens and
        rendered into ``document`` while a notebook execution client is open.
        Execution metadata, the processed notebook and (optionally) CSS files
        are written out afterwards.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        document_source = document["source"]

        # get a logger for this document
        logger = DocutilsDocLogger(document)

        # get markdown parsing configuration (defaults are used if invalid)
        try:
            md_config = create_myst_config(document.settings)
        except (TypeError, ValueError) as error:
            logger.error(f"myst configuration invalid: {error.args[0]}")
            md_config = MdParserConfig()

        # get notebook rendering configuration (defaults are used if invalid)
        try:
            nb_config = create_myst_config(document.settings, NbParserConfig, "nb_")
        except (TypeError, ValueError) as error:
            logger.error(f"myst-nb configuration invalid: {error.args[0]}")
            nb_config = NbParserConfig()

        # convert inputstring to notebook
        # note docutils does not support the full custom format mechanism
        if nb_config.read_as_md:
            nb_reader = NbReader(
                partial(
                    read_myst_markdown_notebook,
                    config=md_config,
                    add_source_map=True,
                ),
                md_config,
                {"type": "plugin", "name": "myst_nb_md"},
            )
        else:
            nb_reader = NbReader(standard_nb_read, md_config)
        notebook = nb_reader.read(inputstring)

        # update the global markdown config with the file-level config
        def warning(wtype, msg):
            return create_warning(
                document, msg, line=1, append_to=document, subtype=wtype
            )

        nb_reader.md_config = merge_file_level(
            nb_reader.md_config, notebook.metadata, warning
        )

        # Update mystnb configuration with notebook level metadata
        if nb_config.metadata_key in notebook.metadata:
            overrides = nb_node_to_dict(notebook.metadata[nb_config.metadata_key])
            try:
                nb_config = nb_config.copy(**overrides)
            except Exception as exc:
                # best effort: keep the existing configuration and report why
                logger.warning(
                    f"Failed to update configuration with notebook metadata: {exc}",
                    subtype="config",
                )
            else:
                logger.debug(
                    "Updated configuration with notebook metadata", subtype="config"
                )

        # Setup the markdown parser
        mdit_parser = create_md_parser(nb_reader.md_config, DocutilsNbRenderer)
        mdit_parser.options["document"] = document
        mdit_parser.options["nb_config"] = nb_config
        mdit_renderer: DocutilsNbRenderer = mdit_parser.renderer  # type: ignore
        mdit_env: dict[str, Any] = {}

        # load notebook element renderer class from entry-point name
        # this is separate from DocutilsNbRenderer, so that users can override it
        renderer_name = nb_config.render_plugin
        nb_renderer: NbElementRenderer = load_renderer(renderer_name)(
            mdit_renderer, logger
        )
        # we temporarily store nb_renderer on the document,
        # so that roles/directives can access it
        document.attributes["nb_renderer"] = nb_renderer
        try:
            # we currently do this early,
            # so that the nb_renderer has access to things
            mdit_renderer.setup_render(mdit_parser.options, mdit_env)  # type: ignore

            # parse notebook structure to markdown-it tokens
            # note, this does not assume that the notebook has been executed yet
            mdit_tokens = notebook_to_tokens(notebook, mdit_parser, mdit_env, logger)

            # open the notebook execution client,
            # this may execute the notebook immediately or during the page render
            with create_client(
                notebook, document_source, nb_config, logger
            ) as nb_client:
                mdit_parser.options["nb_client"] = nb_client
                # convert to docutils AST, which is added to the document
                mdit_renderer.render(mdit_tokens, mdit_parser.options, mdit_env)

            # save final execution data
            if nb_client.exec_metadata:
                document["nb_exec_data"] = nb_client.exec_metadata

            if nb_config.output_folder:
                # write final (updated) notebook to output folder
                # (utf8 is standard encoding)
                content = nbformat.writes(notebook).encode("utf-8")
                nb_renderer.write_file(["processed.ipynb"], content, overwrite=True)

                # if we are using an HTML writer,
                # dynamically add the CSS to the output
                if nb_config.append_css and hasattr(document.settings, "stylesheet"):
                    self._append_css(document, nb_renderer)

                # TODO also handle JavaScript
        finally:
            # remove temporary state, also when rendering/execution failed
            document.attributes.pop("nb_renderer", None)

    @staticmethod
    def _append_css(
        document: nodes.document, nb_renderer: NbElementRenderer
    ) -> None:
        """Write the myst-nb and pygments CSS files and add them to the settings.

        :param document: the document whose ``settings.stylesheet_path`` /
            ``settings.stylesheet`` lists are extended with the written paths
        :param nb_renderer: the renderer used to write the CSS files
        """
        css_paths = [
            nb_renderer.write_file(
                ["mystnb.css"],
                (import_resources.files(static) / "mystnb.css").read_bytes(),
                overwrite=True,
            )
        ]
        fmt = get_formatter_by_name("html", style="default")
        css_paths.append(
            nb_renderer.write_file(
                ["pygments.css"],
                fmt.get_style_defs(".code").encode("utf-8"),
                overwrite=True,
            )
        )
        css_paths = [os.path.abspath(path) for path in css_paths]
        # stylesheet and stylesheet_path are mutually exclusive
        if document.settings.stylesheet_path:
            document.settings.stylesheet_path.extend(css_paths)
        if document.settings.stylesheet:
            document.settings.stylesheet.extend(css_paths)



class DocutilsNbRenderer(DocutilsRenderer, MditRenderMixin):
    """A docutils-only renderer for Jupyter Notebooks."""

    def render_nb_initialise(self, token: SyntaxTreeNode) -> None:
        """Store the notebook-level metadata on the document.

        The ``kernelspec``, ``language_info`` and ``source_map`` entries are
        saved as ``nb_<key>`` document attributes. When the ``metadata_to_fm``
        option is set, all other entries (except ``widgets``) are forwarded to
        the front-matter renderer.

        :param token: the syntax tree node being rendered (not used here)
        """
        metadata = self.nb_client.nb_metadata
        special_keys = ["kernelspec", "language_info", "source_map"]
        for key in special_keys:
            # save these special keys on the document, rather than as docinfo
            if key in metadata:
                self.document[f"nb_{key}"] = metadata.get(key)

        if self.nb_config.metadata_to_fm:
            # forward the remaining metadata to the front_matter renderer
            special_keys.append("widgets")
            top_matter = {k: v for k, v in metadata.items() if k not in special_keys}
            self.render_front_matter(
                Token(  # type: ignore
                    "front_matter",
                    "",
                    0,
                    map=[0, 0],
                    content=top_matter,  # type: ignore[arg-type]
                ),
            )

    def _render_nb_cell_code_outputs(
        self, token: SyntaxTreeNode, outputs: list[NotebookNode]
    ) -> None:
        """Render a notebook code cell's outputs.

        Stream and error outputs are rendered directly. For ``display_data``
        and ``execute_result`` outputs a single MIME type is chosen, the first
        entry of the MIME priority list that is present in the output data.
        Unknown stream names, unsupported output types and non-empty outputs
        without a matching MIME type produce a warning instead of nodes.

        :param token: the code cell node; its ``meta`` supplies the cell
            ``index`` and ``metadata``
        :param outputs: the outputs of the cell, in order
        """
        cell_index = token.meta["index"]
        metadata = token.meta["metadata"]
        line = token_line(token)
        # render the outputs
        mime_priority = get_mime_priority(
            self.nb_config.builder_name, self.nb_config.mime_priority_overrides
        )
        for output_index, output in enumerate(outputs):
            if output.output_type == "stream":
                if output.name == "stdout":
                    render_stream = self.nb_renderer.render_stdout
                elif output.name == "stderr":
                    render_stream = self.nb_renderer.render_stderr
                else:
                    # report the unknown stream rather than dropping it silently
                    create_warning(
                        self.document,
                        f"Unsupported stream name: {output.name}",
                        line=line,
                        append_to=self.current_node,
                        subtype=MystNBWarnings.OUTPUT_TYPE,
                    )
                    continue
                _nodes = render_stream(output, metadata, cell_index, line)
                self.add_line_and_source_path_r(_nodes, token)
                self.current_node.extend(_nodes)
            elif output.output_type == "error":
                _nodes = self.nb_renderer.render_error(
                    output, metadata, cell_index, line
                )
                self.add_line_and_source_path_r(_nodes, token)
                self.current_node.extend(_nodes)
            elif output.output_type in ("display_data", "execute_result"):
                # Note, this is different to the sphinx implementation,
                # here we directly select a single output, based on the mime_priority,
                # as opposed to output all mime types, and select in a post-transform
                # (the mime_priority must then be set for the output format)

                try:
                    mime_type = next(x for x in mime_priority if x in output["data"])
                except StopIteration:
                    # only warn when there was data that could not be rendered
                    if output["data"]:
                        create_warning(
                            self.document,
                            "No output mime type found from render_priority "
                            f"(cell<{cell_index}>.output<{output_index}>)",
                            line=line,
                            append_to=self.current_node,
                            subtype=MystNBWarnings.MIME_TYPE,
                        )
                else:
                    figure_options = (
                        self.get_cell_level_config(
                            "render_figure_options", metadata, line=line
                        )
                        or None
                    )

                    with create_figure_context(self, figure_options, line):
                        _nodes = self.nb_renderer.render_mime_type(
                            MimeData(
                                mime_type,
                                output["data"][mime_type],
                                cell_metadata=metadata,
                                output_metadata=output.get("metadata", {}),
                                cell_index=cell_index,
                                output_index=output_index,
                                line=line,
                            ),
                        )
                        self.current_node.extend(_nodes)
                        self.add_line_and_source_path_r(_nodes, token)
            else:
                create_warning(
                    self.document,
                    f"Unsupported output type: {output.output_type}",
                    line=line,
                    append_to=self.current_node,
                    subtype=MystNBWarnings.OUTPUT_TYPE,
                )


def _run_cli(
    writer_name: str, builder_name: str, writer_description: str, argv: list[str] | None
) -> None:
    """Run the command line interface for a particular writer.

    :param writer_name: name of the docutils writer to publish with
    :param builder_name: value set for the ``nb_builder_name`` setting
    :param writer_description: short description of the generated output,
        inserted into the command line help text
    :param argv: command line arguments, forwarded to ``publish_cmdline``
    """
    description = (
        f"Generates {writer_description} from standalone MyST Notebook sources.\n"
        f"{default_description}\n"
        "External outputs are written to `--nb-output-folder`.\n"
    )
    publish_cmdline(
        parser=Parser(),
        writer_name=writer_name,
        description=description,
        # to see notebook execution info by default
        settings_overrides={"report_level": 1, "nb_builder_name": builder_name},
        argv=argv,
    )


def cli_html(argv: list[str] | None = None) -> None:
    """Command line entry point: convert MyST to HTML.

    :param argv: command line arguments, forwarded unchanged to ``_run_cli``
    """
    _run_cli(
        writer_name="html",
        builder_name="html",
        writer_description="(X)HTML documents",
        argv=argv,
    )


def cli_html5(argv: list[str] | None = None) -> None:
    """Cmdline entrypoint for converting MyST to HTML5.

    :param argv: command line arguments, forwarded unchanged to ``_run_cli``
    """
    _run_cli(
        writer_name="html5",
        builder_name="html",
        writer_description="HTML5 documents",
        argv=argv,
    )


def cli_latex(argv: list[str] | None = None) -> None:
    """Cmdline entrypoint for converting MyST to LaTeX.

    :param argv: command line arguments, forwarded unchanged to ``_run_cli``
    """
    _run_cli(
        writer_name="latex",
        builder_name="latex",
        writer_description="LaTeX documents",
        argv=argv,
    )


def cli_xml(argv: list[str] | None = None) -> None:
    """Cmdline entrypoint for converting MyST to XML.

    :param argv: command line arguments, forwarded unchanged to ``_run_cli``
    """
    _run_cli(
        writer_name="xml",
        builder_name="xml",
        writer_description="Docutils-native XML",
        argv=argv,
    )


def cli_pseudoxml(argv: list[str] | None = None) -> None:
    """Cmdline entrypoint for converting MyST to pseudo-XML.

    Note that the ``pseudoxml`` writer is combined with the ``html`` builder
    name here.

    :param argv: command line arguments, forwarded unchanged to ``_run_cli``
    """
    _run_cli(
        writer_name="pseudoxml",
        builder_name="html",
        writer_description="pseudo-XML",
        argv=argv,
    )