# Source code for myst_nb.core.lexers

"""Pygments lexers"""
from __future__ import annotations

import re

# this is not added as an entry point in ipython, so we add it in this package
from IPython.lib.lexers import IPythonTracebackLexer  # noqa: F401
import pygments.lexer
import pygments.token

# Maps a colour offset (0-7) to the colour name used when building token
# names. The lexer looks codes up here after subtracting 30 (foreground) or
# 40 (background) from the numeric parameter.
_ansi_code_to_color = dict(
    enumerate(
        (
            "Black",
            "Red",
            "Green",
            "Yellow",
            "Blue",
            "Magenta",
            "Cyan",
            "White",
        )
    )
)


def _token_from_lexer_state(
    bold: bool, faint: bool, fg_color: str | None, bg_color: str | None
):
    """Build the single Pygments token that represents the given state.

    A lexer may emit only one token per piece of text, yet the state has
    several independent parts.  To work around this, the active parts are
    chained into one nested token under ``Token.Color``, in the fixed order
    bold, faint, foreground, background -- for example
    ``Token.Color.Bold.Red``.

    :param bold: whether bold is active (adds ``Bold``)
    :param faint: whether faint is active (adds ``Faint``)
    :param fg_color: foreground colour name, added as-is when truthy
    :param bg_color: background colour name, added with a ``BG`` prefix
        when truthy
    :return: ``pygments.token.Text`` when nothing is active, otherwise the
        nested ``Token.Color`` sub-token
    """
    parts: list[str] = []
    if bold:
        parts.append("Bold")
    if faint:
        parts.append("Faint")
    if fg_color:
        parts.append(fg_color)
    if bg_color:
        parts.append("BG" + bg_color)

    # Nothing active: plain text.
    if not parts:
        return pygments.token.Text

    # Attribute access on a token type yields the child token of that name,
    # so walking the parts produces e.g. Token.Color.Bold.Red.
    token = pygments.token.Token.Color
    for part in parts:
        token = getattr(token, part)
    return token


class AnsiColorLexer(pygments.lexer.RegexLexer):
    """Pygments lexer for text containing ANSI color codes.

    The lexer keeps the current graphics state (bold, faint, foreground and
    background color) on the instance.  Each escape sequence updates that
    state, and the text following it is emitted with the single token built
    from the state by ``_token_from_lexer_state``.

    Adapted from https://github.com/chriskuehl/pygments-ansi-color
    """

    name = "ANSI Color"
    aliases = ("myst-ansi",)
    flags = re.DOTALL | re.MULTILINE

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.reset_state()

    def reset_state(self):
        """Clear every graphics attribute so following text is plain."""
        self.bold = False
        self.faint = False
        self.fg_color = None
        self.bg_color = None

    @property
    def current_token(self):
        """The token corresponding to the current graphics state."""
        return _token_from_lexer_state(
            self.bold,
            self.faint,
            self.fg_color,
            self.bg_color,
        )

    def _apply_graphics_codes(self, values):
        """Update the lexer state from the numeric parameters of an "m" code.

        :param values: the integer parameters of one escape sequence, in order

        Unrecognised parameters are ignored.
        """
        # An iterator lets the extended-color branch consume its own
        # arguments without repeatedly popping from the front of a list.
        codes = iter(values)
        for code in codes:
            fg_color = _ansi_code_to_color.get(code - 30)
            bg_color = _ansi_code_to_color.get(code - 40)
            if fg_color:
                self.fg_color = fg_color
            elif bg_color:
                self.bg_color = bg_color
            elif code == 1:
                self.bold = True
            elif code == 2:
                self.faint = True
            elif code == 22:
                # Turns off both intensity modifiers.
                self.bold = False
                self.faint = False
            elif code == 39:
                self.fg_color = None
            elif code == 49:
                self.bg_color = None
            elif code == 0:
                self.reset_state()
            elif code in (38, 48):
                # Extended color: "38;5;N" / "48;5;N" select palette entry N,
                # "38;2;R;G;B" / "48;2;R;G;B" select a 24-bit color.
                try:
                    mode = next(codes)
                    color = next(codes)
                except StopIteration:
                    # Truncated sequence; nothing is left to process.
                    break
                if mode == 2:
                    # There is no token for 24-bit colors, so the sequence is
                    # ignored.  "color" already holds R; G and B must also be
                    # consumed, otherwise they would be read as independent
                    # graphics codes (e.g. a G of 1 would switch on bold).
                    next(codes, None)
                    next(codes, None)
                    continue
                if mode != 5:
                    continue
                if not 0 <= color <= 255:
                    continue
                if code == 38:
                    self.fg_color = f"C{color}"
                else:
                    self.bg_color = f"C{color}"

    def process(self, match):
        """Produce the next token and bit of text.

        Interprets the ANSI code (which may be a color code or some other
        code), changing the lexer state and producing a new token. If it's
        not a color code, we just strip it out and move on.

        Some useful reference for ANSI codes:
          * http://ascii-table.com/ansi-escape-sequences.php

        :param match: regex match of the "root" escape rule; group 1 is
            everything after ``ESC[`` up to the next escape character
        :return: generator yielding one ``(index, token, text)`` tuple
        """
        # "after_escape" contains everything after the start of the escape
        # sequence, up to the next escape sequence. We still need to separate
        # the content from the end of the escape sequence.
        after_escape = match.group(1)

        # TODO: this doesn't handle the case where the values are non-numeric.
        # This is rare but can happen for keyboard remapping, e.g.
        # '\x1b[0;59;"A"p'
        parsed = re.match(
            r"([0-9;=]*?)?([a-zA-Z])(.*)$",
            after_escape,
            re.DOTALL | re.MULTILINE,
        )
        if parsed is None:
            # This shouldn't ever happen if we're given valid text + ANSI, but
            # people can provide us with utter junk, and we should tolerate it.
            text = after_escape
        else:
            params, code, text = parsed.groups()
            if code == "m":  # "m" is "Set Graphics Mode"
                if params == "":
                    # Special case \x1b[m is a reset code
                    self.reset_state()
                else:
                    try:
                        values = [int(v) for v in params.split(";")]
                    except ValueError:
                        # Shouldn't ever happen, but could with invalid ANSI.
                        values = []
                    self._apply_graphics_codes(values)

        yield match.start(), self.current_token, text

    # "process" is referenced as a plain function here, so this table has to
    # come after its definition in the class body.
    tokens = {
        "root": [
            (r"\x1b\[([^\x1b]*)", process),
            (r"[^\x1b]+", pygments.token.Text),
        ],
    }