From 27791acf505cebdb72e07b545764f6a9ab7e153c Mon Sep 17 00:00:00 2001
From: Julian Prein
Date: Fri, 19 Sep 2025 00:51:47 +0200
Subject: [PATCH] bin:no-ansi: Correct allowed byte ranges

Correct the allowed byte ranges of the APC, DCS, OSC, PM & SOS sequences.
All but SOS additionally allow the bytes in the range `0x08 - 0x0D`,
while SOS may be followed by any sequence of bytes that contains neither
SOS nor ST (i.e. `\eX` & `\e\\`).

Link: https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf#page=27
---
 .local/bin/no-ansi | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.local/bin/no-ansi b/.local/bin/no-ansi
index 76cd61c..c74812b 100755
--- a/.local/bin/no-ansi
+++ b/.local/bin/no-ansi
@@ -11,13 +11,12 @@
 # See:
 # - https://en.wikipedia.org/wiki/ANSI_escape_code
 # - https://www.ecma-international.org/wp-content/uploads/ECMA-35_6th_edition_december_1994.pdf
-#
-# TODO: which bytes do DCS, OSC, SOS, PM & APC (second pattern) actually allow?
-# Find some documentation, since `[ -~]` is guessed.
+# - https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf
 
 env LC_ALL=C sed -E "$(printf "%b" \
     's/' \
         '\033\\[[0-?]*[ -/]*[@-~]' '|' \
-        '\033[]PX^_][ -~]*\033\\\\' '|' \
+        '\033[]P^_][\010-\015 -~]*\033\\\\' '|' \
+        '\033X([^\033]|\033+[^\033X\\])*\033+\\\\' '|' \
         '\033[ -/]*[0-~]' \
     '//g')"