#include <stdio.h>
#include <sys/types.h>
// state-machine parsing of CSI and OSC strings
// it looks slow because you do jumps every loop iteration
// but it should be fine, since you have branch prediction, right?
// CSI P... I... F
// ESC = 0x1b
// CSI = ESC 0x5b
// P in 0x30-0x3f
// I in 0x20-0x2f
// F in 0x40-0x7e
// Nonzero when c is a control-sequence parameter byte (0x30-0x3f).
static inline int is_cs_param(const char c) {
    // guard on the lower bound first, then the upper bound decides
    if (c < 0x30) {
        return 0;
    }
    return c <= 0x3f;
}
// Nonzero when c is a control-sequence intermediate byte (0x20-0x2f).
static inline int is_cs_inter(const char c) {
    // in range exactly when it is neither below the low end nor above the high end
    return !(c < 0x20 || c > 0x2f);
}
// Nonzero when c is a control-sequence final byte (0x40-0x7e).
static inline int is_cs_final(const char c) {
    const int not_below = c >= 0x40;
    const int not_above = c <= 0x7e;
    return not_below && not_above;
}
// state of parsing control string
enum sp_control {
    // CSI (0x1b 0x5b) has been read; the control sequence state machine starts here
    SPC_CSI = 0,
    // one or more parameter bytes read; next may be a parameter, intermediate, or final byte
    SPC_PARAM = 1,
    // one or more intermediate bytes read; next may be another intermediate or the final byte
    SPC_INTER = 2,
    // the final byte has been seen; parsing is complete
    SPC_FINAL = 3
};
// OSC S... ST
// ESC = 0x1b
// OSC = ESC 0x5d
// S in 0x08-0x0d or 0x20-0x7e
// ST = ESC 0x5c
// Nonzero when c may appear inside an OSC command string:
// 0x08-0x0d or 0x20-0x7e.
static inline int is_osc_chr(const char c) {
    if (c >= 0x20) {
        // upper range: only the top bound is left to check
        return c <= 0x7e;
    }
    // below 0x20 only the 0x08-0x0d control range qualifies
    return c >= 0x08 && c <= 0x0d;
}
// state of parsing operating system command
enum sp_oscmd {
    // OSC (0x1b 0x5d) has been read; the OSC state machine starts here
    SPO_OSC = 0,
    // command string bytes are being accumulated until ST shows up
    SPO_STR = 1,
    // ESC (0x1b) has been read; 0x5c is expected next to complete ST
    SPO_ESC = 2,
    // ST (0x1b 0x5c) has been read; parsing is complete
    SPO_ST = 3
};
// Parse the body of a control sequence (P... I... F) from src.
//
// dst:   receives the consumed bytes in stream order; no terminating 0 is
//        appended. May be NULL, in which case the bytes are validated and
//        counted but not stored.
// n:     maximum number of bytes to consume in this call; when dst is not
//        NULL it must have room for n bytes.
// src:   stream to read from.
// state: parser state, updated in place. Use SPC_CSI to start a sequence;
//        pass the same object again to resume after a negative return.
//
// Returns:
//   > 0  the final byte was read; the value is the number of bytes consumed
//        by this call, final byte included
//   0    error: fgetc returned EOF, or an unexpected byte was read (that byte
//        is pushed back onto src); *state stays at the state that failed.
//        Also returned when n is 0.
//   < 0  n bytes were consumed without reaching the final byte; the magnitude
//        is the number of bytes consumed
ssize_t parse_csi(char *dst, size_t n, FILE *src, enum sp_control *state) {
    size_t used = 0; // bytes consumed (and stored) during this call

    while (used < n) {
        const int c = fgetc(src);
        if (c == EOF) {
            // the sequence cannot be complete yet, whatever the state is
            return 0;
        }
        const char ch = (char)c;

        switch (*state) {
        case SPC_CSI:
            // Nothing is consumed here: the byte is pushed back and re-read
            // by the state that owns it. Parameter and intermediate bytes are
            // optional, so any of the three classes may follow CSI.
            ungetc(c, src);
            if (is_cs_param(ch)) {
                *state = SPC_PARAM;
            } else if (is_cs_inter(ch)) {
                *state = SPC_INTER;
            } else if (is_cs_final(ch)) {
                *state = SPC_FINAL;
            } else {
                return 0;
            }
            continue;

        case SPC_PARAM:
            if (is_cs_param(ch)) {
                break; // stored below
            }
            // not ours: hand it to the next state, or leave it for the caller
            ungetc(c, src);
            if (is_cs_inter(ch)) {
                *state = SPC_INTER;
                continue;
            }
            if (is_cs_final(ch)) {
                *state = SPC_FINAL;
                continue;
            }
            return 0;

        case SPC_INTER:
            if (is_cs_inter(ch)) {
                break; // stored below
            }
            ungetc(c, src);
            if (is_cs_final(ch)) {
                *state = SPC_FINAL;
                continue;
            }
            return 0;

        case SPC_FINAL:
            if (!is_cs_final(ch)) {
                ungetc(c, src);
                return 0;
            }
            if (dst != NULL) {
                dst[used] = ch;
            }
            return (ssize_t)(used + 1);

        default:
            // state does not hold a valid enumerator
            ungetc(c, src);
            return 0;
        }

        if (dst != NULL) {
            dst[used] = ch;
        }
        used++;
    }

    // ran out of room before the final byte
    return -(ssize_t)used;
}
// Pick an error message for a control sequence parse from the stream status
// and the parser state. The returned pointer refers to a string literal.
const char *parse_csi_errmsg(FILE *src, enum sp_control *state) {
    // an I/O error and end-of-file are both reported as a stream error
    const int stream_failed = ferror(src) || feof(src);

    if (*state == SPC_CSI) {
        return stream_failed
            ? "stream error immediately after reading CSI"
            : "unexpected character after CSI: expected parameter byte, intermediate byte, or final byte";
    }
    if (*state == SPC_PARAM) {
        return stream_failed
            ? "stream error while reading parameter bytes, control string was not terminated"
            : "unexpected character while parsing parameters: expected more parameters, intermediate byte, or final byte";
    }
    if (*state == SPC_INTER) {
        return stream_failed
            ? "stream error while reading intermediate bytes, control string was not terminated"
            : "unexpected character while parsing intermediate bytes: expected more intermediate bytes, or final byte";
    }
    // any other state, SPC_FINAL included
    return "unknown error, state suggests control string parsing succeeded";
}
// Parse an operating system command (S... ST) from src.
//
// dst:   receives the command string bytes in stream order, followed by a
//        terminating 0 stored in the slot of the ESC that starts ST. May be
//        NULL, in which case the bytes are validated and counted but not
//        stored.
// n:     maximum number of bytes to consume in this call; when dst is not
//        NULL it must have room for n bytes.
// src:   stream to read from.
// state: parser state, updated in place. Use SPO_OSC to start a command;
//        pass the same object again to resume after a negative return.
//
// Returns:
//   > 0  ST was read; the value is the number of bytes consumed by this call,
//        including both bytes of ST (0x1b 0x5c)
//   0    error: fgetc returned EOF, or an unexpected byte was read (that byte
//        is pushed back onto src); *state stays at the state that failed.
//        Also returned when n is 0.
//   < 0  n bytes were consumed without reaching ST; the magnitude is the
//        number of bytes consumed
ssize_t parse_osc(char *dst, size_t n, FILE *src, enum sp_oscmd *state) {
    size_t used = 0; // bytes consumed during this call

    while (used < n) {
        const int c = fgetc(src);
        if (c == EOF) {
            // the command cannot be terminated yet, whatever the state is
            return 0;
        }
        char store = (char)c; // what goes into dst for this byte

        switch (*state) {
        case SPO_OSC:
            // Nothing is consumed here: the byte is pushed back and re-read
            // by SPO_STR. The first byte must be a command string byte.
            ungetc(c, src);
            if (!is_osc_chr(store)) {
                return 0;
            }
            *state = SPO_STR;
            continue;

        case SPO_STR:
            if (is_osc_chr(store)) {
                break; // stored below
            }
            if (c == 0x1b) {
                // ESC is consumed but is not part of the output; its slot
                // holds the terminating 0 instead
                *state = SPO_ESC;
                store = '\0';
                break;
            }
            ungetc(c, src);
            return 0;

        case SPO_ESC:
            if (c == 0x5c) {
                // ST complete; this byte is counted but not stored
                *state = SPO_ST;
                return (ssize_t)(used + 1);
            }
            ungetc(c, src);
            return 0;

        case SPO_ST:
        default:
            // already terminated (or invalid state): nothing left to parse,
            // so leave the byte in the stream for the caller
            ungetc(c, src);
            return 0;
        }

        if (dst != NULL) {
            dst[used] = store;
        }
        used++;
    }

    // ran out of room before ST
    return -(ssize_t)used;
}
// Pick an error message for an operating system command parse from the
// stream status and the parser state. The returned pointer refers to a
// string literal.
const char *parse_osc_errmsg(FILE *src, enum sp_oscmd *state) {
    // an I/O error and end-of-file are both reported as a stream error
    const int stream_failed = ferror(src) || feof(src);

    if (*state == SPO_OSC) {
        return stream_failed
            ? "stream error immediately after reading OSC"
            : "unexpected character after OSC: expected command string";
    }
    if (*state == SPO_STR) {
        return stream_failed
            ? "stream error while reading command string, command string was not terminated"
            : "unexpected character while parsing command string: expected more valid command string bytes or escape";
    }
    if (*state == SPO_ESC) {
        return stream_failed
            ? "stream error while reading string terminator, command string was not terminated"
            : "unexpected character while parsing string terminator: expected 0x5c";
    }
    // any other state, SPO_ST included
    return "unknown error, state suggests operating system command parsing succeeded";
}