// U+202C (POP DIRECTIONAL FORMATTING) and U+202D (LEFT-TO-RIGHT OVERRIDE).
// NOTE(review): presumably left behind by Google Docs exports, going by the
// constant name — confirm. Both are General Category Cf, so the Cf strip below
// already removes them; the dedicated pass is kept as an explicit safety net.
const EMPTY_GDOCS_CHARS = [8236, 8237].map(c => String.fromCharCode(c));
const EMPTY_GDOCS_CHARS_RX = new RegExp(`[${EMPTY_GDOCS_CHARS.join('')}]`, 'g');

const NEL = `\u{0085}`; // Next Line
const FF = `\u{000c}`; // Form Feed
const VT = `\u{000b}`; // Vertical Tab
const DLE = `\u{0010}`; // Data Link Escape
const DCO = `\u{0011}`; // Device Control One
const DCT = `\u{0012}`; // Device Control Two
const EOT = `\u{0003}`; // End of Text (U+0003 is ETX in ASCII naming, despite the identifier)
const ISF = `\u{001c}`; // Information Separator Four
const ENQ = `\u{0005}`; // Enquiry
const Custom = [DLE, DCO, DCT, EOT, ISF, VT, FF, NEL, ENQ];

// All patterns are compiled once at module load instead of on every call.
// None of the custom characters is special inside a character class, so they
// can be concatenated without escaping.
const CUSTOM_CHARS_RX = new RegExp(`[${Custom.join('')}]`, 'gu');
// CRLF, plus the reversed LF+CR sequence that earlier versions collapsed.
const LINE_BREAK_PAIR_RX = /\r\n|\n\r/g;
// Unicode Line Separator (Zl) and Paragraph Separator (Zp).
const LINE_OR_PARAGRAPH_SEPARATOR_RX = /[\p{Zl}\p{Zp}]/gu;
// Every Unicode Space Separator (Zs) and the horizontal tab.
const SPACE_OR_TAB_RX = /[\p{Zs}\t]/gu;
// Surrogate (Cs: only unpaired ones can match under the `u` flag),
// Private Use (Co), Format (Cf) and Unassigned (Cn) code points.
const INVISIBLE_RX = /[\p{Cs}\p{Co}\p{Cf}\p{Cn}]/gu;

/**
 * Strips non-printable characters from a string and normalizes its whitespace.
 *
 * Normalization:
 * - `\r\n` (CRLF) and `\n\r` become a single `\n`
 * - Unicode line / paragraph separators become `\n`
 * - Unicode space separators and horizontal tabs become a plain space
 *
 * Removal:
 * - unpaired surrogates, private-use, format and unassigned code points
 * - the control characters listed in `Custom`
 * - the characters in `EMPTY_GDOCS_CHARS`
 *
 * Control characters that are not listed above (for example a lone `\r`)
 * are left untouched.
 *
 * @param str - the string to process
 * @returns the processed string
 */
export function stripNonPrintableAndNormalize(str: string): string {
  return (
    str
      // normalize newlines and spacing
      .replace(LINE_BREAK_PAIR_RX, '\n')
      .replace(LINE_OR_PARAGRAPH_SEPARATOR_RX, '\n')
      .replace(SPACE_OR_TAB_RX, ' ')
      // strip invisible / control characters
      .replace(INVISIBLE_RX, '')
      .replace(CUSTOM_CHARS_RX, '')
      .replace(EMPTY_GDOCS_CHARS_RX, '')
  );
}
