import { sum } from 'lodash-es';

/**
 * Tokenize a line of code into "word" tokens.
 * Note: adapted (with very slight modifications) from jsdiff.
 *
 * Every run of non-newline whitespace becomes a single token; each bracket, quote, `\r` and `\n`
 * character becomes a token of its own; the rest of the text is cut at regex word boundaries (`\b`).
 * Neighbouring tokens that were only separated by a word boundary are glued back together when both
 * consist solely of characters accepted by `extendedWordChars`.
 * @param line line of code to split into tokens.
 * @returns the non-empty tokens, in order of appearance.
 */
export function splitLineByWords(line: string): string[] {
  // The capturing group keeps the separators themselves in the resulting array.
  const tokens = line.split(/([^\S\r\n]+|[()[\]{}'"\r\n]|\b)/);

  // Undo the boundary splits that should not count as boundaries (primarily the extended Latin character set).
  let index = 0;
  while (index < tokens.length - 1) {
    const separator = tokens[index + 1];
    const following = tokens[index + 2];
    // An empty separator means the split came from `\b`; merge only when both neighbours are pure word characters.
    const shouldMerge =
      !separator && following && extendedWordChars.test(tokens[index]) && extendedWordChars.test(following);
    if (shouldMerge) {
      tokens[index] += following;
      tokens.splice(index + 1, 2);
      // Stay on the same index: the merged token may need to absorb the next one as well.
    } else {
      index++;
    }
  }

  // NOTE: our addition to the jsdiff algorithm: empty strings are dropped, they carry no information.
  return tokens.filter(Boolean);
}

// Matches strings made up entirely of ASCII letters and extended Latin letters.
// Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
//
// Ranges and exceptions:
// Latin-1 Supplement, 0080–00FF (only the letter part, 00C0–00FF, is included)
//  - U+00D7  × Multiplication sign
//  - U+00F7  ÷ Division sign
// Latin Extended-A, 0100–017F
// Latin Extended-B, 0180–024F
// IPA Extensions, 0250–02AF
// Spacing Modifier Letters, 02B0–02FF
//  - U+02C7  ˇ Caron
//  - U+02D8  ˘ Breve
//  - U+02D9  ˙ Dot Above
//  - U+02DA  ˚ Ring Above
//  - U+02DB  ˛ Ogonek
//  - U+02DC  ˜ Small Tilde
//  - U+02DD  ˝ Double Acute Accent
// Latin Extended Additional, 1E00–1EFF
//
// NOTE: the Latin-1 letters are spelled as C0–D6, D8–F6 and F8–… on purpose. A single C0–FF range
// would silently include the multiplication and division signs listed as exceptions above.
const extendedWordChars =
  /^[a-zA-Z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{2C6}\u{2C8}-\u{2D7}\u{2DE}-\u{2FF}\u{1E00}-\u{1EFF}]+$/u;

/**
 * Resolve the character span covered by the word at `wordIndex` in `line`.
 * @param line line of code; it is tokenized with `splitLineByWords`.
 * @param wordIndex zero-based index of the word among the line's tokens.
 * @returns the `start` and (inclusive) `end` character offsets of that word,
 *   or `null` when `wordIndex` is negative or not smaller than the number of words.
 */
export function wordIndexToCharacterRange(line: string, wordIndex: number): { start: number; end: number } | null {
  // A negative index can never match, so the line is not even tokenized in that case.
  return wordIndex < 0 ? null : wordIndexToCharacterRangeInner(splitLineByWords(line), wordIndex);
}

/**
 * Compute the character range of `words[wordIndex]`, treating `words` as consecutive tokens
 * whose lengths add up to character offsets.
 * @param words tokens of a line, in order.
 * @param wordIndex zero-based index into `words`.
 * @returns `start` (sum of the lengths of all preceding words) and `end` (offset of the word's last
 *   character, i.e. inclusive), or `null` when `wordIndex` is negative or not smaller than `words.length`.
 */
function wordIndexToCharacterRangeInner(words: string[], wordIndex: number): { start: number; end: number } | null {
  const isOutOfBounds = wordIndex < 0 || wordIndex >= words.length;
  if (isOutOfBounds) {
    return null;
  }

  // The word starts right after everything that precedes it.
  const precedingLengths = words.slice(0, wordIndex).map((word) => word.length);
  const start = sum(precedingLengths);
  const wordLength = words[wordIndex].length;

  // `end` points at the last character of the word, hence the -1.
  return { start, end: start + wordLength - 1 };
}

/**
 * Find which word of `line` contains the character at `characterIndex`.
 * @param line line of code; it is tokenized with `splitLineByWords`.
 * @param characterIndex zero-based character offset within `line`.
 * @returns zero-based index of the word containing that character,
 *   or `null` when `characterIndex` is negative or not smaller than `line.length`.
 */
export function characterIndexToWordIndex(line: string, characterIndex: number): number | null {
  // Offsets outside the line cannot belong to any word; bail out before tokenizing.
  const isOutsideLine = characterIndex < 0 || characterIndex >= line.length;
  return isOutsideLine ? null : characterIndexToWordIndexInner(splitLineByWords(line), characterIndex);
}

/**
 * Locate the word that contains `characterIndex`, treating `words` as consecutive tokens
 * whose lengths add up to character offsets.
 * @param words tokens of a line, in order.
 * @param characterIndex zero-based character offset.
 * @returns zero-based index of the word whose span covers `characterIndex`,
 *   or `null` when the offset is negative or lies beyond the last word.
 */
function characterIndexToWordIndexInner(words: string[], characterIndex: number): number | null {
  if (characterIndex < 0) {
    return null;
  }

  // Walk the words while tracking the half-open span [wordStart, wordEnd) each one occupies.
  let wordStart = 0;
  for (let index = 0; index < words.length; index++) {
    const wordEnd = wordStart + words[index].length;
    if (characterIndex >= wordStart && characterIndex < wordEnd) {
      return index;
    }
    wordStart = wordEnd;
  }

  return null;
}

/**
 * Expand a character range to whole words: collect every word of the line that overlaps
 * the columns `startCharacter`..`endCharacter` (both inclusive).
 * @returns `wordStart`/`wordEnd` — indexes of the first and last overlapping word — and `token`,
 *   the overlapping words joined together. When no word overlaps the range the result is
 *   `wordStart: -1`, `wordEnd: -2` and an empty `token`.
 */
export function expandToToken({
  lineData,
  startCharacter,
  endCharacter,
}: {
  lineData: string;
  startCharacter: number;
  endCharacter: number;
}): { wordStart: number; wordEnd: number; token: string } {
  const overlappingWords: string[] = [];
  let wordStart = -1;
  let offset = 0;

  splitLineByWords(lineData).forEach((word, index) => {
    // The word occupies the columns offset..lastColumn, both inclusive.
    const lastColumn = offset + word.length - 1;
    const overlapsRange = lastColumn >= startCharacter && offset <= endCharacter;
    if (overlapsRange) {
      if (wordStart < 0) {
        // Remember only the first overlapping word.
        wordStart = index;
      }
      overlappingWords.push(word);
    }
    offset += word.length;
  });

  const wordEnd = wordStart + overlappingWords.length - 1;
  const token = overlappingWords.join('');
  return { wordStart, wordEnd, token };
}

/**
 * A line that is tokenized once, so that repeated word/character index conversions
 * do not have to split it again.
 */
export class LineWords {
  /** Tokens of the line, as returned by `splitLineByWords`. */
  words: string[];

  /**
   * @param line line of code to tokenize.
   */
  constructor(line: string) {
    const tokens = splitLineByWords(line);
    this.words = tokens;
  }

  /**
   * Character range of the word at `wordIndex`; delegates to `wordIndexToCharacterRangeInner`.
   * @returns the word's `start`/`end` character offsets, or `null` when there is no such word.
   */
  wordIndexToCharacterRange(wordIndex: number): { start: number; end: number } | null {
    const { words } = this;
    return wordIndexToCharacterRangeInner(words, wordIndex);
  }

  /**
   * Index of the word containing `characterIndex`; delegates to `characterIndexToWordIndexInner`.
   * @returns the word index, or `null` when no word covers that offset.
   */
  characterIndexToWordIndex(characterIndex: number): number | null {
    const { words } = this;
    return characterIndexToWordIndexInner(words, characterIndex);
  }
}
