//Function to validate training data. Returns the object validationResults, whose "invalidStrings" are to be rendered as issues in the training data.
//Counts the number of active and disabled intents and returns these as validationResults.numberOfActiveIntents and validationResults.numberOfDisabledIntents
//Counts the number of active training sentences and returns this as validationResults.numberOfActiveSentences
//Counts the number of issues and returns this as validationResults.numberOfIssues

//If a training sentence containing "(" or ")" is identified, run it through validateEntityMatches to validate all entities in the sentence.
/**
 * Checks that every entity annotation in a training sentence is well formed,
 * i.e. written as [value](entity).
 *
 * @param {string} sentence - Training sentence to inspect.
 * @returns {boolean} true when the square brackets are balanced and not
 *   nested, at least one entity candidate is found, and every candidate
 *   contains a well-formed [value](entity) pair; false otherwise.
 */
const validateEntityMatches = (sentence) => {
  // Square brackets must be balanced and must not nest. The pattern is
  // "unrolled" (plain text, then repeated "[...]" + plain text) so that every
  // character can be consumed in exactly one way. The previous form
  // /^([^\[\]]*|\[[^\[\]]*\])*$/ nested one star inside another and
  // backtracked exponentially on sentences containing a stray "]".
  const flatBracketsRegex = /^[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*$/;
  if (!flatBracketsRegex.test(sentence)) {
    return false;
  }

  // Candidate spans: from a "[" up to the first ")" following its "]".
  const entityIdentifyerRegex = /\[.*?\].*?\)/g;
  // Valid entity; letters, digits, whitespace and "-" are allowed so that
  // definitions may consist of multiple words.
  const entityRegex =
      /\[\s*[a-zA-Z0-9À-ÿ\s-]+\s*\]\(\s*[a-zA-Z0-9À-ÿ\s-]+\s*\)/;

  const entities = sentence.match(entityIdentifyerRegex);

  // No candidate at all counts as invalid: the caller only invokes this
  // function for sentences that contain a parenthesis.
  return entities ? entities.every((ent) => entityRegex.test(ent)) : false;
};

//If a training sentence containing "{" or "}" is identified, run it through validateSynonymMatches to validate all synonyms.
/**
 * Checks that every synonym annotation in a training sentence is well formed,
 * i.e. written as [synonym]{"entity": "entity", "value": "value"}.
 *
 * @param {string} sentence - Training sentence to inspect.
 * @returns {boolean} true when at least one synonym candidate is found and
 *   every candidate contains a well-formed definition; false otherwise.
 */
const validateSynonymMatches = (sentence) => {
  // Shape of a valid synonym definition.
  const wellFormedSynonym =
      /\[\s*[a-zA-Z0-9À-ÿ\s-]+\s*\]\{\s*"entity":\s*"\s*[a-zA-Z0-9À-ÿ\s-]+\s*",\s*"value":\s*"\s*[a-zA-Z0-9À-ÿ\s-]+\s*"\s*\}/;

  // Candidate spans: from a "[" up to the first "}" following its "]".
  const candidates = sentence.match(/\[.*?\].*?\}/g);
  if (!candidates) {
    return false;
  }

  for (const candidate of candidates) {
    if (!wellFormedSynonym.test(candidate)) {
      return false;
    }
  }
  return true;
};

//Main validation function
/**
 * Validates training data and summarises the findings.
 *
 * Examples are read from data.rasa_nlu_data.common_examples; each example is
 * expected to carry `intent`, `text` and `status` ("active" or "disabled").
 * Only active examples are validated.
 *
 * @param {object} data - Training data object.
 * @returns {string|object} The string "Training data is empty" when `data` is
 *   falsy. Otherwise an object with intentsInTotal, numberOfActiveIntents,
 *   numberOfDisabledIntents, avgSentencesPerIntent (0 when there is no active
 *   intent), numberOfActiveSentences, numberOfIssues, invalidStrings (array of
 *   { intent, sentence, message, type } with type "Error" or "Warning"),
 *   numberOfWarnings and numberOfErrors.
 */
const validateTrainingData = (data) => {
  //If data is empty return "Training data is empty"
  if (!data) return "Training data is empty";

  const examples = data.rasa_nlu_data.common_examples;

  const invalidStrings = [];
  const addIssue = (intent, sentence, message, type) => {
    invalidStrings.push({
      intent: intent,
      sentence: sentence,
      message: message,
      type: type,
    });
  };

  // Single pass over all examples: collect the active examples, the unique
  // intent names per status (in order of first appearance) and the number of
  // active sentences per intent.
  const activeData = [];
  const activeIntents = [];
  const disabledIntents = [];
  const seenDisabledIntents = new Set();
  const activeSentenceCounts = new Map();

  examples.forEach((example) => {
    if (example.status === "active") {
      activeData.push(example);
      const countSoFar = activeSentenceCounts.get(example.intent);
      if (countSoFar === undefined) {
        activeIntents.push(example.intent);
        activeSentenceCounts.set(example.intent, 1);
      } else {
        activeSentenceCounts.set(example.intent, countSoFar + 1);
      }
    } else if (example.status === "disabled") {
      if (!seenDisabledIntents.has(example.intent)) {
        seenDisabledIntents.add(example.intent);
        disabledIntents.push(example.intent);
      }
    }
  });

  // Every active intent needs at least 2 active sentences. Disabled sentences
  // are not counted, since only active sentences are validated below.
  activeIntents.forEach((intent) => {
    if (activeSentenceCounts.get(intent) < 2) {
      addIssue(intent, "", "Intent must contain atleast 2 sentences", "Error");
    }
  });

  //If activeData is empty. Push Error and block training.
  // This has to be checked outside the validation loop: the loop body never
  // runs for an empty array, and an array is never falsy.
  if (activeData.length === 0) {
    addIssue("", "", "No active intents found.", "Error");
  }

  const invalidCharacters = /[+#%&/$£;]/; // characters not allowed in sentences or intent names

  // Intent-name checks are reported once per intent, not once per sentence.
  const checkedIntentNames = new Set();

  //VALIDATION LOOP
  activeData.forEach((str) => {
    //Validate invalid characters in training sentences.
    if (invalidCharacters.test(str.text)) {
      addIssue(
          str.intent,
          str.text,
          "Sentence contains invalid characters: +#%&/$£;",
          "Error"
      );
    }

    //Whitespace directly after an opening bracket or just before a closing bracket.
    if (/\[\s|\s\]/.test(str.text)) {
      addIssue(
          str.intent,
          str.text,
          "Whitespace is not allowed directly after opening bracket or just before closing bracket in entity definition",
          "Error"
      );
    }

    //Placeholder marking an empty training sentence.
    if (/##empty##/.test(str.text)) {
      addIssue(str.intent, str.text, "Training sentence is empty.", "Warning");
    }

    //Any parenthesis is treated as a sign of an entity definition, which must then be valid.
    if (/[\(\)]/.test(str.text) && validateEntityMatches(str.text) === false) {
      addIssue(
          str.intent,
          str.text,
          "Entity not defined correctly. Should be [value](entity)",
          "Error"
      );
    }

    //Any brace, double quote or colon is treated as a sign of a synonym definition, which must then be valid.
    if (/[\{\}":]/.test(str.text) && validateSynonymMatches(str.text) === false) {
      addIssue(
          str.intent,
          str.text,
          'Synonym not defined correctly. Should be [synonym]{"entity": "entity", "value": "value"}',
          "Error"
      );
    }

    if (!checkedIntentNames.has(str.intent)) {
      checkedIntentNames.add(str.intent);

      // Intent names must not start with : or -.
      if (str.intent.startsWith(":") || str.intent.startsWith("-")) {
        addIssue(
            str.intent,
            "",
            "Intent name contains must not start with : or -",
            "Warning"
        );
      }
      //Intent names must not end with whitespace.
      if (str.intent.endsWith(" ")) {
        addIssue(str.intent, "", "Intent name ends with whitespace", "Error");
      }
      //Intent names must not contain invalid characters.
      if (invalidCharacters.test(str.intent)) {
        addIssue(str.intent, "", "Intent name contains invalid characters", "Error");
      }
    }
  });

  let warnings = 0;
  let errors = 0;
  invalidStrings.forEach((issue) => {
    if (issue.type === "Error") errors++;
    if (issue.type === "Warning") warnings++;
  });

  const validationResults = {
    intentsInTotal: activeIntents.length + disabledIntents.length,
    numberOfActiveIntents: activeIntents.length,
    numberOfDisabledIntents: disabledIntents.length,
    // Avoid NaN (0 / 0) when there are no active intents.
    avgSentencesPerIntent:
        activeIntents.length === 0 ? 0 : activeData.length / activeIntents.length,
    numberOfActiveSentences: activeData.length,
    numberOfIssues: invalidStrings.length,
    invalidStrings: invalidStrings,
    numberOfWarnings: warnings,
    numberOfErrors: errors,
  };
  return validationResults;
};

export default validateTrainingData;
