Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 24x 24x 23x 23x 1x 1x 1x 1x 24x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 6x 6x 6x 24x 22x 22x 22x 22x 22x 2x 6x 6x 6x 6x 6x 11x 11x 11x 121x 121x 121x 11x 11x 49x 11x 11x 11x 11x 10x 20x 20x 9x 9x 121x 121x 121x 8x 121x 86x 86x 86x 86x 86x 86x 6x 6x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 6x 6x 6x 6x 6x 6x 6x 6x 6x 6x 6x 6x 6x 6x 1x 1x 1x 69x 69x 66x 66x 69x 1x 7x 7x 1x 1x 1x 1x 1x 1x 3x 3x 1x 1x 1x 1x 1x 3x 3x 1x 1x 1x | /**
 * @author Juan Rodríguez Suárez <alu0101477596@ull.edu.es>
 * @since 01/04/2024
 * @module lexgen-code
 * @file This module exports the functions buildLexer and nearleyLexer,
 *       which allow creating lexical analyzers
 */
 
'use strict';
 
// Matches, anywhere in a regexp source, a named parenthesis `(?<NAME>body)`
// whose name has no `<`, `>`, `(` or `)` and whose body has no `>`
const hasNamedRegexp = /\(\?<[^<>()]+?>[^>]+\)/;
// Same pattern anchored at both ends: the source must start with `(?<NAME>`
// and end with `)`, with no `>` anywhere after the name
const isNamedRegexp = /^\(\?<[^<>()]+?>[^>]+\)$/;
 
/**
 * @description Validates the source of a regular expression against the two
 *              naming patterns: it must contain a named parenthesis, and
 *              that named parenthesis must wrap the whole expression
 * @param {RegExp} namedRegexp The regular expression to validate
 * @return {string | boolean} `true` when the expression is accepted,
 *     otherwise a message explaining why it was rejected
 */
const checkRegExpIsNamed = (namedRegexp) => {
  const { source } = namedRegexp;
  // Guard clause: no named parenthesis at all
  if (!hasNamedRegexp.test(source)) {
    return 'A regular expression must be named';
  }
  // A named parenthesis exists; accept only when it spans the entire source
  return isNamedRegexp.test(source)
    ? true
    : 'A regular expression must have a single named parenthesis';
};
 
/**
 * Creates a lexical analyzer
 * @param {array} regexps An array of regular expressions.
 *     Regexps must be named using a parenthesis. Example: `/(?<NAME>.)/`.
 *     The whole regexp must be inside the parenthesis.
 *     The names SPACE and ERROR are special:<br><br>
 *     1. SPACE. If something matches a parenthesis named SPACE it will
 *     be ignored<br><br>
 *     2. ERROR. It is a special value reserved for the implementation.
 *     When something doesn't match any of the provided regexps it will
 *     be returned as error. The error will span from the point where nothing
 *     matched to the next whitespace(\s)<br><br>
 *     **Note**: When two regexps can match the one that appears
 *     earlier will be chosen
 * @throws {Error} Will throw if a regular expression isn't named
 *     or has more than one name
 * @return {Object} The map of valid tokens and a lexical analyzer in form of a function
 */
const buildLexer = (regexps) => {
  let validTokens = new Map();
  regexps.push(/(?<ERROR>(.|\n)+)/);
  regexps.forEach((regexp) => {
    const NAME = checkRegExpIsNamed(regexp);
    if (NAME === true) {
      const tokenName = regexp.source.match(/\(\?<(.+?)>.+?\)/)[1];
      validTokens.set(tokenName, regexp);
    } else {
      throw new Error(String(NAME));
    }
  });
  const REGEXP = new RegExp(
    regexps.map(r => r.source).join('|'), 'yu'
  );
  let lexer = (string, line = 1) => {
    let match;
    let tokens = [];
    let actualColumn = -1;
    while (match = REGEXP.exec(string)) {
      let token = Array.from(validTokens.keys()).find(key => match.groups[key] !== undefined);
      let newValue = match.groups[token];
      if (newValue.includes('\n')) {
        let lines = newValue.split('');
        lines.forEach((char) => {
          if (char === '\n') ++line;
        });
        if (token !== 'ERROR') actualColumn = match.index;
        // Remove starting whitespaces which do not count for next line
        for (let char of lines) {
          if (char === '\n' || token === 'ERROR') {
            break;
          }
          ++actualColumn;
        };
      }
      if (validTokens.get(token).skip) continue;
      if (validTokens.get(token).value) {
        newValue = validTokens.get(token).value(newValue);
      }
      tokens.push({ type: token, value: newValue, line: line, col: match.index - actualColumn, length: match[0].length });
    }
    return tokens;
  };
  return {validTokens, lexer};
};
 
/**
 * Wraps a lexer created by buildLexer in an object exposing the methods
 * `reset`, `next`, `save`, `formatError` and `has`
 * @param {array} regexps Named regular expressions, handed to buildLexer
 * @param {Object} [options] Optional settings
 * @param {function | function[]} [options.transform] A function, or an array
 *     of functions applied in order, that receives the token array (EOF
 *     included) and returns the token array to use instead
 * @return {Object} The lexer object
 */
const nearleyLexer = function(regexps, options) {
  const {validTokens, lexer} = buildLexer(regexps);
  // EOF is produced by reset(), not by a regexp, so it is registered without one
  validTokens.set("EOF");
  return {
    currentPos: 0,
    buffer: '',
    // Empty until reset() runs, so next() and save() can be called before it
    tokens: [],
    lexer: lexer,
    validTokens: validTokens,
    regexps: regexps,
    /**
     * Sets the internal buffer to data, tokenizes it and rewinds to the first token.
     * When info is given, its `line` is used as the starting line number.
     * Compatibility not tested
     */
    reset: function(data, info) {
      this.buffer = data || '';
      this.currentPos = 0;
      const line = info ? info.line : 1;
      // Lex the normalised buffer: handing a missing `data` to the lexer
      // would tokenize the text "undefined"
      this.tokens = lexer(this.buffer, line);

      // The EOF token replicates the last token if it exists, so it
      // carries that token's line/col/length
      const lastToken = Object.assign({}, this.tokens[this.tokens.length - 1]);
      lastToken.type = "EOF";
      lastToken.value = "EOF";
      this.tokens.push(lastToken);

      if (options && options.transform) {
        if (typeof options.transform === 'function') {
          this.tokens = options.transform(this.tokens);
        } else if (Array.isArray(options.transform)) {
          // Each transform receives the output of the previous one
          for (const transform of options.transform) {
            this.tokens = transform(this.tokens);
          }
        }
      }
      return this;
    },
    /**
     * Returns the next token, e.g. {type, value, line, col, …}, or undefined
     * when every token has been consumed.
     */
    next: function() { // next(): Token | undefined;
      if (this.currentPos < this.tokens.length) {
        return this.tokens[this.currentPos++];
      }
      return undefined;
    },
    /**
     * Tells whether tokenType is one of the known token names (EOF included).
     */
    has: function(tokenType) {
      return validTokens.has(tokenType);
    },
    /**
     * Returns the token at the current position (the one next() would return),
     * or undefined when all tokens have been consumed. reset() reads `line`
     * from the value it receives as info.
     */
    save: function() {
      return this.tokens[this.currentPos];
    },
    /**
     * Returns a string with an error message giving the value and line of the offending token.
     */
    formatError: function(token) {
      return `Error near "${token.value}" in line ${token.line}`;
    }
  };
};
 
// Public API of the module (CommonJS): the two lexer factories
module.exports = { buildLexer, nearleyLexer };
  |