All files main.js

86.36% Statements 57/66
59.25% Branches 16/27
92.3% Functions 12/13
89.83% Lines 53/59

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150                      1x 1x                 1x 22x 22x 21x 21x 21x 20x                                         1x 6x 6x 6x 22x 22x 20x   4x 20x     4x   6x 6x 6x 119x 49x 49x 49x 26x 26x 26x   49x   6x   4x     1x   1x 1x 1x                     1x 1x 1x 1x   1x   1x 1x 1x   1x     1x               1x           1x 1x       2x               1x             1x         1x  
// @ts-check
/**
 * @author Aday Cuesta Correa <alu0101483887@ull.edu.es>
 * @date 02/04/2024
 * @module lexgen-code
 * @file This module exports the functions buildLexer and nearleyLexer
 *     that allows to create lexical analyzers
 */
 
'use strict';
 
// Matches the opening of a named capture group, e.g. `(?<NAME>`.
const HasNamedRegexp = /[(][?]<(\w+)>/;
// Matches a source that starts with a named group opener and ends with `)`.
// Capture 1 is the group name, capture 2 is the (non-empty) body.
const IsNamedRegexp = /^[(][?]<(\w+)>(.+)[)]$/;

/**
 * A helper function to check that a regular expression is wrapped in a
 *     named parenthesis and that no further named parenthesis appears
 *     anywhere else in its source
 * @param {RegExp} namedRegexp The regular expression
 * @return {string | boolean} The name of the group when the regular
 *     expression is named and declares no other named group, `false` otherwise
 * @private
 */
const checkRegExpIsNamed = (namedRegexp) => {
  const id = IsNamedRegexp.exec(namedRegexp.source);
  if (!id) return false;
  const name = id[1];
  const body = id[2]; // Never empty: IsNamedRegexp requires `.+` here
  // Escaped characters and character classes are removed first so that a
  // literal such as `\(?<x>` or `[(?<x>]` is not mistaken for a group opener.
  const structure = body
    .replace(/\\[\s\S]/g, '')
    .replace(/\[[^\]]*\]/g, '');
  // Any remaining opener means a second name, either nested
  // (`(?<A>(?<B>x))`) or in a sibling group (`(?<A>a)|(?<B>b)`).
  if (HasNamedRegexp.test(structure)) return false;
  return name;
};
 
/**
 * Creates a lexical analyzer
 * @param {array} regexps An array of regular expressions.
 *     Regexps must be named using a parenthesis. Example: `/(?<NAME>.)/`.
 *     The whole regexp must be inside the parenthesis.
 *     Only the `source` of each regexp is used; the sources are joined
 *     with `|` into a single sticky, unicode regexp. The array itself is
 *     not modified.<br><br>
 *     Two optional properties set on a regexp object change how its
 *     tokens are handled:<br><br>
 *     1. `skip`. When truthy, whatever matches the regexp is consumed
 *     but no token is emitted (the name alone, e.g. SPACE, does not
 *     cause skipping)<br><br>
 *     2. `value`. When it is set, the matched text is passed to it and the
 *     result becomes the `value` of the token<br><br>
 *     The name ERROR is reserved for the implementation. When nothing
 *     matches at the current position, the rest of the input is returned
 *     as a single ERROR token<br><br>
 *     **Note**: When two regexps can match the one that appears
 *     earlier will be chosen
 * @throws {Error} Will throw if each regular expression isn't named
 *     or has more than one name
 * @return {Object} An object `{validTokens, lexer}`: `validTokens` is a Map
 *     from token name to its regexp (ERROR included) and `lexer` is a
 *     function `(string, line = 1)` returning an array of tokens
 *     `{type, value, line, col, length}`. `line` is the line on which the
 *     token ends and `col` is the 1-based column where it starts. `lexer`
 *     throws an Error if a regexp matches the empty string.
 */
const buildLexer = (regexps) => {
  const validTokens = new Map();
  // Copy instead of pushing onto the caller's array: building twice from the
  // same array would otherwise append the ERROR regexp once per call.
  const allRegexps = [...regexps, /(?<ERROR>(.|\n)+)/];
  allRegexps.forEach((namedRegexp) => {
    const tokenName = checkRegExpIsNamed(namedRegexp);
    if (!tokenName) throw new Error ('All regular expressions must be named, have a non empty regexp');
    validTokens.set(tokenName, namedRegexp);
  });
  const regexp = new RegExp(
    allRegexps.map((namedRegexp) => namedRegexp.source).join('|'),
    'yu',
  );
  const lexer = (string, line=1) => {
    const result = [];
    let start = 0;
    let match;
    // The sticky regexp is shared by every call. A previous call that was
    // aborted by an exception leaves lastIndex in the middle of its input.
    regexp.lastIndex = 0;
    while ((match = regexp.exec(string)) !== null) {
      const type = Object.keys(match.groups).find((name) => match.groups[name] !== undefined);
      if (match[0].length === 0) {
        // An empty match does not advance lastIndex, so the loop would never end.
        regexp.lastIndex = 0;
        throw new Error(`Token "${type}" matched the empty string at offset ${start}`);
      }
      // Newlines inside the match are counted before the token is built,
      // so `line` refers to the line where the token ends.
      line += string.slice(start, regexp.lastIndex).split('\n').length - 1;
      const col = start - string.lastIndexOf('\n', start);
      const definition = validTokens.get(type);
      if (!definition.skip) {
        let value = match.groups[type];
        if (definition.value) value = definition.value(value);
        result.push({type, value, line, col, length: regexp.lastIndex - start});
      }
      start = regexp.lastIndex;
    }
    return result;
  };
  return {validTokens, lexer};
};
 
/**
 * Wraps a lexer created by buildLexer in an object exposing the
 *     reset/next/save/has/formatError methods used by nearley.JS
 * @param {array} regexps The named regular expressions, passed to buildLexer
 * @param {Object} [options] Optional settings
 * @param {function | function[]} [options.transform] A function, or an
 *     array of functions applied in order, that receives the token array
 *     (EOF included) after each reset and returns the array to be used
 * @throws {Error} Whatever buildLexer throws for invalid regexps
 * @return {Object} The lexer object
 */
const nearleyLexer = function(regexps, options) {
  const {validTokens, lexer} = buildLexer(regexps);
  // EOF has no regexp; it is registered only so that has('EOF') is true.
  validTokens.set('EOF', undefined);
  return {
    currentPos: 0,
    buffer: '',
    // Starts empty so next() and save() are usable before the first reset().
    tokens: [],
    lexer: lexer,
    validTokens: validTokens,
    regexps: regexps,
    /**
     * Sets the internal buffer to data, and restores line/col/state info taken from save().
     * The whole buffer is tokenized at once and an EOF token is appended.
     * Compatibility not tested
     */
    reset: function(data, info) {
      this.buffer = data || '';
      this.currentPos = 0;
      const line = info ? info.line : 1;
      // Lex the normalized buffer: lexing a missing `data` directly would
      // tokenize the text "undefined".
      this.tokens = lexer(this.buffer, line);

      // EOF replicates the last token (line, col, length) if it exists;
      // for empty input it only carries type and value.
      const lastToken = {};
      Object.assign(lastToken, this.tokens[this.tokens.length - 1]);
      lastToken.type = 'EOF';
      lastToken.value = 'EOF';
      this.tokens.push(lastToken);

      if (options && options.transform) {
        if (typeof options.transform === 'function') {
          this.tokens = options.transform(this.tokens);
        } else if (Array.isArray(options.transform)) {
          options.transform.forEach((trans) => {
            this.tokens = trans(this.tokens);
          });
        }
      }
      return this;
    },
    /**
     * Returns e.g. {type, value, line, col, …}. Only the value attribute is required.
     * Returns undefined once every token has been consumed.
     */
    next: function() { // next(): Token | undefined;
      if (this.currentPos < this.tokens.length) {
        return this.tokens[this.currentPos++];
      }
      return undefined;
    },
    /**
     * Tells whether tokenType is one of the token names known to this lexer (EOF included).
     */
    has: function(tokenType) {
      return validTokens.has(tokenType);
    },
    /**
     * Returns an object describing the current line/col etc. This allows nearley.JS
     * to preserve this information between feed() calls, and also to support Parser#rewind().
     * The exact structure is lexer-specific; nearley doesn't care what's in it.
     * Here it is the next token to be returned, or undefined when all were consumed.
     */
    save: function() {
      return this.tokens[this.currentPos];
    }, // line and col
    /**
     * Returns a string with an error message describing the line of the offending token.
     */
    formatError: function(token) {
      return `Error near "${token.value}" in line ${token.line}`;
    }, // string with error message
  };
};
 
// Public API of the module (CommonJS): the lexer generator and its nearley.JS wrapper.
module.exports = { buildLexer, nearleyLexer };