All files main.js

94.47% Statements 154/163
89.74% Branches 35/39
100% Functions 10/10
94.47% Lines 154/163
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164 1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
24x
24x
23x
23x
1x
1x
1x
1x
24x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
6x
6x
6x
24x
22x
22x
22x
22x
22x
2x
6x
6x
6x
6x
6x
11x
11x
11x
121x
121x
121x
11x
11x
49x
11x
11x
11x
11x
10x
20x
20x
9x
9x
121x
121x
121x
8x
121x
86x
86x
86x
86x
86x
86x
6x
6x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
6x
6x
6x
6x
6x
6x
6x
6x
6x
6x
6x
6x
6x
6x
 
 
 
 
 
 
 
 
 
1x
1x
1x
69x
69x
66x
66x
69x
1x
7x
7x
1x
1x
1x
1x
1x
1x
3x
3x
1x
1x
1x
1x
1x
3x
3x
1x
1x
1x
  /**
 * @author Juan Rodríguez Suárez <alu0101477596@ull.edu.es>
 * @since 01/04/2024
 * @module lexgen-code
 * @file This module exports the functions buildLexer and nearleyLexer,
 *       which allow creating lexical analyzers
 */
 
'use strict';
 
// Both patterns are applied to the `source` text of a regular expression.
// They look for the literal shape `(?<name>body)`, where `name` is one or more
// characters other than `<`, `>`, `(` and `)`, and the matched `body` is one or
// more characters other than `>`.
//
// hasNamedRegexp is unanchored: it succeeds when such a group appears anywhere.
// isNamedRegexp is anchored at both ends: the whole source must be exactly one
// such group, so a body containing `>` (for instance a second named group) is
// rejected.
const hasNamedRegexp = /\(\?<[^<>()]+?>[^>]+\)/; // A regular expression with a named parenthesis
const isNamedRegexp = /^\(\?<[^<>()]+?>[^>]+\)$/; // A regular expression with a single named parenthesis
 
/**
 * @description Validates that a regular expression consists of exactly one
 *              named parenthesis wrapping its whole body
 * @param {RegExp} namedRegexp The regular expression to validate (only its
 *     `source` property is read)
 * @return {string | boolean} `true` when the expression is valid, otherwise
 *     a message explaining why it was rejected
 */
const checkRegExpIsNamed = (namedRegexp) => {
  const { source } = namedRegexp;
  // No named parenthesis anywhere in the pattern.
  if (!hasNamedRegexp.test(source)) {
    return 'A regular expression must be named';
  }
  // A named parenthesis exists; it must also be the whole pattern.
  return isNamedRegexp.test(source)
    ? true
    : 'A regular expression must have a single named parenthesis';
};
 
/**
 * Creates a lexical analyzer
 * @param {array} regexps An array of regular expressions.
 *     Regexps must be named using a parenthesis. Example: `/(?<NAME>.)/`.
 *     The whole regexp must be inside the parenthesis.
 *     Only the `source` of each regexp is used; they are joined with `|`
 *     into a single sticky, unicode (`yu`) regexp. The array itself is
 *     never modified.<br><br>
 *     Two optional properties may be attached to a regexp object:<br><br>
 *     1. `skip`. When truthy, whatever the regexp matches is consumed but
 *     not returned as a token (typically used for whitespace)<br><br>
 *     2. `value`. A function that receives the matched text; its result
 *     is stored as the token value<br><br>
 *     The name ERROR is reserved for the implementation: the pattern
 *     `/(?<ERROR>(.|\n)+)/` is appended after the provided regexps, so
 *     when nothing else matches, an ERROR token is produced covering
 *     everything that pattern accepts from that point on (normally the
 *     rest of the input)<br><br>
 *     **Note**: When two regexps can match the one that appears
 *     earlier will be chosen
 * @throws {Error} Will throw if a regular expression isn't named
 *     or has more than one name
 * @return {Object} `{validTokens, lexer}`: a Map from token name to its
 *     regexp, and the function `lexer(string, line = 1)`. The lexer returns
 *     an array of `{type, value, line, col, length}` objects, where `line`
 *     and `col` (1-based) locate the first character of the token. It
 *     throws an Error if a regexp matches the empty string, since the scan
 *     could not advance.
 */
const buildLexer = (regexps) => {
  // Work on a copy: pushing onto the caller's array would append the ERROR
  // rule again every time the same array is reused.
  const allRegexps = [...regexps, /(?<ERROR>(.|\n)+)/];

  const validTokens = new Map();
  for (const regexp of allRegexps) {
    const verdict = checkRegExpIsNamed(regexp);
    if (verdict !== true) {
      throw new Error(String(verdict));
    }
    const tokenName = regexp.source.match(/\(\?<(.+?)>.+?\)/)[1];
    validTokens.set(tokenName, regexp);
  }

  const REGEXP = new RegExp(
    allRegexps.map((r) => r.source).join('|'), 'yu'
  );
  // Group names in priority order, computed once instead of once per match.
  const tokenNames = [...validTokens.keys()];

  const lexer = (string, line = 1) => {
    const tokens = [];
    // Index of the newline that opened the current line (-1 for the first
    // line), so that `index - lineStart` is a 1-based column.
    let lineStart = -1;
    let match;
    // The sticky regexp is shared by every call; a previous call aborted by
    // an exception would otherwise leave a stale starting position behind.
    REGEXP.lastIndex = 0;
    while ((match = REGEXP.exec(string)) !== null) {
      const type = tokenNames.find((name) => match.groups[name] !== undefined);
      if (match[0].length === 0) {
        // A sticky regexp does not advance on an empty match, so the loop
        // would never terminate.
        throw new Error(
          `Token "${type}" matched an empty string at index ${match.index}; regular expressions must consume at least one character`
        );
      }
      const text = match.groups[type];

      // Record where the token starts before accounting for any newline
      // contained in the token itself.
      const startLine = line;
      const col = match.index - lineStart;

      const lastNewline = text.lastIndexOf('\n');
      if (lastNewline !== -1) {
        line += text.split('\n').length - 1;
        // Columns of later tokens are relative to the LAST newline seen.
        lineStart = match.index + lastNewline;
      }

      const rule = validTokens.get(type);
      if (rule.skip) continue;
      const value = rule.value ? rule.value(text) : text;
      tokens.push({ type, value, line: startLine, col, length: match[0].length });
    }
    return tokens;
  };
  return {validTokens, lexer};
};
 
/**
 * Builds a lexer with buildLexer and wraps it in an object exposing
 * `reset`, `next`, `save`, `has` and `formatError`, the methods described
 * in the comments below as the ones nearley.js uses.
 * @param {array} regexps Named regular expressions, forwarded to buildLexer
 * @param {Object} [options] Optional settings
 * @param {function|function[]} [options.transform] A function, or an array
 *     of functions applied in order, that receives the token array produced
 *     by `reset` (including the trailing EOF token) and returns the token
 *     array to use instead
 * @throws {Error} Whatever buildLexer throws for invalid regular expressions
 * @return {Object} The lexer object
 */
const nearleyLexer = function(regexps, options) {
  const {validTokens, lexer} = buildLexer(regexps);
  // Registered with no regexp: it only makes has("EOF") answer true.
  validTokens.set("EOF");
  return {
    currentPos: 0,
    buffer: '',
    // Empty until reset() is called, so next()/save() are safe before then.
    tokens: [],
    lexer: lexer,
    validTokens: validTokens,
    regexps: regexps,
    /**
     * Sets the internal buffer to data, and restores line/col/state info taken from save().
     * Compatibility not tested
     */
    reset: function(data, info) {
      this.buffer = data || '';
      this.currentPos = 0;
      const line = info ? info.line : 1;
      // Lex the normalised buffer: passing a missing `data` straight to the
      // regexp would coerce it to the text "undefined".
      this.tokens = lexer(this.buffer, line);

      // The EOF token replicates the last token (if any) so that it carries
      // the same position information.
      const lastToken = Object.assign(
        {},
        this.tokens[this.tokens.length - 1],
        { type: "EOF", value: "EOF" }
      );
      this.tokens.push(lastToken);

      if (options && options.transform) {
        if (typeof options.transform === 'function') {
          this.tokens = options.transform(this.tokens);
        } else if (Array.isArray(options.transform)) {
          for (const trans of options.transform) {
            this.tokens = trans(this.tokens);
          }
        }
      }
      return this;
    },
    /**
     * Returns e.g. {type, value, line, col, …}. Only the value attribute is required.
     */
    next: function() { // next(): Token | undefined;
      if (this.currentPos < this.tokens.length) {
        return this.tokens[this.currentPos++];
      }
      return undefined;
    },
    /**
     * Tells whether a token type is known to this lexer ("EOF" included).
     */
    has: function(tokenType) {
      return validTokens.has(tokenType);
    },
    /**
     * Returns an object describing the current line/col etc. This allows nearley.JS
     * to preserve this information between feed() calls, and also to support Parser#rewind().
     * The exact structure is lexer-specific; nearley doesn't care what's in it.
     * Here it is the next token to be returned, or undefined when none is left.
     */
    save: function() {
      return this.tokens[this.currentPos];
    }, // line and col
    /**
     * Returns a string with an error message describing the line of the offending token.
     */
    formatError: function(token) {
      return `Error near "${token.value}" in line ${token.line}`;
    } // string with error message
  };
};
 
module.exports = { buildLexer, nearleyLexer };