0

ある開発者が非常に単純なプログラムを作成しました:

var a = 6;
var b = 7
console.log(a * b);

すべての開発者がすべてのASIルールを知っているとは限らないため、開発者がセミコロンを使用するようにしたいと考えています。他のコード品質チェックを追加する予定なので、Esprimaを使用して、チェック対象のコードのASTを生成したいと考えています。上記の単純なプログラムをEsprima オンライン パーサー([行と列ベース] オプションをオンにして) で解析すると、次の構造が作成されます。

{
    "loc": {
        "start": {
            "line": 1,
            "column": 0
        },
        "end": {
            "line": 3,
            "column": 19
        }
    },
    "type": "Program",
    "body": [
        {
            "loc": {
                "start": {
                    "line": 1,
                    "column": 0
                },
                "end": {
                    "line": 1,
                    "column": 10
                }
            },
            "type": "VariableDeclaration",
            "declarations": [
                {
                    "loc": {
                        "start": {
                            "line": 1,
                            "column": 4
                        },
                        "end": {
                            "line": 1,
                            "column": 9
                        }
                    },
                    "type": "VariableDeclarator",
                    "id": {
                        "loc": {
                            "start": {
                                "line": 1,
                                "column": 4
                            },
                            "end": {
                                "line": 1,
                                "column": 5
                            }
                        },
                        "type": "Identifier",
                        "name": "a"
                    },
                    "init": {
                        "loc": {
                            "start": {
                                "line": 1,
                                "column": 8
                            },
                            "end": {
                                "line": 1,
                                "column": 9
                            }
                        },
                        "type": "Literal",
                        "value": 6,
                        "raw": "6"
                    }
                }
            ],
            "kind": "var"
        },
        {
            "loc": {
                "start": {
                    "line": 2,
                    "column": 0
                },
                "end": {
                    "line": 3,
                    "column": 0
                }
            },
            "type": "VariableDeclaration",
            "declarations": [
                {
                    "loc": {
                        "start": {
                            "line": 2,
                            "column": 4
                        },
                        "end": {
                            "line": 2,
                            "column": 9
                        }
                    },
                    "type": "VariableDeclarator",
                    "id": {
                        "loc": {
                            "start": {
                                "line": 2,
                                "column": 4
                            },
                            "end": {
                                "line": 2,
                                "column": 5
                            }
                        },
                        "type": "Identifier",
                        "name": "b"
                    },
                    "init": {
                        "loc": {
                            "start": {
                                "line": 2,
                                "column": 8
                            },
                            "end": {
                                "line": 2,
                                "column": 9
                            }
                        },
                        "type": "Literal",
                        "value": 7,
                        "raw": "7"
                    }
                }
            ],
            "kind": "var"
        },
        {
            "loc": {
                "start": {
                    "line": 3,
                    "column": 0
                },
                "end": {
                    "line": 3,
                    "column": 19
                }
            },
            "type": "ExpressionStatement",
            "expression": {
                "loc": {
                    "start": {
                        "line": 3,
                        "column": 0
                    },
                    "end": {
                        "line": 3,
                        "column": 18
                    }
                },
                "type": "CallExpression",
                "callee": {
                    "loc": {
                        "start": {
                            "line": 3,
                            "column": 0
                        },
                        "end": {
                            "line": 3,
                            "column": 11
                        }
                    },
                    "type": "MemberExpression",
                    "computed": false,
                    "object": {
                        "loc": {
                            "start": {
                                "line": 3,
                                "column": 0
                            },
                            "end": {
                                "line": 3,
                                "column": 7
                            }
                        },
                        "type": "Identifier",
                        "name": "console"
                    },
                    "property": {
                        "loc": {
                            "start": {
                                "line": 3,
                                "column": 8
                            },
                            "end": {
                                "line": 3,
                                "column": 11
                            }
                        },
                        "type": "Identifier",
                        "name": "log"
                    }
                },
                "arguments": [
                    {
                        "loc": {
                            "start": {
                                "line": 3,
                                "column": 12
                            },
                            "end": {
                                "line": 3,
                                "column": 17
                            }
                        },
                        "type": "BinaryExpression",
                        "operator": "*",
                        "left": {
                            "loc": {
                                "start": {
                                    "line": 3,
                                    "column": 12
                                },
                                "end": {
                                    "line": 3,
                                    "column": 13
                                }
                            },
                            "type": "Identifier",
                            "name": "a"
                        },
                        "right": {
                            "loc": {
                                "start": {
                                    "line": 3,
                                    "column": 16
                                },
                                "end": {
                                    "line": 3,
                                    "column": 17
                                }
                            },
                            "type": "Identifier",
                            "name": "b"
                        }
                    }
                ]
            }
        }
    ]
}

セミコロンが使用されているかどうかを確認するにはどうすればよいですか? AST の 2 番目の VariableDeclaration は、以下に示すように {line: 3, column: 0} で終了しているため、おそらく 2 行目ではセミコロンが使用されていないと推測できます。

2 番目の VariableDeclaration の位置は 3 行目で終わります

これは、Esprima を使用する他のツールが行っている方法ですか? 行末が \r\n か \n かの違いはどのように確認すればよいですか? Esprima は、このタスクに適したツールではないのでしょうか?

編集

この質問を共有した同僚は、トークンのリストを取得できるように、「おそらく解析ツリーが必要になるだろう」と言っていました。それで、私の問題の一部が解決します。Esprima が提供するトークンは次のとおりです。

[
{
    "type": "Keyword",
    "value": "var"
},
{
    "type": "Identifier",
    "value": "a"
},
{
    "type": "Punctuator",
    "value": "="
},
{
    "type": "Numeric",
    "value": "6"
},
{
    "type": "Punctuator",
    "value": ";"
},
{
    "type": "Keyword",
    "value": "var"
},
{
    "type": "Identifier",
    "value": "b"
},
{
    "type": "Punctuator",
    "value": "="
},
{
    "type": "Numeric",
    "value": "7"
},
{
    "type": "Identifier",
    "value": "console"
},
{
    "type": "Punctuator",
    "value": "."
},
{
    "type": "Identifier",
    "value": "log"
},
{
    "type": "Punctuator",
    "value": "("
},
{
    "type": "Identifier",
    "value": "a"
},
{
    "type": "Punctuator",
    "value": "*"
},
{
    "type": "Identifier",
    "value": "b"
},
{
    "type": "Punctuator",
    "value": ")"
},
{
    "type": "Punctuator",
    "value": ";"
}
]

ここで、このトークン リストを AST と組み合わせて使用し、2 行目にセミコロンが必要であることを報告する方法を理解する必要があります。

4

1 に答える 1

1

JavaScript インタープリターが検出できない論理エラーまたはプロトコル エラー (たとえば、常にセミコロンでステートメントを終了するなど) を検出するには、独自のステート マシンを作成して文法をモデル化する必要があります。あなたが与えた例のためにCoffeeScript + Node.jsでそれを行う1つの方法を次に示します。

esprima = require 'esprima'

# Predicate builders. Each one returns a single-argument function that
# answers true/false.

# Lifts a predicate over plain values into a predicate over tokens by
# applying it to the token's `type` field.
p_type = (is_valid) ->
  (token) ->
    is_valid token.type

# Same as p_type, but applies the predicate to the token's `value` field.
p_value = (is_valid) ->
  (token) ->
    is_valid token.value

# Strict-equality test against one fixed target.
p_is = (target) ->
  (candidate) -> candidate is target

# Membership test against a fixed list of targets (strict equality per item).
p_in = (targets...) ->
  (candidate) -> candidate in targets

# Predicate that accepts everything.
p_tautology = -> true

# Combines any number of predicates with logical OR.
#
# fs...   - predicates, each taking one value.
# Returns a function (value) -> result that evaluates the predicates left to
# right and stops at the first truthy result, exactly like
# `f(value) || g(value) || ...`. If every predicate is falsy, the result of
# the last one is returned.
#
# With no predicates the result is always false: the identity element of OR
# is false (an empty "any of" can never be satisfied). The previous
# implementation returned a tautology for that case.
p_disjoin = (fs...) ->
  (value) ->
    result = false
    for f in fs
      result = f(value)
      break if result
    result

# Combines any number of predicates with logical AND.
#
# fs...   - predicates, each taking one value.
# Returns a function (value) -> result that evaluates the predicates left to
# right and stops at the first falsy result, i.e. it behaves like
# `f(value) && g(value) && ...`. With no predicates the combined predicate
# always yields true.
p_conjoin = (fs...) ->
  (value) ->
    outcome = true
    for check in fs
      outcome = check(value)
      break unless outcome
    outcome

# Small function builders used to describe state transitions.

# Extracts the `type` field of a token.
f_type = ({type}) -> type

# Extracts the `value` field of a token.
f_value = ({value}) -> value

# Returns a function that ignores its arguments and always yields `value`.
f_constant = (value) ->
  -> value

# Returns its argument unchanged.
f_identity = (x) -> x

# Wraps `fn` so that it is called with the token only.
f_token = (fn) ->
  (token) -> fn token

# Returns a function that computes a key from the token with `dispatch` and
# looks that key up in the `transition` table (undefined when absent).
f_transition = (dispatch, transition) ->
  (token) ->
    key = dispatch(token)
    transition[key]

# Wraps `transition_fn` so that a falsy result is replaced by `default_value`.
f_default = (default_value, transition_fn) ->
  (token) ->
    transition_fn(token) or default_value

# Renders a value as a short human-readable string for error messages.
#
# value   - anything.
# Returns a string:
#   null / undefined        -> 'null' / 'undefined'
#   strings                 -> the text wrapped in double quotes (not escaped)
#   arrays and array-like   -> '[e1, e2, ...]' with each element rendered
#   objects                    recursively
#   plain objects           -> '{key: value, ...}' over own enumerable keys
#   everything else         -> the value's own toString()
#
# Only objects are considered array-like. Functions also carry a numeric
# `length` (their arity) and were previously printed as '[undefined, ...]'.
# Objects without any toString (e.g. Object.create(null)) are printed as
# plain objects instead of raising a TypeError.
# Note: cyclic structures are not detected and will recurse without bound.
to_string = (value) ->
  if value is null
    'null'
  else if value is undefined
    'undefined'
  else if typeof value is 'string'
    '"' + value + '"'
  else if typeof value is 'object'
    if typeof value.length is 'number' and value.length >= 0
      elems = (to_string(e) for e in value)
      '[' + elems.join(', ') + ']'
    else if (not value.toString?) or value.toString is Object::toString
      attrs = []
      for own k, v of value
        attrs.push k + ': ' + to_string(v)
      '{' + attrs.join(', ') + '}'
    else
      # Objects with a custom toString (Date, RegExp, user classes, ...).
      value.toString()
  else
    # Numbers, booleans, functions, symbols.
    value.toString()

# Builds a `handle_error` callback for a state that was entered after
# consuming at least one token.
#
# expectation - text describing what the state wanted to see (e.g. "';'").
# Returns a function (tokens, index) that reports, on stderr, the position
# just past the previous token together with the previous and offending
# token values, and then terminates the process with exit code 1.
expected_after = (expectation) -> (tokens, index) ->
  [prev_token, curr_token] = tokens.slice(index - 1, index + 1)
  {line, column} = prev_token.loc.end
  process.stderr.write(
    "[Error] line #{line}, column #{1 + column}: Expected #{expectation} "+
    "after #{to_string prev_token.value}, but received "+
    "#{to_string curr_token.value}\n")
  process.exit(1)

# State machine describing the tiny grammar of the sample program.
#
# Every state is an object with four members:
#   is_valid(token)            - does the token fit this state?
#   next_label(token)          - key into `transition` for an accepted token
#   handle_error(tokens, index)- reports the rejected token and exits
#   transition                 - table of label -> thunk producing the next state
#
# `root` is the state between statements. A statement is either
#   var <id> [= <literal-or-id>] {, <id> [= ...]} ;
# or
#   <id> . <id> ( <id> * <id> ) ;
# Thunks are used for the nested states so that they can refer back to `root`.
root =
  is_valid: p_disjoin(
    p_conjoin(p_type(p_is 'Keyword'), p_value(p_is 'var')),
    p_type(p_is 'Identifier')
  )
  next_label: f_transition f_type, 'Keyword': 'variable_declaration', 'Identifier': 'identifier'
  handle_error: (tokens, index) ->
    if index > 0
      expected_after('variable declaration')(tokens, index)
    else
      # Very first token: there is no previous token to point at.
      curr_token = tokens[index]
      {line, column} = curr_token.loc.start
      process.stderr.write(
        "[Error] line #{line}, column #{1 + column}: Expected variable "+
        "declaration but received #{to_string curr_token.value}\n")
      process.exit(1)
  transition:
    # <id> seen at statement start: only a member access may follow.
    identifier: () ->
      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '.')
      next_label: f_transition f_value, '.': 'membership'
      handle_error: expected_after "'.'"
      transition:
        membership: () ->
          is_valid: p_type(p_is 'Identifier')
          next_label: f_constant 'invocation'
          handle_error: expected_after 'an identifier'
          transition:
            invocation: () ->
              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is '(')
              next_label: f_constant 'identifier'
              handle_error: expected_after "'('"
              transition:
                # Left operand of the single binary expression argument.
                identifier: () ->
                  is_valid: p_type(p_in 'Identifier')
                  next_label: f_constant 'punctuator'
                  handle_error: expected_after 'an identifier'
                  transition:
                    punctuator: () ->
                      is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '*')
                      next_label: f_transition f_value, '*': 'identifier'
                      handle_error: expected_after 'a binary operator'
                      transition:
                        # Right operand.
                        identifier: () ->
                          is_valid: p_conjoin p_type(p_is 'Identifier')
                          next_label: f_constant 'punctuator'
                          handle_error: expected_after 'an identifier'
                          transition:
                            punctuator: () ->
                              is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ')')
                              next_label: f_constant 'punctuator'
                              handle_error: expected_after "')'"
                              transition:
                                # Statement terminator after the call. This must be a
                                # real predicate (p_conjoin): wrapping it in f_constant
                                # made every token "valid", so a missing ';' crashed
                                # on an undefined transition instead of being reported.
                                punctuator: () ->
                                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_is ';')
                                  next_label: f_transition f_value, ';': 'terminator'
                                  handle_error: expected_after "';'"
                                  transition:
                                    terminator: () -> root
    # `var` seen: a declared name must follow.
    variable_declaration: () ->
      is_valid: p_type(p_is 'Identifier')
      next_label: f_constant 'punctuator'
      handle_error: expected_after 'an identifier'
      transition:
        punctuator: () ->
          is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in '=', ',', ';')
          next_label: f_transition f_value, '=': 'assignment', ',': 'separator', ';': 'terminator'
          handle_error: expected_after "'=', ',', or ';'"
          transition:
            assignment: () ->
              is_valid: p_type(p_in 'Boolean', 'Identifier', 'Null', 'Numeric', 'String', 'RegularExpression')
              next_label: f_constant 'punctuator'
              handle_error: expected_after 'a literal or an identifier'
              transition:
                # After the initializer only ',' or ';' have a transition, so
                # only those are accepted. ('.', '(' and '{' used to pass
                # is_valid and then crashed on a missing transition.)
                punctuator: () ->
                  is_valid: p_conjoin p_type(p_is 'Punctuator'), p_value(p_in ',', ';')
                  next_label: f_transition f_value, ',': 'identifier', ';': 'terminator'
                  handle_error: expected_after "',' or ';'"
                  transition:
                    identifier: () -> root.transition.variable_declaration()
                    terminator: () -> root
            separator: () -> root.transition.variable_declaration()
            terminator: () -> root

# Runs the token list through the state machine rooted at `root`.
#
# tokens  - array of Esprima tokens; each token needs `type`, `value` and
#           `loc` (i.e. tokenize with `loc: true`).
#
# For every token the current state either accepts it, in which case the
# machine advances to the state named by `next_label`, or rejects it, in
# which case the state's `handle_error` reports the problem.
#
# When the tokens run out while a statement is still open (the machine is
# not back at `root`), an error is written to stderr and the process exits
# with code 1. Previously such input, e.g. a final statement without ';',
# was accepted silently.
#
# Returns nothing.
lint = (tokens) ->
  state = root
  prev_token = null
  for token, index in tokens
    if state.is_valid(token)
      state = state.transition[state.next_label token]()
    else
      state.handle_error(tokens, index)
    prev_token = token
  if state isnt root and prev_token?
    {line, column} = prev_token.loc.end
    process.stderr.write(
      "[Error] line #{line}, column #{1 + column}: Unexpected end of input "+
      "after #{to_string prev_token.value}; the statement is incomplete\n")
    process.exit(1)
  return

# Sample program from the question: the second line lacks its semicolon.
text = '''
var a = 6;
var b = 7
console.log(a * b);
'''

# Tokenize with location info (the error handlers read token.loc) and lint.
tokens = esprima.tokenize text, {loc: true}
lint(tokens)
于 2014-05-02T15:36:23.863 に答える