1

正規表現とコールバック関数を使って JSON を大まかに解析し、そのまま XML に変換する JSON→XML コンバーターを即席で作りました。この方法は完全に自分で考え出したものなので、明らかな(あるいは致命的な)欠点や制限があるかどうかはわかりません。これまで、構文解析は私にとってほとんど謎でした。

そこにたくさんのコメントを入れたので、フォローしやすいことを願っています。

コード

/**
 * Replacement callback for `String.prototype.replace` that turns one JSON
 * token into the XML fragment that replaces it.
 *
 * The parameters mirror the capture groups of the tokenizer regex, so for
 * each call only the group(s) belonging to the matched token are defined.
 *
 * Parser state (array flag, last token type, indent level, open-tag stack) is
 * stored on `this` between calls, so `this` must be an object. A sloppy-mode
 * script gets the global object implicitly; strict-mode or module callers
 * have to bind one, e.g. `text.replace(regex, cb.bind({}))`.
 *
 * Known limitations: the array state is a single flag rather than a stack, so
 * nested arrays are not tracked, and strings that appear directly as array
 * items produce no output.
 *
 * @param {string} m        the whole match (unused)
 * @param {string} [oo]     "{" when the token opens an object
 * @param {string} [co]     "}" when the token closes an object
 * @param {string} [st]     the quote character when the token is a string
 * @param {string} [s]      the string contents, without the quotes
 * @param {string} [ignore] inner repetition group of the string pattern (unused)
 * @param {string} [cl]     ":" when the token is a colon
 * @param {string} [cm]     "," when the token is a comma
 * @param {string} [oa]     "[" when the token opens an array
 * @param {string} [ca]     "]" when the token closes an array
 * @returns {string} the XML text to substitute for the token
 */
var cb = function(m,oo,co,st,s,ignore,cl,cm,oa,ca){

  // remove slashes from captured strings
  s = stripslashes(s);

  // this will be built, and returned at the end
  var xml = "";

  // Padding for the start of a line: four spaces per level below the first.
  // Levels of 0 or less yield no padding; the former countdown loop
  // (`for(;q--;)`) never terminated for them, e.g. on an unbalanced "}" or "]".
  var pad = function(q){
    var width = (q - 1) * 4;
    var out = '';
    for(var i = 0; i < width; i++) out += ' ';
    return out;
  };

  // Escape the characters that are markup inside XML character data, so that
  // values such as "a < b" or "R&D" do not produce malformed output.
  var escapeText = function(text){
    return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  };

  // track the array status, last item type, current indent level, and tags stack
  this.array = this.array || null;
  this.last = this.last || null;
  this.level = this.level || 0;
  this.tags = this.tags || [];

  // Handle opening braces
  if(oo){

    // an object that directly follows "[" or "," inside an array is an item
    if(this.array && this.last.match(/^(cm|oa)$/)){
      xml += "\n"+pad(this.level)+"<item>";
      this.tags.push("item");
    }

    // increase the indentation level
    this.level++;

    // set the last item, which should be the last thing to do in the block
    this.last = 'oo';

  // Handle closing braces
  } else if(co){

    // if the last item was not a string, then add padding
    if(this.last !== 'st')
      xml += pad(this.level);

    // close a tag
    xml += "</"+this.tags.pop()+">\n";

    // decrease the indentation level
    this.level--;

    // set the last item, which should be the last thing to do in the block
    this.last = 'co';

  // Handle Strings
  } else if(st){

    // Debug by showing the tags as they are pushed and popped from the stack
    // console.log(this.tags)

    // Handle Strings where last item was an Object, or a comma: this is a key
    if(this.last === "oo" || this.last === "cm"){

      // add the tag to the stack, and xml output
      this.tags.push(s);
      xml += "\n"+pad(this.level)+"<"+s+">";

    // Handle Strings where last item was a Colon: this is a value
    } else if(this.last === "cl"){
      xml += escapeText(s);
    }

    // set the last item, which should be the last thing to do in the block
    this.last = 'st';

  // Handle the open arrays
  } else if(oa){
    this.array = 1;
    this.level++;
    xml += "\n"+pad(this.level)+"<array>";
    this.level++;
    this.last = 'oa';

  // Handle the close arrays
  } else if(ca){
    this.array = 0;
    // close the last <item>, then the <array> wrapper one level up
    xml += pad(this.level)+"</"+this.tags.pop()+">\n";
    this.level--;
    xml += pad(this.level)+"</array>\n";
    this.level--;
    this.last = 'ca';

  // Handle the colons
  } else if(cl){
    this.last = 'cl';

  // Handle the commas
  } else if(cm){
    // a comma ends the current member, so its tag is closed here; after a
    // closing brace the closing tag starts a fresh line and needs padding
    if(this.last === "co")
      xml += pad(this.level)+"</"+this.tags.pop()+">";
    else
      xml += "</"+this.tags.pop()+">";
    this.last = 'cm';
  }

  // return the built xml
  return xml;

};


// DEPENDENCY
/**
 * Removes backslash escaping from a string.
 *
 * Each backslash is dropped and the character after it is kept as-is, with
 * one special case: the sequence backslash + "0" becomes a NUL character.
 * A backslash with nothing matchable after it (end of input or a line break)
 * is simply removed.
 *
 * @param {*} str value to unescape; coerced to a string first
 * @returns {string} the unescaped text
 */
function stripslashes (str) {
  var NUL = '\u0000';

  // `escaped` is the single character following the backslash, or '' if none
  var unescapeOne = function (whole, escaped) {
    if (escaped === '0') {
      return NUL;
    }
    // covers '\\' -> '\', '' -> '' and every other character -> itself
    return escaped;
  };

  var text = str + '';
  return text.replace(/\\(.?)/g, unescapeOne);
}

// DATA FOR TESTING
// Sample record that is serialized with JSON.stringify and fed to the
// converter. It mixes string and numeric values, nested objects, and two
// arrays of objects ("inputs" and "out").
var o = {
  "hash":"b6f6991d03df0e2e04dafffcd6bc418aac66049e2cd74b80f14ac86db1e3f0da",
  "ver":1,
  "vin_sz":1,
  "vout_sz":2,
  "lock_time":"Unavailable",
  "size":258,
  "relayed_by":"64.179.201.80",
  "block_height": 12200,
  "tx_index":"12563028",
  "inputs":[
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        "value":"100000000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    },
    { "prev_out":{
        "hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
        // key and value with embedded single/double quotes, so the serialized
        // JSON contains escaped quotes inside strings
        "va'l\"u'e":"1000\"00000",
        "tx_index":"12554260",
        "n":"2"
      },
      "scriptSig":"Unavailable"
    }
  ],
  "out":[
    { "value":"98000000",
      "hash":"29d6a3540acfa0a950bef2bfdc75cd51c24390fd",
      "scriptPubKey":"Unavailable"
    },
    { "value":"2000000",
      "hash":"17b5038a413f5c5ee288caa64cfab35a0c01914e",
      "scriptPubKey":"Unavailable"
    }
  ]
}

// The converter works on JSON text, so serialize the sample object first.
var s = JSON.stringify(o)

console.log("Original: "+s+"\n")

// THE REGEX
// capturing groups map to -> m,oo,co,st,s,ignore,cl,cm,oa,ca in the callback
// meaning match, open-object, close-object, string-type, string, ignore, colon, comma, open-array, close-array
//
// String body: `(\\[\s\S]|(?!\3)[^\\])*` consumes either a backslash together
// with the character it escapes, or one character that is neither a backslash
// nor the opening quote. Compared with the earlier `(\\\3|[^\3])+?` this
//   - matches empty strings (`""`), which `+?` could not,
//   - treats `\\` as one escaped backslash, so a string ending in a backslash
//     no longer swallows its closing quote,
//   - no longer relies on `[^\3]`: inside a character class `\3` is the
//     character U+0003, not a back-reference to the quote.
// The group numbering is unchanged, so the callback signature still lines up.
var r = /(\{)|(\})|(['"])((\\[\s\S]|(?!\3)[^\\])*)\3|(:)|(,)|(\[)|(\])/g

// The callback keeps its parser state on `this`. Binding a fresh object keeps
// that state off the global object and also works in strict mode / modules,
// where an unbound callback would receive `this === undefined`.
var xml = s.replace(r,cb.bind({}))

console.log(xml)

質問

  1. ネストされた配列のサポートを追加するにはどうすればよいですか?
  2. この解析手法は何と呼ばれていますか?
  3. 文字列のエスケープ方法はしっかりしていますか?
  4. これは実行可能な解析方法ですか?
  5. この方法を使用したより一般的な解決策はありますか?
4

1 に答える 1

0

Since you are building your own solution, I assume your goal here is to understand how a parser works.

How can I add support for nested arrays?

An array can contain primitive values, objects, and other arrays. So write separate handlers for values, arrays, and objects; whenever you encounter an array or an object inside an array, call the same function recursively.

I'm not sure about the other questions, but here is the approach I used in fast-xml-parser:

/**
 * Converts one level of a plain object into XML fragments, recursing into
 * nested objects and arrays through `this.j2x`.
 *
 * Must be invoked with `this` set to an object that provides `options`
 * (`textNodeName`, `cdataTagName`, `attrNodeName`) and the helpers
 * `isAttribute`, `isCDATA`, `encodeHTMLchar`, `replaceCDATAstr`,
 * `replaceCDATAarr`, `buildTextNode`, `buildObjNode` and `j2x`. Those helpers
 * are defined outside this listing; their exact output is not shown here.
 *
 * @param {Object} jObj  object whose own enumerable keys are serialized
 * @param {number} level nesting depth, forwarded to the node builders and
 *                       incremented for nested objects
 * @returns {{attrStr: string, val: string}} attribute text collected for the
 *          enclosing tag, and the serialized child content
 */
var j2x = function(jObj,level){
    var attributes = "";
    var body = "";
    var names = Object.keys(jObj);
    var total = names.length;

    for(var n = 0; n < total; n++){
        var name = names[n];
        var node = jObj[name];

        // --- primitive value ---
        if(typeof node !== "object"){
            var attrName = this.isAttribute(name);
            if(attrName){
                attributes += " " + attrName + "=\"" + this.encodeHTMLchar(node, true) + "\"";
            }else if(this.isCDATA(name)){
                // pair the CDATA with the sibling text node when there is one
                body += this.replaceCDATAstr(jObj[this.options.textNodeName] || "", node);
            }else if(name !== this.options.textNodeName){
                // ordinary tag value
                body += this.buildTextNode(node, name, "", level);
            }else if(!jObj[this.options.cdataTagName]){
                // bare text node; when a CDATA sibling exists the text is
                // emitted while that sibling is processed instead
                body += this.encodeHTMLchar(node);
            }
            continue;
        }

        // --- array: repeated nodes ---
        if(Array.isArray(node)){
            if(this.isCDATA(name)){
                body += this.replaceCDATAarr(jObj[this.options.textNodeName] || "", node);
                continue;
            }
            var count = node.length;
            for(var k = 0; k < count; k++){
                var entry = node[k];
                if(typeof entry === "object"){
                    var nested = this.j2x(entry, level + 1);
                    body += this.buildObjNode(nested.val, name, nested.attrStr, level);
                }else{
                    body += this.buildTextNode(entry, name, "", level);
                }
            }
            continue;
        }

        // --- nested object ---
        if(this.options.attrNodeName && name === this.options.attrNodeName){
            // grouped attributes: every member becomes an attribute of the parent
            var attrKeys = Object.keys(node);
            var attrCount = attrKeys.length;
            for(var a = 0; a < attrCount; a++){
                attributes += " " + attrKeys[a] + "=\"" + this.encodeHTMLchar(node[attrKeys[a]]) + "\"";
            }
        }else{
            var child = this.j2x(node, level + 1);
            body += this.buildObjNode(child.val, name, child.attrStr, level);
        }
    }

    return {attrStr : attributes , val : body};
}
2018-02-17T03:46:09.357 に回答