多かれ少なかれjsonを解析し、正規表現コールバック関数でxmlに直接操作するjsonからxmlへのコンバーターをハックしました。私はこの方法を完全に発明したので、明らかな/ひどい欠点や制限があるかどうかはわかりません.これまでのところ、解析は私にとってほとんど謎でした.
そこにたくさんのコメントを入れたので、フォローしやすいことを願っています。
コード
var cb = function(m,oo,co,st,s,ignore,cl,cm,oa,ca){
// remove slashes from captured strings
s = stripslashes(s)
// this will be built, and returned at the end
var xml = ""
// function to add padding to the output, at the start of the lines
var pad = function(q){ q--; q *= 4
var out = ''
for(;q--;) out += ' '
return out
}
// track the arrat status, last item type, current indent level, and tags stack
this.array = this.array || null
this.last = this.last || null
this.level = this.level || 0
this.tags = this.tags || []
// Handle opening braces
if(oo){
if(this.array && this.last.match(/^(cm|oa)$/)){
xml += "\n"+pad(this.level)+"<item>"
this.tags.push("item")
}
// increase the indentation level
this.level++
// set the last item, which should be the last thing to do in the block
this.last = 'oo'
// Handle closing braces
} else if(co){
// if the last item was not a string, then add padding
if(this.last != 'st')
xml += pad(this.level)
// close a tag
xml += "</"+this.tags.pop()+">\n"
// decrease the indentation level
this.level--
// set the last item, which should be the last thing to do in the block
this.last = 'co'
// Handle Strings
} else if(st){
// Debug by showing the tags as they are pushed and popped from the stack
// console.log(this.tags)
// Handle Strings where last item was an Object, or a comma
if(this.last == "oo" || this.last == "cm"){
// add the tag to the stack, and xml output
this.tags.push(s)
xml += "\n"+pad(this.level)+"<"+s+">"
// Handle Strings where last item was an Colon
} else if(this.last == "cl"){
xml += s
}
// set the last item, which should be the last thing to do in the block
this.last = 'st'
// Handle the opan arrays
} else if(oa){
this.array = 1
this.level++
xml += "\n"+pad(this.level)+"<array>"
this.level++
this.last = 'oa'
// Handle the close arrays
} else if(ca){
this.array = 0
xml += pad(this.level)+"</"+this.tags.pop()+">\n"
this.level--
xml += pad(this.level)+"</array>\n"
this.level--
this.last = 'ca'
// Handle the colons
} else if(cl){
this.last = 'cl'
// Handle the commas
} else if(cm){
if(this.last == "co")
xml += pad(this.level)+"</"+this.tags.pop()+">"
else
xml += "</"+this.tags.pop()+">"
this.last = 'cm'
}
// return the built xml
return xml
}
// DEPENDENCY
function stripslashes (str) {
return (str + '').replace(/\\(.?)/g, function (s, n1) {
switch (n1) {
case '\\':
return '\\';
case '0':
return '\u0000';
case '':
return '';
default:
return n1;
}
});
}
// DATA FOR TESTING
var o = {
"hash":"b6f6991d03df0e2e04dafffcd6bc418aac66049e2cd74b80f14ac86db1e3f0da",
"ver":1,
"vin_sz":1,
"vout_sz":2,
"lock_time":"Unavailable",
"size":258,
"relayed_by":"64.179.201.80",
"block_height": 12200,
"tx_index":"12563028",
"inputs":[
{ "prev_out":{
"hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
"value":"100000000",
"tx_index":"12554260",
"n":"2"
},
"scriptSig":"Unavailable"
},
{ "prev_out":{
"hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
"value":"100000000",
"tx_index":"12554260",
"n":"2"
},
"scriptSig":"Unavailable"
},
{ "prev_out":{
"hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
"value":"100000000",
"tx_index":"12554260",
"n":"2"
},
"scriptSig":"Unavailable"
},
{ "prev_out":{
"hash":"a3e2bcc9a5f776112497a32b05f4b9e5b2405ed9",
"va'l\"u'e":"1000\"00000",
"tx_index":"12554260",
"n":"2"
},
"scriptSig":"Unavailable"
}
],
"out":[
{ "value":"98000000",
"hash":"29d6a3540acfa0a950bef2bfdc75cd51c24390fd",
"scriptPubKey":"Unavailable"
},
{ "value":"2000000",
"hash":"17b5038a413f5c5ee288caa64cfab35a0c01914e",
"scriptPubKey":"Unavailable"
}
]
}
var s = JSON.stringify(o)
console.log("Original: "+s+"\n")
// var r = /({)|(})|([""''])(.+?)\3|(:)|(,)|(\[)|(\])/g
// THE REGEX
// capturing groups map to -> m,oo,co,st,s,ignore,cl,cm,oa,ca in the callback
// meaning match, open-object, close-object, string-type, string, ignore, colon, comma, open-array, close-array
// string escaping made possible by... `(['"])((\\\3|[^\3])+?)\3`
var r = /({)|(})|(['"])((\\\3|[^\3])+?)\3|(:)|(,)|(\[)|(\])/g
var xml = s.replace(r,cb)
console.log(xml)
質問
- ネストされた配列のサポートを追加するにはどうすればよいですか?
- この解析手法は何と呼ばれていますか?
- 文字列のエスケープ方法はしっかりしていますか?
- これは実行可能な解析方法ですか?
- この方法を使用したより一般的な解決策はありますか?