3

My goal is to parse a string with a specific format and generate a JavaScript object structure out of it.

One idea was to use String.replace with a function as the second parameter, so that inside the function you get all parts of the match. My test / example so far:

the string:

    !Norm: DIN 7985;
        M2: 2, 2, 2;
        M3:3,3;
        M10: 20,25;
!Norm: DIN 7985 TX;
    M4: 4,  4    , 4;

my test code:

// Start from an empty console so only this run's output is visible.
console.clear();

// Sample input: two "!Norm:" sections, each followed by "M<size>: values;" entries.
var sTmp = "!Norm: DIN 7985;\n    M2: 2, 2, 2;\n    M3:3,3;\n    M10: 20,25;\n     !Norm: DIN 7985 TX;\n    M2: 6,    10    , 16;";

/**
 * Replacement callback that dumps every argument it receives to the console.
 *
 * The pattern it is used with below has only two capture groups, so
 * String.replace calls it as (match, p1, p2, offset, string): the match
 * offset arrives in p3, the whole input string arrives in p4, and the
 * parameters named offset and string stay undefined.
 *
 * @returns {string} always "#", the replacement text for the match
 */
function replacer(match, p1, p2, p3, p4, offset, string) {
    var aLabelled = [
        ["match:", match],
        ["p1:", p1],
        ["p2:", p2],
        ["p3:", p3],
        ["p4:", p4],
        ["offset:", offset],
        ["string:", string]
    ];

    aLabelled.forEach(function (aPair) {
        console.log(aPair[0], aPair[1]);
    });

    return "#";
}

// Lookahead variant of the norm part, kept for reference: (?=!Norm:\s?(.+);\s+)
// The string returned by replace is not used; the call only drives replacer
// once per match.
sTmp.replace(/\s*!Norm:\s?(.+);\s+(M\d+:.*\s*;)/g, replacer);

Console log (tested in Firebug, shortened):

match: !Norm: DIN 7985; M2: 2, 2, 2;
p1: DIN 7985
p2: M2: 2, 2, 2;
p3: 0
p4: !Norm: DIN 7985; M2: 2, 2, 2; M3:3,3; M10: 20,25; ....
offset: undefined
string: undefined
match: !Norm: DIN 7985 TX; M4: 4, 4 , 4;
p1: DIN 7985 TX
p2: M4: 4, 4 , 4;
p3: 52
p4: !Norm: DIN 7985; M2: 2, 2, 2; M3:3,3; M10: 20,25; !Norm: DIN 7985 TX; M4: 4, 4 , 4;
....

So I can see that the idea works: it matches the norm and I get the info in one substring. Now there are the M3:... parts. Is there an option to specify that the part (M\d+:.*\s*;) matches up to the next !Norm: instead of stopping at the first occurrence of ;? I think it should be possible with a lookahead or something similar.

The goal behind this idea is to generate a JavaScript object like this out of the string:

    oDataTmp = {
    DIN 7985 :      {
                        M2        : ["2", "2", "2"],
                        M3        : ["3", "3"],
                        M10       : ["20", "25"],
                    }
    DIN 7985 TX :   {
                        M4        : ["4", "4", "4"],
                    }
}

i know you can do this by split and then parse line by line. i love the challenge to get this brain thing done and to understand how to do it :-)

4

3 Answers

2

Here is my regex for this:

\s*!\w+:\s*([^;]+);\s*((?:\s*[^:!]+:[^;]+;)+)

It has the following match groups:

  • Group 1: The DIN section.
  • Group 2: All the remaining settings for the current !Norm.

This regex doesn't specifically expect the keyword NORM. So it could be anything else. If you want to capture it, simply add parentheses around the first \w+.

explanation:

/            # start regex
\s*          # match optional whitespace
!\w+:        # match word between '!' and ':'
\s*          # match optional whitespace
([^;]+);     # capture group 1 - match all characters (without ';') up to the next ';'
\s*          # match optional whitespace
(            # start capture group 2
    (?:          # group (non-capture)
        \s*          # match optional whitespace
        [^:!]+:      # match all characters (without ':' and '!') up to the next ':'
        [^;]+;       # match all characters (without ';') up to the next ';'
    )+           # group end; match this group 1 to n times
)            # end capture group 2
/g           # end regex; set g-Flag for global
answered 2012-11-27T17:59:20.403
1

You need to change two things to get all the members into a single capture. Firstly . does not match line breaks (and you can't change that in JavaScript). But [\s\S] does. And yes, using a negative lookahead, we can make sure that we don't consume the next !Norm:

/\s*!Norm:\s?(.+);\s+((?:(?![!]Norm)[\s\S])*)/g

I have wrapped the literal ! in square brackets, to make clear that it's a literal and to set it off from the ! that is part of the negative lookahead syntax. You could leave out the square brackets, it's just for readability. So basically this will fill the last capture with arbitrary characters, as long as they don't start a new !Norm.

Then you can go ahead, and read individual properties and values from that last capture.

explanation:

/            # start regex
\s*          # match optional whitespace
!Norm:       # match '!Norm:'
\s?          # match optional whitespace
(.+);        # capture group 1 - match all characters (without '\n') up to the next ';'
\s+          # match 1..n whitespaces
(            # start capture group 2
    (?:          # group (non-capture)
        (?!          # negative lookahead
            [!]Norm      # match '!Norm'
        )            # end negative lookahead
        [\s\S]       # match a white space or other than white space character
                     # this group matches a single character as long as it doesn't start a new !Norm
    )*           # group end; match this group 0..n times
)            # end capture group 2
/g           # end regex; set g-Flag for global
answered 2012-11-27T17:21:45.307
0

So, to have a complete solution, here is my whole parsing code; the regexes used are combined from both answers:

console.clear();
var sData = "!Norm: DIN 933;\n !Norm: DIN 7985;\n    M2: 2, 2, 2;\n    M3:3,3;\n    M10: 20,25;\n     !Norm: DIN 7985 TX;\n    M2: 6,    10    , 16;";
// Log the raw input that is about to be parsed.
console.log(sData);

/**
 * Parses norm/screw data of the form
 *
 *     !Norm: <norm name>;
 *         M<size>: <value>, <value>, ...;
 *
 * into a nested object: { "<norm name>": { "M<size>": ["<value>", ...] } }.
 * A norm without any M entries maps to an empty object. Values stay strings.
 *
 * @param {string} sInput - the text to parse
 * @returns {Object} map of norm name to a map of screw size to value list
 */
function parseNormData(sInput) {
    var oResult = {};

    // String.replace is used purely as an iterator over all matches: the
    // callbacks collect data as a side effect and the replaced string is
    // thrown away.
    // Group 1 is the norm name; group 2 is everything up to (but not
    // including) the next "!Norm", thanks to the negative lookahead.
    sInput.replace(/\s*!Norm:\s*([^;]+);\s*((?:(?![!]Norm)[\s\S])*)/g,
        function replacer(match, sNorm, sScrews) {
            var oScrews = {};

            sScrews.replace(/\s*(M\d+):\s*([^;]+);\s*/g,
                function (match, sScrewSize, sScrewList) {
                    // [^;]+ also captures blanks in front of the ';', which
                    // would leave an empty trailing element after the split.
                    oScrews[sScrewSize] = sScrewList.split(/[\s,]+/).filter(function (sValue) {
                        return sValue !== "";
                    });

                    return "§";
                });

            // Drop blanks between the norm name and its terminating ';'.
            oResult[sNorm.trim()] = oScrews;

            return "#";
        });

    return oResult;
}

var oData = parseNormData(sData);

console.log("oData: ");
console.dir(oData);

result object (verified in console):

oData = {
    DIN 7985 :      {
                        M10 : ["20", "25"],
                        M2  : ["2", "2", "2"],
                        M3  : ["3", "3"],
                    }
    DIN 7985 TX :   {
                        M2  : ["6", "10", "16"],
                    }
    DIN 933 :       {}
    };
answered 2012-11-27T21:32:21.717