javascript - HTML スニペットが JavaScript で有効かどうかを確認する

Question

コードから呼び出すことができる HTML スニペットが有効かどうかを確認するには、信頼できる JavaScript ライブラリ/関数が必要です。たとえば、開いているタグと引用符が閉じていること、ネストが正しいことなどを確認する必要があります。

何かが100％標準ではないために検証が失敗することは望ましくありません（しかし、とにかく動作します）。

score 39 · Accepted Answer

更新：この回答は限られています - 以下の編集をご覧ください。

@kolinkの回答を拡張して、次を使用します。

/**
 * Round-trips an HTML string through a detached <div> and reports whether
 * it came back unchanged.
 *
 * @param {string} html - The HTML snippet to check.
 * @returns {boolean} true when the serialized content of the <div> is
 *   strictly equal to the input string, false otherwise.
 */
var checkHTML = function(html) {
  var container = document.createElement('div');

  // Assigning innerHTML makes the browser parse the snippet; reading it
  // back yields the browser's own serialization of the parsed result.
  container.innerHTML = html;
  var serialized = container.innerHTML;

  return serialized === html;
};

つまり、HTML で一時的な div を作成します。これを行うために、ブラウザーは HTML 文字列に基づいて DOM ツリーを作成します。これには、終了タグなどが含まれる場合があります。

div の HTML コンテンツを元の HTML と比較すると、ブラウザーに何か変更が必要かどうかがわかります。

checkHTML('<a>hell<b>o</b>')

false を返します。

checkHTML('<a>hell<b>o</b></a>')

true を返します。

編集: @Quentin が以下に記しているように、これはさまざまな理由で過度に厳密です: ブラウザーは、終了タグがそのタグのオプションであっても、省略された終了タグを修正することがよくあります。例えば：

<p>one para
<p>second para

...は有効と見なされます (P は終了タグを省略できるため) が、checkHTML は false を返します。また、ブラウザーはタグの大文字小文字を正規化し、空白も変更します。このアプローチを採用するかどうかを決める際は、これらの制限に注意する必要があります。

score 20 · Accepted Answer

さて、このコード：

/**
 * Lets the browser "repair" an HTML snippet by parsing it into a detached
 * <div> and serializing it again.
 *
 * @param {string} html - The HTML snippet to normalize.
 * @returns {string} The innerHTML of the <div> after the snippet was assigned.
 */
function tidy(html) {
    // Object.assign performs a regular property set, so the innerHTML
    // setter runs exactly as it would with a plain assignment.
    return Object.assign(document.createElement('div'), { innerHTML: html }).innerHTML;
}

これにより、ブラウザの能力を最大限に発揮して、不正な HTML が「修正」されます。それが役に立てば、HTML を検証するよりもはるかに簡単です。

score 10 · Accepted Answer

これまでに提示された解決策はどれも、元の質問に答えるのに良い仕事をしていません。

何かが100％標準ではないために検証が失敗することは望ましくありません（ただし、とにかく機能します）。

tldr >> JSFiddleを確認してください

そこで、このトピックに関する回答とコメントの入力を使用して、次のことを行うメソッドを作成しました。

html 文字列をタグごとに調べ、各タグが有効かどうかをチェックします
HTML文字列をレンダリングしようとします
作成されたタグ数と実際にレンダリングされた html dom タグ数を理論的に比較します
「strict (厳密)」としてチェックする場合、<br/> や空の属性の正規化 (="") は無視されません
レンダリングされた innerHTML を指定された html 文字列と比較します (空白と引用符は無視します)

戻り値

レンダリングされた html が指定された html 文字列と同じ場合はtrue
チェックの 1 つが失敗した場合はfalse
レンダリングされた html が有効に見えるが、指定された html 文字列と等しくない場合の正規化された html 文字列

「正規化 (normalized)」とは、レンダリング時にブラウザーが入力の特定の部分を無視または修復することを意味します (不足している終了タグを追加したり、他の部分を変換したりします (一重引用符から二重引用符への変換やアンパサンドのエンコードなど))。「失敗 (false)」と「正規化 (normalized)」を区別することで、コンテンツに「期待どおりにレンダリングされない可能性がある」というフラグを付けてユーザーに知らせることができます。

ほとんどの場合、正規化すると、元の html 文字列をわずかに変更したバージョンが返されますが、結果がまったく異なる場合もあります。したがって、これは、たとえば、ユーザー入力をデータベースに保存する前、または盲目的にレンダリングする前に、さらにレビューするためにユーザー入力にフラグを立てるために使用する必要があります。(正規化の例については、 JSFiddleを参照してください)

チェックでは、次の例外が考慮されます

単一引用符から二重引用符への正規化の無視
image など src 属性を持つタグは、レンダリング中に「無効化 (disarm)」されます
(厳密でない場合) <br/> >> <br> の変換を無視
(厳密でない場合) 空の属性の正規化を無視 (<p disabled> >> <p disabled="">)
.innerHTML の読み取り時に、もともとエンコードされていなかったアンパサンドがエンコードされること (属性値など) を無視

.

/**
 * Heuristically validates an HTML snippet by letting the browser render it
 * into a detached <div> and comparing the result with the input.
 *
 * Steps:
 *   1. every start tag must have balanced attribute quotes;
 *   2. the snippet is assigned to innerHTML (with `src` attributes renamed so
 *      nothing is loaded while rendering);
 *   3. the number of rendered elements must equal the number of start tags
 *      found in the input (script contents are ignored while counting);
 *   4. the rendered markup is compared with the input, ignoring case, quote
 *      style and whitespace differences.
 *
 * @param {string} htmlStr - The HTML snippet to validate.
 * @param {boolean} [strictBoolean] - When truthy, `<br/>` -> `<br>` and
 *   empty-attribute (`=""`) normalizations count as differences.
 * @returns {boolean|string} true if the rendered markup matches the input,
 *   false if one of the checks fails (or the input is not a string), or the
 *   normalized markup string when rendering succeeded but changed the input.
 */
function simpleValidateHtmlStr(htmlStr, strictBoolean) {
  if (typeof htmlStr !== "string") {
    return false;
  }

  var AMP_PLACEHOLDER = "#svhs#amp##";

  // Regex literal on purpose: inside a string passed to `new RegExp`, "\s"
  // silently degrades to a plain "s". Whitespace is covered by the last
  // alternative, which also keeps the alternation unambiguous.
  var validateHtmlTag = /<[a-z]+(?:"[^"]*"|'[^']*'|[^'">])*>/i;

  var sdom = document.createElement('div');

  var noSrcNoAmpHtmlStr = htmlStr
    // disarm *every* src attribute (any preceding whitespace, any case)
    .replace(/(\s)(src)=/gi, "$1svhs___$2=")
    // 'save' already-encoded ampersands so they survive the round trip
    .replace(/&amp;/gi, AMP_PLACEHOLDER);

  // Script bodies may contain "<x>"-like text; drop them before counting
  // start tags. [\s\S] crosses \n and \r, unlike ".".
  var tagCountSource = noSrcNoAmpHtmlStr
    .replace(/(<script[^>]*>)[\s\S]*?(<\/script>)/gi, "$1$2");
  var htmlTags = tagCountSource.match(/<[a-z]+[^>]*>/gi) || []; // all start tags
  var htmlTagsCount = htmlTags.length;

  if (!strictBoolean) {
    // ignore <br/> conversions
    noSrcNoAmpHtmlStr = noSrcNoAmpHtmlStr.replace(/<br\s*\/>/gi, "<br>");
  }

  var tagsAreValid = htmlTags.every(function (tagStr) {
    return validateHtmlTag.test(tagStr);
  });
  if (!tagsAreValid) {
    return false;
  }

  try {
    sdom.innerHTML = noSrcNoAmpHtmlStr;
  } catch (err) {
    // A snippet the parser refuses outright is reported as invalid.
    return false;
  }

  // compare rendered tag-count with expected tag-count
  if (sdom.querySelectorAll("*").length !== htmlTagsCount) {
    return false;
  }

  var resHtmlStr = sdom.innerHTML.replace(/&amp;/gi, "&"); // undo '&' encoding
  var comparableIn = noSrcNoAmpHtmlStr;

  if (!strictBoolean) {
    // ignore empty attribute normalizations (on both sides of the comparison,
    // otherwise an explicit alt="" in the input would never match)
    resHtmlStr = resHtmlStr.replace(/=""/g, "");
    comparableIn = comparableIn.replace(/=(?:""|'')/g, "");
  }

  // compare html strings while ignoring case, quote-changes, trailing spaces
  if (svhsSimplify(comparableIn) === svhsSimplify(resHtmlStr)) {
    return true;
  }

  // Rendering changed the markup: hand back the normalized version with all
  // placeholders restored.
  return resHtmlStr
    .replace(/(\s)svhs___(src)=/gi, "$1$2=")
    .replace(/#svhs#amp##/g, "&amp;");

  // Strips quotes, collapses whitespace runs and lower-cases for comparison.
  function svhsSimplify(markup) {
    return markup.replace(/["']/g, "").replace(/\s+/g, " ").toLowerCase().trim();
  }
}

ここでは、JSFiddle https://jsfiddle.net/abernh/twgj8bev/で、さまざまなテストケースとともに見つけることができます。

"<a href='blue.html id='green'>missing attribute quotes</a>" // FAIL
"<a>hell<B>o</B></a>"                                        // PASS
'<a href="test.html">hell<b>o</b></a>'                       // PASS
'<a href=test.html>hell<b>o</b></a>',                        // PASS
"<a href='test.html'>hell<b>o</b></a>",                      // PASS
'<ul><li>hell</li><li>hell</li></ul>',                       // PASS
'<ul><li>hell<li>hell</ul>',                                 // PASS
'<div ng-if="true && valid">ampersands in attributes</div>'  // PASS

.

score 3 · Accepted Answer

/**
 * Rough well-formedness check: reports whether an HTML snippet contains as
 * many opening tags as closing tags.
 *
 * Comments, doctype-like declarations, self-closing tags and void elements
 * (which never have an end tag) are ignored. Only counts are compared, so
 * wrongly nested tags are not detected. A string without any tags is
 * considered valid.
 *
 * @param {string} html - The HTML snippet to check.
 * @returns {boolean} true when opening and closing tag counts match.
 */
function validHTML(html) {
  // Void elements per the HTML standard, plus the obsolete <param>.
  var voidTags = /<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s\/>])[^>]*>/gi;

  var remaining = html
    .replace(/<!--[\s\S]*?-->/g, '')  // comments are not elements
    .replace(/<![^>]*>/g, '')         // doctype and similar declarations
    .replace(/<[^>]*\/\s?>/g, '')     // self closing tags
    .replace(voidTags, '');           // void elements never have an end tag

  // A tag starts with "<" (or "</") directly followed by a letter, so a
  // stray "<" in text ("1 < 2") is not counted. [^>]* also spans newlines,
  // which "." would not.
  var openingTags = remaining.match(/<[a-zA-Z][^>]*>/g) || [];
  var closingTags = remaining.match(/<\/[a-zA-Z][^>]*>/g) || [];

  return openingTags.length === closingTags.length;
}

// Sample input. Note: a string without any HTML tag is considered a valid
// HTML snippet. If that is not acceptable in your case, check the opening
// tag count first.
var htmlContent = "<p>your html content goes here</p>";

// Report the verdict to the user.
alert(validHTML(htmlContent) ? 'Valid HTML' : 'Invalid HTML');

score 0 · Accepted Answer

/**
 * Reports whether a string produces at least one element node when it is
 * parsed as HTML inside a detached <div>.
 *
 * @param {string} str - The string to test.
 * @returns {boolean} true if any direct child of the <div> is an element
 *   node, false otherwise (e.g. the string only produced text nodes).
 */
function isHTML(str)
{
 var ELEMENT_NODE = 1; // nodeType value of element nodes
 var a = document.createElement('div');
 a.innerHTML = str;
 // The property is `childNodes` (lower-case c), and the for-header needs
 // its second semicolon even though the update clause is empty.
 for (var c = a.childNodes, i = c.length; i--;)
 {
    if (c[i].nodeType === ELEMENT_NODE) return true;
 }
 return false;
}

幸運を！

score 0 · Accepted Answer

使用するjs-libraryに依存します。

node.js の HTML バリデータhttps://www.npmjs.com/package/html-validator

jQuery の HTML バリデータhttps://api.jquery.com/jquery.parsehtml/

しかし、前述のように、ブラウザーを使用して壊れた HTML を検証することは素晴らしいアイデアです。

/**
 * Parses an HTML snippet into a detached <div> and returns the markup the
 * browser serializes back, i.e. the browser's "fixed" version of it.
 *
 * @param {string} html - The HTML snippet to pass through the browser.
 * @returns {string} The innerHTML read back from the <div>.
 */
function tidy(html) {
    var container = document.createElement('div');
    container.innerHTML = html;

    var repaired = container.innerHTML;
    return repaired;
}

score 0 · Accepted Answer

上記からの@Tarunの回答を拡張します。

/**
 * Checks an HTML snippet for well-formedness without using a DOM: every
 * opening tag must be closed by a matching closing tag in the right nesting
 * order, and every attribute of an opening tag must parse.
 *
 * Tag and attribute names may only consist of letters, digits, hyphens and
 * underscores. Comments, declarations, <script>/<style> elements,
 * self-closing tags and void elements are ignored. Angle brackets left in
 * text nodes produce artefacts and make the check fail. A reason is logged
 * with console.log whenever false is returned.
 *
 * @param {string} html - The HTML snippet to check.
 * @returns {boolean} true if the snippet looks well-formed, false otherwise.
 */
function validHTML(html) {
    var markup = html.toLowerCase()
        .replace(/<!--.*?-->/gs, "")                       // comments (may contain ">" and line breaks)
        .replace(/(?<=<[^>]+?=\s*"[^"]*)[<>]/g, "")        // angle brackets inside "..." attribute values
        .replace(/(?<=<[^>]+?=\s*'[^']*)[<>]/g, "")        // angle brackets inside '...' attribute values
        .replace(/<script.*?<\/script>/gs, "")             // script elements, including multi-line ones
        .replace(/<style.*?<\/style>/gs, "")               // style elements, including multi-line ones
        .replace(/<[^>]*\/\s?>/g, "")                      // self closing tags
        // declarations (<!...>) and void elements, which never have an end tag
        .replace(/<(?:!|(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s>])).*?>/gs, "")
        // clean text nodes; "<" or ">" inside text leaves artefacts that fail below
        .replace(/^[^<>]+|[^<>]+$|(?<=>)[^<>]+(?=<)/gs, "");

    // Nothing but text and ignorable tags: there is nothing left to balance.
    // (Splitting "" would yield one empty token and be misreported as a tag.)
    if (markup === "") {
        return true;
    }

    // `var` is required: assigning to an undeclared name creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    var tags = markup.split(/(?<=>)(?=<)/);
    if (tags.length % 2 == 1) {
        console.log("uneven number of tags in " + markup);
        return false;
    }

    var openNames = []; // names of the currently open tags, innermost last
    for (var i = 0; i < tags.length; i++) {
        var tag = tags[i];

        if (tag.slice(0, 2) == "</") {
            if (openNames.length === 0) {
                console.log("this tag has not been opened: " + tag);
                return false;
            }
            var closing = tag.match(/<\/\s*([\w\-\_]+)\s*>/);
            if (closing === null) {
                console.log("could not identify closing tag " + tag + " after " + openNames.join());
                return false;
            }
            if (closing[1] != openNames[openNames.length - 1]) {
                console.log("tag '" + closing[1] + "' trying to close these tags: " + openNames.join());
                return false;
            }
            openNames.pop();
        } else {
            // Remove all correctly formed properties; whatever is left besides
            // "<name>" is a fragment that could not be parsed.
            var bare = tag.replace(/(?<=<\s*[\w_\-]+)(\s+[\w\_\-]+(\s*=\s*(".*?"|'.*?'|[^\s\="'<>`]+))?)*/g, "");
            var opening = bare.match(/<(\s*([\w\-\_]+))/);
            if ((opening === null) || (bare != "<" + opening[1] + ">")) {
                console.log("fragmented tag with the following remains: " + bare);
                return false;
            }
            openNames.push(opening[2]);
        }
    }

    if (openNames.length > 0) {
        console.log("these tags are not closed: " + openNames.join());
        return false;
    }
    return true;
}

これにより、タグが一致するかどうか、プロパティが解析されるかどうかのテストなど、いくつかの追加チェックが実行されます。既存の DOM に依存しないため、サーバー環境で使用できますが、遅いことに注意してください。また、理論的には、タグ名とプロパティ名には基本的に任意の Unicode を (いくつかの例外を除いて) 使用できるため、タグはより緩い名前にすることができます。ただし、これは私自身の健全性チェックに合格しません。

javascript - HTML スニペットが JavaScript で有効かどうかを確認する

9 に答える 9

Related

Reference