これが私が思いついた比較的単純なものです。申し訳ありませんが、徹底的なテストもパフォーマンステストもありません。私はそれがさらに最適化できることを保証します、私はそれをする時間がなかっただけです。簡単にするためにコメントを付けましたhttp://pastebin.com/nkdTSvi6StackOverflowには少し長いかもしれませんが、とにかくここに投稿します。ペーストビンは、より快適に表示するためのものです。
function buildTrie(hash) {
"use strict";
// A very simple function to build a Trie
// we could compress this later, but simplicity
// is better for this example. If we don't
// perform well, we'll try to optimize this a bit
// there is a room for optimization here.
var p, result = {}, leaf, i;
for (p in hash) {
if (hash.hasOwnProperty(p)) {
leaf = result;
i = 0;
do {
if (p[i] in leaf) {
leaf = leaf[p[i]];
} else {
leaf = leaf[p[i]] = {};
}
i += 1;
} while (i < p.length);
// since, obviously, no character
// equals to empty character, we'll
// use it to store the reference to the
// original value
leaf[""] = hash[p];
}
}
return result;
}
function prefixReplaceHtml(html, trie) {
"use strict";
var i, len = html.length, result = [], lastMatch = 0,
current, leaf, match, matched, replacement;
for (i = 0; i < len; i += 1) {
current = html[i];
if (current === "<") {
// don't check for out of bounds access
// assume we never face a situation, when
// "<" is the last character in an HTML
if (match) {
result.push(
html.substring(lastMatch, i - matched.length),
"<a href=\"", match, "\">", replacement, "</a>");
lastMatch = i - matched.length + replacement.length;
i = lastMatch - 1;
} else {
if (matched) {
// go back to the second character of the
// matched string and try again
i = i - matched.length;
}
}
matched = match = replacement = leaf = "";
if (html[i + 1] === "a") {
// we want to skip replacing inside
// anchor tags. We also assume they
// are never nested, as valid HTML is
// against that idea
if (html[i + 2] in
{ " " : 1, "\t" : 1, "\r" : 1, "\n" : 1 }) {
// this is certainly an anchor
i = html.indexOf("</a", i + 3) + 3;
continue;
}
}
// if we got here, it's a regular tag, just look
// for terminating ">"
i = html.indexOf(">", i + 1);
continue;
}
// if we got here, we need to start checking
// for the match in the trie
if (!leaf) {
leaf = trie;
}
leaf = leaf[current];
// we prefer longest possible match, just like POSIX
// regular expressions do
if (leaf && ("" in leaf)) {
match = leaf[""];
replacement = html.substring(
i - (matched ? matched.length : 0), i + 1);
}
if (!leaf) {
// newby-style inline (all hand work!) pay extra
// attention, this code is duplicated few lines above
if (match) {
result.push(
html.substring(lastMatch, i - matched.length),
"<a href=\"", match, "\">", replacement, "</a>");
lastMatch = i - matched.length + replacement.length;
i = lastMatch - 1;
} else {
if (matched) {
// go back to the second character of the
// matched string and try again
i = i - matched.length;
}
}
matched = match = replacement = "";
} else if (matched) {
// perhaps a bit premature, but we'll try to avoid
// string concatenation, when we can.
matched = html.substring(i - matched.length, i + 1);
} else {
matched = current;
}
}
return result.join("");
}
function testPrefixReplace() {
"use strict";
var trie = buildTrie(
{ "x" : "www.xxx.com", "yyy" : "www.y.com",
"xy" : "www.xy.com", "yy" : "www.why.com" });
return prefixReplaceHtml(
"<html><head>x</head><body><a >yyy</a><p>" +
"xyyy yy x xy</p><abrval><yy>xxy</yy>", trie);
}