javascript - javascript を使用した URL ホスト名のさらなる解析

Question

URL を解析してホスト名を取得する方法を探している投稿がたくさんあります。通常の解決策は、ドキュメント要素を作成し、URL を設定して、.hostname プロパティにアクセスすることです。それは素晴らしい解決策です。私はこのテクニックを少し超えるのに苦労しています。

ホスト名からベースホストを正常に抽出する機能があります。ベースホストの意味を説明するために (正しい命名法がわからない)、関数を示し、いくつかの入力出力の例を示します。

/**
 * Return the last two dot-separated labels of a URL's hostname.
 *
 * The URL is parsed by assigning it to the href of an anchor element, so a
 * DOM (`document`) is required.
 *
 * @param {string} url - URL whose hostname should be reduced.
 * @returns {string} The last two labels of the hostname (for example
 *     "google.com"), or the whole hostname when it has fewer than two dots.
 *     Multi-label suffixes are not recognised: "www.guardian.co.uk" gives
 *     "co.uk".
 */
function parseURL(url) {
    // Let the anchor element do the actual URL parsing.
    var anchor = document.createElement('a');
    anchor.href = url;
    var host = anchor.hostname;
    // Drop the last "." and everything after it; the last "." of what is
    // left is then the second-to-last "." of the full hostname.
    var withoutLastLabel = host.substring(0, host.lastIndexOf("."));
    // Keep everything after that second-to-last "." (index 0 when there is
    // no such dot, i.e. the whole hostname).
    var start = withoutLastLabel.lastIndexOf(".") + 1;
    return host.slice(start);
};
parseURL("http://code.google.com/p/jsuri/");
//google.com - I don't think jsuri handles hosts any more effectively
parseURL("http://www.nytimes.com/pages/nyregion/index.html");
//nytimes.com
parseURL("http://fivethirtyeight.blogs.nytimes.com/2013/01/12/in-cooperstown-a-crowded-waiting-room/");
//nytimes.com
parseURL("http://www.guardian.co.uk/uk/2013/jan/13/fears-lulworth-cove-development-heritage");
//co.uk

最後の例は、私が恐れている例外であり、より実用的な解決策を探している理由です。ホストを取得するための .hostname プロパティは、優れた最初のステップです。ベースとなるホストの前に付いているサブホストを切り落とすための、より良い方法を探しています。

助けていただければ幸いです（私の用語を修正するだけであれば）。

score 0 · Accepted Answer

URLを解析したいときは、次のようにします

/**
 * Parse a URL with an anchor element and split its hostname into a
 * registrable domain and a subdomain.
 *
 * Requires a DOM (`document`). The domain/subdomain split is a length-based
 * heuristic, not a Public Suffix List lookup:
 *   - normally the suffix is the last label (".com", ".de");
 *   - a two-letter last label preceded by a 2-3 character label (".co.uk",
 *     ".com.au") is treated as a two-label suffix, but only when at least
 *     two more labels precede it. With exactly three labels the host is
 *     ambiguous ("example.co.uk" vs "www.ibm.de"), so the last two labels
 *     are returned as the domain.
 *
 * @param {string} url - URL to parse.
 * @returns {Object} Plain object with string properties: domain, hash
 *     (without "#"), host, hostname, href, origin, pathname, port, protocol
 *     (without ":"), search (without "?") and subdomain ('' when the
 *     hostname has no labels in front of the domain).
 */
function parseURL(url) {
    var a = document.createElement('a'), obj, host, i, j;
    a.href = url;
    obj = {
        'domain': '',
        'hash': a.hash.slice(1),
        'host': a.host,
        'hostname': a.hostname,
        'href': a.href, // copy back from <a>
        'origin': a.origin,
        'pathname': a.pathname,
        'port': a.port,
        'protocol': a.protocol.slice(0, -1),
        'search': a.search.slice(1),
        'subdomain': ''
    };
    host = obj.hostname;
    i = host.lastIndexOf('.'); // dot in front of the last label
    if (i > 0 && host.length - i === 3) { // if .yz
        j = host.lastIndexOf('.', i - 1);
        // test .vwx.yz or .wx.yz; j > 0 keeps a missing dot (-1) from
        // matching, and one more dot must exist in front of the candidate
        // suffix so that a label is left over to act as the subdomain
        if ((j === i - 3 || j === i - 4) && j > 0 &&
                host.lastIndexOf('.', j - 1) !== -1) {
            i = j;
        }
    }
    // Move back one more label so the domain is "name + suffix". When no
    // further dot exists the whole hostname is the domain (i becomes -1).
    i = i > 0 ? host.lastIndexOf('.', i - 1) : -1;
    obj.domain = host.slice(i + 1);
    // slice(0, -1) would chop the last character, so guard the no-dot case
    obj.subdomain = i > 0 ? host.slice(0, i) : '';
    return obj;
};

今使ったら、

var myURL = parseURL('http://www.example.co.uk:8080/hello/world.html?foo=bar#anchor');
/* {
    "domain": "example.co.uk",
    "hash": "anchor",
    "host": "www.example.co.uk:8080",
    "hostname": "www.example.co.uk",
    "href": "http://www.example.co.uk:8080/hello/world.html?foo=bar#anchor",
    "origin": "http://www.example.co.uk:8080",
    "pathname": "/hello/world.html",
    "port": "8080",
    "protocol": "http",
    "search": "foo=bar",
    "subdomain": "www"
} */

したがって、ドメインだけが必要な場合は myURL.domain を使用します（または、関数から残りの部分を削除します）。

score 0 · Accepted Answer

この関数をよく使用して、URL からホストを解析します。

/**
 * Extract the host part of an http, https, ftp or ftps URL.
 *
 * The host is everything between "://" and the next "/", so it includes a
 * port, user info or query string if those appear before the first slash.
 * The scheme is matched case-insensitively, and because of the "m" flag it
 * may start at the beginning of any line of the input.
 *
 * @param {string} url - Text that starts (a line) with the URL.
 * @returns {?string} The host text, or null when the input does not start
 *     with one of the supported schemes.
 */
function urlParseHost(url){
  var re = /^(?:f|ht)tps?:\/\/([^/]+)/im;
  var match = url.match(re);
  // match() returns null when nothing matched; indexing it would throw
  return match ? match[1] : null;
}

ここで GitHub から作業コードを取得できます。

score 0 · Accepted Answer

例の .uk のような ccTLD は常に 2 文字であるという事実に基づいて、コードを分岐できるはずです（わかりやすくするために変数を宣言しています）：

// Grab the last label (the top-level domain) without its leading dot, so
// that "uk" has length 2 and "com" has length 3.
// NOTE(review): `url` is presumably the hostname here, not a full URL with
// a path -- confirm against the caller.
var tld = url.slice(url.lastIndexOf(".") + 1);
if (tld.length === 2) {
    // two-letter TLD such as "uk": do stuff
} else if (tld.length === 3) {
    // three-letter TLD such as "com": do other stuff
}

また、探している単語は「ドメイン」だと思いますが、文脈によっては「サブドメイン」(docs.google.com の google の前の部分) を含めて指す場合もあります。

score 0 · Accepted Answer

/**
 * Split a URL-like string into its components with one regular expression
 * (no DOM needed). Every component is optional in the pattern.
 *
 * @param {string} str - Text to parse.
 * @returns {Object|boolean} false when the pattern does not match; otherwise
 *     an object with string properties protocol (with trailing ":"), user,
 *     password, host (host name plus ":port" when a port is present),
 *     hostname (host name only), port, pathname, search (with leading "?")
 *     and hash (with leading "#"). Components that are absent are ''.
 */
function parseURL(str) {
    // NOTE(review): the host part nests quantifiers, so long inputs that
    // end up not matching may backtrack heavily -- confirm before using on
    // untrusted input.
    var re = /^(?:([a-zA-Z]+:)\/\/)?(?:([-+._a-zA-Z0-9]+)(?::([-+._a-zA-Z0-9]+))?@)?(([^-~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s]+(?:[^~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s]+[^-~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s])*(?:\.[^-~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s]+(?:[^~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s]+[^-~!@#$%^^&*\(\)_+=\[\]{}:;'"\\,.\/?\s])*)*)(?::(\d+))?)?(\/[^?#]*)?(\?[^#]*)?(#.*)?$/;
    // Names for capture groups 1..9, in order. Group 4 wraps the host name
    // together with the optional ":port" and group 5 is the host name alone,
    // so group 4 is "host" and group 5 is "hostname".
    var names = ['protocol', 'user', 'password', 'host', 'hostname', 'port', 'pathname', 'search', 'hash'];
    var parts = re.exec(str);
    var obj = {};
    var i, value;

    if (parts === null) {
        return false;
    }

    for (i = 0; i < names.length; i++) {
        // Groups that did not take part in the match are undefined.
        value = parts[i + 1];
        obj[names[i]] = value === undefined ? '' : value;
    }

    return obj;
}

javascript - javascript を使用した URL ホスト名のさらなる解析

4 に答える 4

Related

Reference