c++ - デマングルされたシンボルからクラスを抽出する

Question

boost::regex を使用して、nm のデマングルされたシンボル出力から (完全な) クラス名を抽出しようとしています。このサンプルプログラムは

#include <vector>

// Sample code for the question: a class template nested inside two namespaces.
namespace Ns1
{
namespace Ns2
{
    // Class template with two type parameters; Cont is the type of the
    // private member `cont` and of foo()'s argument.
    template<typename T, class Cont>
    class A
    {
    public:
        A() {}
        ~A() {}
        // Takes the container type by const reference; the body is empty.
        void foo(const Cont& c) {}
        // Takes another object of the same specialization; the body is empty.
        void bar(const A<T,Cont>& x) {}

    private:
        Cont cont;
    };
}
}

// Sample driver: creates two A<int, std::vector<int> > objects and calls
// foo() and bar() on one of them.
int main()
{
    Ns1::Ns2::A<int,std::vector<int> > a;
    Ns1::Ns2::A<int,std::vector<int> > b;
    std::vector<int> v;

    a.foo(v);
    a.bar(b);
}

クラス A に対して次のシンボルを生成します。

Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::A()
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::bar(Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > > const&)
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::foo(std::vector<int, std::allocator<int> > const&)
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::~A()

できれば単一の正規表現パターンを使用して、クラス (インスタンス) 名 Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > > を抽出したいのですが、<> のペア内で再帰的に現れるクラス指定子を解析するのに問題があります。

正規表現パターン (boost::regex でサポートされているもの) を使用してこれを行う方法を知っている人はいますか?

私の解決策（David Hammen の回答に基づいているため、その回答を承認しました）：

クラスと名前空間のシンボルを抽出するために (単一の) 正規表現は使用しません。シンボル文字列の末尾から括弧文字のペア (<> や () など) を取り除く単純な関数を作成しました。

// Removes one trailing bracketed group from `symbol`.
//
// If `symbol` ends with `closingBracket`, the string is scanned backwards to
// find the `openingBracket` that balances that final closer (nested pairs of
// the same bracket kind are skipped over). Everything from that opener to the
// end is stored in `strippedPart`, and the text before it is returned.
//
// Parameters:
//   openingBracket / closingBracket - the bracket pair to strip, e.g. '(' ')'
//                                     or '<' '>'.
//   symbol       - the symbol text to examine; it is not modified.
//   strippedPart - receives the removed group (brackets included). It is left
//                  untouched when nothing is stripped.
//
// Returns `symbol` unchanged when it is empty, does not end with
// `closingBracket`, or contains no balancing opener.
std::string stripBracketPair(char openingBracket,char closingBracket,const std::string& symbol, std::string& strippedPart)
{
    if(symbol.empty() || symbol[symbol.length() - 1] != closingBracket)
    {
        return symbol;
    }

    // When both delimiters are the same character (e.g. quotes) nesting is
    // impossible, so the next occurrence found while scanning is the opener.
    const bool sameBracket = (openingBracket == closingBracket);

    // The first iteration always sees the trailing closer, so `depth` is at
    // least 1 whenever the opener branch below is reached.
    size_t depth = 0;
    for(size_t pos = symbol.length(); pos-- > 0;)
    {
        const char ch = symbol[pos];
        if(ch == closingBracket && (depth == 0 || !sameBracket))
        {
            ++depth;
        }
        else if(ch == openingBracket)
        {
            --depth;
            if(depth == 0)
            {
                // Found the opener that balances the trailing closer.
                strippedPart = symbol.substr(pos);
                return symbol.substr(0,pos);
            }
        }
    }

    // Unbalanced brackets: leave the symbol as it is.
    return symbol;
}

これは、シンボルから名前空間/クラスを抽出する他の 2 つのメソッドで使用されます。

// Returns the namespace part of a demangled symbol, or an empty string.
//
// When extractClass() finds a class in `symbol`, the namespace is built from
// that class path (with a trailing template argument list stripped first).
// Otherwise the symbol is treated as a namespace-level function or variable:
// its argument list and trailing template arguments are stripped, and the
// namespace is built from what is left. If that result contains whitespace,
// only the last whitespace-separated word is kept.
//
// If isClass() reports the resulting name as a class, an empty string is
// returned instead.
std::string extractNamespace(const std::string& symbol)
{
    std::string discarded;
    std::string namespacePath;
    std::vector<std::string> pathParts;

    std::string owningClass = extractClass(symbol);
    if(owningClass.empty())
    {
        // Assume this symbol is a namespace global function/variable
        std::string bareName = stripBracketPair('(',')',symbol,discarded);
        bareName = stripBracketPair('<','>',bareName,discarded);

        boost::split(pathParts,bareName,boost::is_any_of("::"),boost::token_compress_on);
        namespacePath = buildNamespaceFromSymbolPath(pathParts);

        // Keep only the last whitespace-separated word of the result.
        std::vector<std::string> words;
        boost::split(words,namespacePath,boost::is_any_of(" \t"),boost::token_compress_on);
        if(words.size() > 1)
        {
            namespacePath = words.back();
        }
    }
    else
    {
        owningClass = stripBracketPair('<','>',owningClass,discarded);

        boost::split(pathParts,owningClass,boost::is_any_of("::"),boost::token_compress_on);
        namespacePath = buildNamespaceFromSymbolPath(pathParts);
    }

    // A name that is itself a class is not reported as a namespace.
    return isClass(namespacePath) ? std::string() : namespacePath;
}

// Returns the class part of a demangled symbol, or an empty string.
//
// The symbol is trimmed, a trailing "(...)" group and then a trailing "<...>"
// group are stripped, and everything before the last "::" is taken as the
// class candidate. The candidate is rejected when
//   - its name, with a trailing template argument list stripped, contains a
//     space but no '<', or
//   - isClass() does not report it as a class.
std::string extractClass(const std::string& symbol)
{
    std::string cls;
    std::string strippedPart;
    std::string fullSymbol = symbol;
    boost::trim(fullSymbol);
    // Strip from the trimmed copy: stripBracketPair() only acts when the very
    // last character is the closing bracket, so surrounding whitespace must
    // already be gone.
    fullSymbol = stripBracketPair('(',')',fullSymbol,strippedPart);
    fullSymbol = stripBracketPair('<','>',fullSymbol,strippedPart);

    const size_t pos = fullSymbol.find_last_of(':');
    // pos > 0 guards the "pos - 1" below against unsigned wrap-around when
    // the only ':' is the first character.
    if(pos != std::string::npos && pos > 0)
    {
        // pos is the second ':' of the last "::"; drop both colons.
        cls = fullSymbol.substr(0,pos - 1);
        std::string untemplatedClassName = stripBracketPair('<','>',cls,strippedPart);
        if(untemplatedClassName.find('<') == std::string::npos &&
           untemplatedClassName.find(' ') != std::string::npos)
        {
            cls = "";
        }
    }

    if(!cls.empty() && !isClass(cls))
    {
        cls = "";
    }
    return cls;
}

このbuildNamespaceFromSymbolPath()メソッドは、有効な名前空間部分を連結するだけです。

// Joins the leading parts of a split symbol path with "::".
//
// The last element of `symbolPathParts` is never included. Joining stops at
// the first part that contains '<' or '('. Returns an empty string when there
// are fewer than two parts.
std::string buildNamespaceFromSymbolPath(const std::vector<std::string>& symbolPathParts)
{
    std::string joined;
    if(symbolPathParts.size() < 2)
    {
        return joined;
    }

    const size_t lastIndex = symbolPathParts.size() - 1;
    for(size_t idx = 0; idx != lastIndex; ++idx)
    {
        const std::string& part = symbolPathParts[idx];
        if(part.find_first_of("<(") != std::string::npos)
        {
            // Template or function syntax: the namespace path ends here.
            break;
        }
        if(idx != 0)
        {
            joined += "::";
        }
        joined += part;
    }
    return joined;
}

最後に、isClass() メソッドは正規表現を使用して、すべてのシンボルの中からコンストラクターを探します (残念ながら、メンバー関数しか持たないクラスでは機能しないようです)。

// Cache of symbols that isClass() has already confirmed to be classes.
std::set<std::string> allClasses;

// Returns `text` with every regular-expression metacharacter backslash-escaped
// so that it matches literally inside a pattern.
static std::string escapeForRegex(const std::string& text)
{
    static const std::string metaCharacters = "\\^$.|?*+()[]{}";
    std::string escaped;
    escaped.reserve(text.size() * 2);
    for(std::string::const_iterator ch = text.begin(); ch != text.end(); ++ch)
    {
        if(metaCharacters.find(*ch) != std::string::npos)
        {
            escaped += '\\';
        }
        escaped += *ch;
    }
    return escaped;
}

// Reports whether `classSymbol` names a class, judged by whether a
// constructor symbol for it exists in `allRecords`.
//
// The constructor name is the last "::"-separated component of `classSymbol`
// with a trailing template argument list stripped. A record matches when its
// symbolName is "<classSymbol>::<constructorName>(" followed by at least one
// more character. Positive results are stored in `allClasses`; negative
// results are not cached, so they rescan all records on every call.
//
// Both names are fully escaped before being placed in the pattern, so symbols
// containing characters such as '[', '.', '+' or '|' are matched literally.
bool isClass(const std::string& classSymbol)
{
    std::set<std::string>::iterator foundClass = allClasses.find(classSymbol);
    if(foundClass != allClasses.end())
    {
        return true;
    }

    std::string strippedPart;
    std::string constructorName = stripBracketPair('<','>',classSymbol,strippedPart);
    std::vector<std::string> constructorPathParts;

    boost::split(constructorPathParts,constructorName,boost::is_any_of("::"),boost::token_compress_on);
    if(constructorPathParts.size() > 1)
    {
        constructorName = constructorPathParts.back();
    }

    std::ostringstream constructorPattern;
    constructorPattern << "^" << escapeForRegex(classSymbol)
                       << "::" << escapeForRegex(constructorName) << "[\\(].+$";
    boost::regex reConstructor(constructorPattern.str());

    for(std::vector<NmRecord>::iterator it = allRecords.begin();
        it != allRecords.end();
        ++it)
    {
        if(boost::regex_match(it->symbolName,reConstructor))
        {
            allClasses.insert(classSymbol);
            return true;
        }
    }
    return false;
}

前述のように、クラスがコンストラクターを提供しない場合、最後のメソッドは安全にクラス名を見つけられず、大きなシンボルテーブルでは非常に遅くなります。しかし、少なくともこれは nm のシンボル情報から得られるものをカバーしているようです。

他のユーザーが正規表現が正しいアプローチではないことに気付くかもしれないという質問のために、正規表現タグを残しました。

score 2 · Accepted Answer

これは、C++ で使えるどの正規表現よりもはるかに強力な Perl の拡張正規表現でも困難です。そこで、別のアプローチを提案します：

最初に、データなど関数のように見えないものを取り除きます (D 指定子を探します)。virtual thunk to this や virtual table for that なども邪魔になるので、メインの解析を行う前に取り除きます。このフィルタリングには正規表現が役立ちます。残るのは関数のはずです。各関数について、次の手順を行います。

最後の閉じ括弧より後ろの部分を取り除きます。たとえば、Foo::Bar(int,double) const は Foo::Bar(int,double) になります。
関数の引数を取り除きます。ここでの問題は、括弧の中に括弧が入り得ることです。たとえば、関数ポインターを引数に取る関数が、さらに関数ポインターを引数に取る関数ポインターを受け取る場合があります。正規表現は使わず、括弧が対応しているという事実を利用してください。このステップの後、Foo::Bar(int,double) は Foo::Bar になり、a::b::Baz<lots<of<template>, stuff>>::Baz(int, void (*)(int, void (*)(int))) は a::b::Baz<lots<of<template>, stuff>>::Baz になります。
次に、先頭側を処理します。同様の方法でテンプレート部分を解析します。これで、あのぐちゃぐちゃな a::b::Baz<lots<of<template>, stuff>>::Baz は a::b::Baz::Baz になります。
この段階で、関数は a::b:: ... ::ClassName::function_name のような形になります。ここで、名前空間内のフリー関数について少し問題があります。デストラクタはクラスである決定的な証拠です。関数名がチルダで始まるなら、クラス名があることは間違いありません。コンストラクターも、クラスであることをほぼ保証します。ただし、名前空間 Foo の中に関数 Foo を定義している場合は例外です。
最後に、切り取ったテンプレートを再挿入することもできます。

score 1 · Accepted Answer

シンプルな C++関数で抽出を行いました。

完全なコードについてはリンクを参照してください。背後にあるアイデアは次のとおりです。

基本レベルのトークンは :: で区切られます。
基本レベルのトークンが N 個ある場合、最初の N-1 個がクラス名を表し、最後の 1 個が関数を表します。
( または < でレベルを 1 つ上げます（+1）。
閉じる ) または > でレベルを 1 つ下げます（-1）。
もちろん、基本レベルとは level == 0 のことです。

ブラケットは無制限にネストできるため、これは正規表現では実行できないと強く感じています。私の関数ではネストを 255 レベルまでに制限していますが、std::stack<char> に切り替えれば無制限のレベルに対応できます。

関数：

// Splits a demangled C++ name into its top-level "::"-separated parts.
//
// "::" is treated as a separator only when it is not inside "(...)" or
// "<...>", so template arguments and parameter lists stay attached to the
// part they belong to. For example "a::B<x::y>::f(int)" yields
// { "a", "B<x::y>", "f(int)" }.
//
// Throws std::runtime_error when
//   - a top-level ':' is not followed by a second ':',
//   - a ')' or '>' does not close the innermost open bracket,
//   - brackets are still open at the end of the input, or
//   - brackets are nested deeper than the fixed capacity of the bracket
//     stack (256 levels).
std::vector<std::string> parseCppName(std::string line)
{
   std::vector<std::string> retVal;
   int level = 0;
   char closeChars[256];   // closing bracket expected at each open level
   const int maxLevel = static_cast<int>(sizeof(closeChars));

   size_t startPart = 0;
   for (size_t i = 0; i < line.length(); ++i)
   {
      if (line[i] == ':' && level == 0)
      {
          if (i + 1 >= line.length() || line[i + 1] != ':')
             throw std::runtime_error("missing :");
          retVal.push_back(line.substr(startPart, i - startPart));
          // Skip the second ':'; the next part starts right after it.
          startPart = ++i + 1;
      }
      else if (line[i] == '(' || line[i] == '<') {
         // Refuse to write past the end of the fixed-size bracket stack.
         if (level >= maxLevel)
            throw std::runtime_error("Too many nested (<");
         closeChars[level++] = (line[i] == '(') ? ')' : '>';
      }
      else if (level > 0 && line[i] == closeChars[level - 1]) {
         --level;
      }
      else if (line[i] == '>' || line[i] == ')') {
         throw std::runtime_error("Extra )>");
      }
   }
   if (level > 0)
       throw std::runtime_error("Missing )>");
   retVal.push_back(line.substr(startPart));
   return retVal;
}

c++ - デマングルされたシンボルからクラスを抽出する

2 に答える 2

Related

Reference