java - 推論を介して Apache Jena の名前空間間のマッピングを達成するにはどうすればよいですか?

Question

ゴール：

データ移行の一般的なタスクを実行するために、オントロジー間のルールベースのマッピングを実現したくありません。

目標を達成する方法:

これを実現するために、任意のデータ型の xml 表現によって提供されるすべての情報を格納できる抽象データ構造を開発しました。次に、対象となるドキュメントタイプの定義からオントロジーを構築するパーサーを作成しました。ここで、最初にabstractDatatype名前空間に関連付けられているデータを読み取るときに、それをaSと呼びましょう。対象となるデータ構造は名前空間tSにあります。

問題：

そのようなルールを介して、名前が同じで名前空間が異なる2つのリソース間の型の公平性を表現しようとすると:

[mappingRule1: (aS:?a rdf:type aS:?b) (tS:?c rdf:type tS:?b) -> (aS:?a rdf:type tS:?b)]

推論者はそれを取得しません。ルールに誤りがある可能性があります。これは、次のように解釈する必要があります: aS とは異なる名前空間 tS にマップされた同じ型名がある場合、aS のすべての個体はtSでも同じ型を取得します 。もう 1 つの問題は、これがタイプの個人がいない場合、そのようなルールは機能しない可能性があり、そのように表現するだけでは不十分かもしれないと言われました. ほぼ代わりに、すべての組み合わせ間のマッピングを行う SubClassOf ルールを作成することもできますが、それではモデルに多くの汚れが生じるため、より一般的なものにする代わりに、さらに多くのフィルタリング条件を追加できるようにしたいと考えています。

ただし、誰かがルールベースのオントロジーマッピングの経験がある場合は、洞察を得ることができて非常に嬉しく思います。

以下は、マッピングが機能しない問題を示す Java 単体テストです。

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.junit.Before;
import org.junit.Test;

import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.reasoner.Derivation;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.ReasonerRegistry;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.reasoner.rulesys.Rule;
import com.hp.hpl.jena.util.PrintUtil;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

public class ReasonerTest {

    String aS = "http://www.custom.eu/abstractDatascheme#";
    String tS = "http://www.custom.eu/targetDatascheme#";

    Model model = ModelFactory.createDefaultModel();
    InfModel inf;

    Resource AA = model.createResource(aS + "A");
    Resource AB = model.createResource(aS + "B");
    Resource AC = model.createResource(aS + "C");
    Resource AD = model.createResource(aS + "D");

    Resource TA = model.createResource(tS + "A");
    Resource TB = model.createResource(tS + "B");

    Property p = model.createProperty(aS, "p");
    Property q = model.createProperty(aS, "q");


    @Before
    public void init() {

        PrintUtil.registerPrefix("aS", aS);
        PrintUtil.registerPrefix("tS", tS);

        AA.addProperty(p, "foo");

        // Get an RDFS reasoner
        GenericRuleReasoner rdfsReasoner = (GenericRuleReasoner) ReasonerRegistry.getRDFSReasoner();
        // Steal its rules, and add one of our own, and create a reasoner with these rules
        List<Rule> rdfRules = new ArrayList<>( rdfsReasoner.getRules() );
        List<Rule> rules = new ArrayList<>();
        String customRules  = "[transitiveRule: (?a aS:p ?b) (?b aS:p ?c) -> (?a aS:p ?c)] \n" +
                                      "[mappingRule1: (aS:?a rdf:type aS:?b) (tS:?c rdf:type tS:?b) -> (aS:?a rdf:type tS:?b)] \n" +
                                      "[mappingRule2a: -> (aS:?a rdfs:subClassOf tS:?a)] \n" +
                                      "[mappingRule2b: -> (tS:?a rdfs:subClassOf aS:?a)]";
        rules.addAll(rdfRules);
        rules.add(Rule.parseRule(customRules));
        Reasoner reasoner = new GenericRuleReasoner(rules);
        reasoner.setDerivationLogging(true);
        inf = ModelFactory.createInfModel(reasoner, model);
    }


    @Test
    public void mapping() {
        AA.addProperty(RDF.type, model.createResource(aS + "CommonType"));
        TA.addProperty(RDF.type, model.createResource(tS + "CommonType"));

        String trace = null;
        trace = getDerivations(trace, AA, RDF.type, TA);
        assertNotNull(trace);
    }


    private String getDerivations(String trace, Resource subject, Property predicate, Resource object) {
        PrintWriter out = new PrintWriter(System.out);
        for (StmtIterator i = inf.listStatements(subject, predicate, object); i.hasNext(); ) {
            Statement s = i.nextStatement();
            System.out.println("Statement is " + s);
            for (Iterator<Derivation> id = inf.getDerivation(s); id.hasNext(); ) {
                Derivation deriv = (Derivation) id.next();
                deriv.printTrace(out, true);
                trace += deriv.toString();
            }
        }
        out.flush();
        return trace;
    }


    @Test
    public void subProperty() {

        // Hierarchy
        model.add(p, RDFS.subPropertyOf, q);

        StmtIterator stmts = inf.listStatements(AA, q, (RDFNode) null);
        assertTrue(stmts.hasNext());
        while (stmts.hasNext()) {
            System.out.println("Statement: " + stmts.next());
        }
    }


    @Test
    public void derivation() {

        // Derivations
        AA.addProperty(p, AB);
        AB.addProperty(p, AC);
        AC.addProperty(p, AD);

        String trace = null;
        trace = getDerivations(trace, AA, p, AD);
        assertNotNull(trace);
    }


    @Test
    public void derivations() {
        String trace = null;
        PrintWriter out = new PrintWriter(System.out);
        for (StmtIterator i = inf.listStatements(); i.hasNext(); ) {
            Statement s = i.nextStatement();
            System.out.println("Statement is " + s);
            for (Iterator<Derivation> id = inf.getDerivation(s); id.hasNext(); ) {
                Derivation deriv = (Derivation) id.next();
                deriv.printTrace(out, true);
                trace += deriv.toString();
            }
        }
        out.flush();
        assertNotNull(trace);
    }


    @Test
    public void listStatements() {
        StmtIterator stmtIterator = inf.listStatements();
        while (stmtIterator.hasNext()) {
            System.out.println(stmtIterator.nextStatement());
        }
    }


    @Test
    public void listRules() {
        List<Rule> rules = ((GenericRuleReasoner) inf.getReasoner()).getRules();
        for (Rule rule : rules) {
            System.out.println(rule.toString());
        }
    }


    @Test
    public void saveDerivation() {
        DataOutputStream out1;
        try {
            out1 = new DataOutputStream(new BufferedOutputStream(new FileOutputStream("target/test-output/testOnto.owl")));
            inf.write(out1);
        }
        catch (IOException ex) {
            Logger.getLogger(ReasonerTest.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    @Test
    public void printRdfRules() {

        GenericRuleReasoner rdfsReasoner = (GenericRuleReasoner) ReasonerRegistry.getRDFSReasoner();
        List<Rule> customRules = new ArrayList<>(rdfsReasoner.getRules());

        PrintWriter writer = null;
        try {
            File directory = new File("target/test-output/");
            if (!directory.exists()) {
                directory.mkdir();
            }
            writer = new PrintWriter("target/test-output/rfd.rules", "UTF-8");
        }
        catch (IOException ex) {
            Logger.getLogger(ReasonerTest.class.getName()).log(Level.SEVERE, null, ex);
        }
        for (Rule customRule : customRules) {
            writer.println(customRule.toString());
        }
        writer.close();


    }



}

score 1 · Accepted Answer

ns:?x文字列形式が何かの略語で始まる URI リソースに一致し、残り (または全体)ns:にバインドすることを期待するだけではいけません。?xURI の文字列形式を調べるルールを使用する場合は、strConcat で文字列形式を取得し、正規表現で照合と抽出を行う必要があります。これは、m:Personが型として使用され、それx:a a n:Personがデータ内にあり、m:Personとが接頭辞とn:Personを持つ同じ接尾辞を持ち、結果としてそれを推測する例です。n:m:x:a a m:Person

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.hp.hpl.jena.rdf.model.InfModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.reasoner.Reasoner;
import com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner;
import com.hp.hpl.jena.reasoner.rulesys.Rule;
import com.hp.hpl.jena.util.PrintUtil;

public class TypeMappingExample {
    public static void main(String[] args) throws IOException {
        PrintUtil.registerPrefix( "n", "urn:ex:n/" );
        PrintUtil.registerPrefix( "m", "urn:ex:m/" );
        String content = "\n" +
                "@prefix n: <urn:ex:n/>.\n" +
                "@prefix m: <urn:ex:m/>.\n" +
                "@prefix x: <urn:ex:x/>" +
                "\n" +
                "x:a a n:Person.\n" +
                "x:b a m:Person.\n" +
                "";
        Model model = ModelFactory.createDefaultModel();
        try ( InputStream in = new ByteArrayInputStream( content.getBytes() )) {
            model.read( in, null, "TTL" );
        }
        String rule = "\n" +
                "[strConcat(n:,'(.*)',?nprefix),\n" +
                " strConcat(m:,'(.*)',?mprefix),\n" +
                " (?x rdf:type ?ntype), strConcat(?ntype,?ntypestr),\n" +
                " (?y rdf:type ?mtype), strConcat(?mtype,?mtypestr)," +
                " regex(?ntypestr,?nprefix,?nsuffix),\n" +
                " regex(?mtypestr,?mprefix,?msuffix),\n" +
                " equal(?nsuffix,?msuffix)\n" +
                " -> \n" +
                "(?x rdf:type ?mtype)]";
        Reasoner reasoner = new GenericRuleReasoner( Rule.parseRules( rule ));
        InfModel imodel = ModelFactory.createInfModel( reasoner, model );
        imodel.write( System.out, "TTL" );
    }
}

@prefix n:     <urn:ex:n/> .
@prefix m:     <urn:ex:m/> .
@prefix x:     <urn:ex:x/> .

x:a     a       m:Person , n:Person .

x:b     a       m:Person .

ご覧のとおり、文字列の処理はかなり大雑把です。Jena のビルトインは、実際には URI などから文字列を取得するように設計されています。SPARQL 関数の中には、これを少し簡単にするものもありますが、IRI は実際には不透明な識別子であると想定されているため、それでも少しエレガントではありません。

はるかに簡単な解決策は、すべてのクラスにラベルが付けられていることを確認し、2 つのクラスに同じラベルが付けられている場合、一方のインスタンスが他方のインスタンスであると言うことです。rdfs:isDefinedBy をうまく利用した場合は、次のようなものを使用して、これを非常に滑らかにすることができます。

[(?c1 a rdfs:Class) (?c1 rdfs:isDefinedBy ?ont1) (?c1 rdfs:label ?name)
 (?c2 a rdfs:Class) (?c2 rdfs:isDefinedBy ?ont2) (?c2 rdfs:label ?name)
 ->
 [(?x rdf:type ?c1) -> (?x rdf:type ?c2)]]

java - 推論を介して Apache Jena の名前空間間のマッピングを達成するにはどうすればよいですか?

ゴール：

目標を達成する方法:

問題：

1 に答える 1

Related

Reference