ドキュメントが保存されると、テキストが抽出され、注釈も実行されるカスタム CPF があります。エラーが発生するファイルはランダムです。以下はエラーメッセージです
2016-02-22 14:49:31.580 Debug: Forest::insert: content-repo-content-001-1 XDMP-INMMLISTFULL: In-memory list storage full; list: table=79%, wordsused=76%, wordsfree=0%, overhead=24%; tree: table=3%, wordsused=87%, wordsfree=13%, overhead=0%
2016-02-22 14:49:31.580 Info: Saving /var/opt/MarkLogic/Forests/content-repo-content-001-1/00000070
2016-02-22 14:49:31.595 Debug: InMemoryStand /var/opt/MarkLogic/Forests/content-repo-content-001-1/00000071, disk=10MB, memory=436MB, list=341MB, tree=85MB, rangeIndex=11MB, reverseIndex=11MB, tripleIndex=44MB
2016-02-22 14:49:31.805 Info: content-repo: File Name : S-2010-000029581.pdf
2016-02-22 14:49:31.805 Info: content-repo: Content Type : application/pdf
2016-02-22 14:49:31.805 Info: content-repo: id : 11c565782e85d213ef00bed474bf25ad84d465d3
2016-02-22 14:49:32.023 Debug: Retrying SVCProcess::run 1 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
2016-02-22 14:49:32.204 Debug: Retrying SVCProcess::run 2 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
2016-02-22 14:49:32.437 Debug: Retrying SVCProcess::run 3 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
2016-02-22 14:49:32.765 Debug: Retrying SVCProcess::run 4 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
2016-02-22 14:49:33.298 Debug: Retrying SVCProcess::run 5 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
2016-02-22 14:49:34.224 Debug: Retrying SVCProcess::run 6 because SVC-PROCESSRUN: Process run error: fork: Cannot allocate memory
marklogic では、各 CPF アクションの後に一時的に割り当てられたメモリをクリアする方法はありますか??
ありがとう
アップデート
これはメモリ エラーが原因であり、CPF がリソースを解放していないためだと考えていました。そのため、CPF で xdmp:spawn-function を実行して注釈を実行し、タスク キューにキューイングされると考えていました。しかし、それを行うと、単一のドキュメントであっても、次の例外が発生します
2016-02-23 16:25:50.498 情報: TaskServer: 2016-02-23 16:25:50.498 情報: TaskServer: XDMP-CONFLICTINGUPDATES 2016-02-23 16:25:50.498 情報: TaskServer: 2016-02-23 16:25:50.498 情報: TaskServer: 1.0-ml 2016-02-23 16:25:50.498 情報: TaskServer: 競合する更新 2016-02-23 16:25:50.498 情報: TaskServer: XDMP-CONFLICTINGUPDATES: xdmp:document- insert("/documents/BioEln/de249f6f43d7e6ecdb1a809769852542a944087c.pdf/a...",
以下は私のコードです。私のCPFはこの関数を呼び出しますrepo-lib:transformDoc
(:~
: Function that transoforms the documents
:)
declare function repo-lib:transformDoc($uri as xs:string){
if(repo-lib:isEmpty($uri)) then ()
else
(: annotations :)
let $_ := xdmp:spawn-function(function(){repo-lib:loadAnnotatedDoc($uri), xdmp:commit()},
<options xmlns="xdmp:eval">
<transaction-mode>update</transaction-mode>
</options>)
(:let $_ := repo-lib:loadAnnotatedDoc($uri) :)
return ()
};
(:~
: This function annotates the document..
:)
declare function repo-lib:loadAnnotatedDoc($uri as xs:string) as xs:string
{
(: for now we are using termite but we can use anything maybe smartlogic :)
repo-lib:loadTermiteAnnotations($uri)
};
declare private function repo-lib:deleteDocument($uri as xs:string?)
{
if($uri) then
if(fn:exists (fn:doc ($uri))) then
xdmp:document-delete($uri)
else ()
else ()
};
(:~
: Load termite annotations
:)
declare function repo-lib:loadTermiteAnnotations($uri as xs:string) as xs:string
{
if(repo-lib:isEmpty($uri)) then ""
else
if(crfslib:uri-exists($uri)) then
let $newDocUri := $uri||"/annotatedText/"
let $id := xdmp:document-properties($uri)//id/text() (:check if the document has id :)
let $id := if($id) then $id
else xdmp:sha1($newDocUri) (:if not generate a unique id:)
let $app := xdmp:document-properties($uri)//context/text() (:check if the document has id :)
(: This will take care of updates, so delete the old copy :)
let $annotatedTexLocation := xdmp:document-properties($uri)//annotatedText-location/text()
let $_ := repo-lib:deleteDocument($annotatedTexLocation)
let $newDocUri := $newDocUri || $id || ".xml"
return
try{
let $boundary := "------WebKitFormBoundaryIbhnU2N5CiXjjSU0"
let $termite := "http://10.239.12.38:8080/termite"
let $dictionary := "ADVENT,ADVENTMED,ANAT,BIOCHEM,BIOPROC,CELLLINE,CHEMBLDRUG,CHEMSTR,COMPANY,COMPOUNDS,DRUG,DRUGFIND, DRUGTYP,INDICATION,PROTYP,LABCHEM,GENE,GENEONT,MOA,ORPHAN,CHEMO,DEVICE,SPECIES"
(: Let us get the extracted text :)
let $text := xdmp:quote(xdmp:document-filter(fn:doc($uri)))
(:Call termite to do the annotations :)
let $termite_output := lib-multipart-post:multipart-post($termite, $boundary, (<data name="text">{$text}</data>,
<data name="format">any.xml</data>,
<data name="output">xml</data>,
<data name="entities">{$dictionary}</data>))
let $transformed_annotations :=
xdmp:xslt-eval(
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes" encoding="utf-8"/>
<xsl:strip-space elements="*"/>
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()" />
</xsl:copy>
</xsl:template>
<!-- Apply all child nodes; don't copy the element itself -->
<xsl:template match="ResponseBlock|ResponsePayload">
<xsl:apply-templates/>
</xsl:template>
<!-- Drop elements -->
<xsl:template match="DictSynList|MatchedSynonyms|TaxonomyList|Taxonomy|MetaHeader|Source|HitLocations|HitLocationString|Exact|TotalSyns|HitCount|Score|NonAmbigSyns|Fragments|KeyValuePairs|Mappings"/>
</xsl:stylesheet>,$termite_output[2]
)
(: delete the document if exisists :)
let $_ := repo-lib:deleteDocument($newDocUri)
(: let us add and annotatons :)
let $_ := xdmp:document-insert($newDocUri, $transformed_annotations,(xdmp:default-permissions()), ("annotated", "termite", $app))
(: let us add the propertier :)
let $_ := xdmp:document-add-properties($newDocUri, (<document-parent-location>{$uri}</document-parent-location>,
<context>{$app}</context>,
<id>{$id}</id>))
let $log := xdmp:log("Dne inserting the document")
(: let $_ := xdmp:commit() :)
(: add the extracgted text as well :)
(:) let $_ := xdmp:document-add-properties($uri, (<annotatedText-location>{$newDocUri}</annotatedText-location>)) :)
return $newDocUri
}catch($ex) {
let $log := xdmp:log("ERROR !!!!!")
let $log := xdmp:log($ex)
return ""
}
else
let $log := xdmp:log("ERRRO!!!! - Document does not exisist yet")
return ""
};