Hi all,
I'm joining elements from two sets of documents in a single database. Each set contains a six-figure number of elements, and the query still hasn't finished after several minutes. My suspicion is that the indexes aren't being used for the join itself; the only index access I can see is the text index that limits the fingerprint 'content' elements to those with "Person" as the value of 'contentFamily'. Perhaps my data and query demand some form of optimization that hasn't been implemented yet, or perhaps there is something I could do to fix the query. I'm using BaseX 8.3.0.
The query: ------------------------- xquery version "3.0"; declare namespace fingerprint="http://atira.dk/schemas/pure4/wsdl/template/fingerprint/current"; declare namespace concept="http://atira.dk/schemas/pure4/wsdl/template/concept/current"; declare namespace core="http://atira.dk/schemas/pure4/model/core/current"; declare namespace xsi="http://www.w3.org/2001/XMLSchema-instance"; declare namespace fincur="http://atira.dk/schemas/pure4/model/template/fingerprint/current"; declare namespace concur="http://atira.dk/schemas/pure4/model/template/concept/current";
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization"; declare option output:method "csv"; declare option output:csv "header=yes, separator=comma";
let $coll := collection('pure_20151109') let $concept_contents := $coll/concept:GetConceptResponse/core:result/core:content let $fingerprint_contents := $coll/fingerprint:GetFingerprintResponse/core:result/core:content
for $fin_con in $fingerprint_contents[fincur:contentFamily/text()="Person"] /fincur:rankedConcepts/fincur:rankedConcept let $concept_uuid := $fin_con/fincur:conceptUuid let $rank := $fin_con/fincur:rank let $weighted_rank := $fin_con/fincur:weightedRank let $person_uuid := $fin_con/ancestor::core:content[1]/@uuid let $concept_name := $concept_contents[@uuid=$concept_uuid]/concur:name/core:localizedString return <csv> <record> <person_uuid>{data($person_uuid)}</person_uuid> <concept_uuid>{$concept_uuid/text()}</concept_uuid> <rank>{$rank/text()}</rank> <weighted_rank>{$weighted_rank/text()}</weighted_rank> <concept_name>{$concept_name/text()}</concept_name> </record> </csv> -------------------------
The query info from the query, which I stopped after 3 minutes: ------------------------- Error: Interrupted. Compiling: - pre-evaluating collection("pure_20151109") - inlining $coll_0 - inlining $fingerprint_contents_2 - applying text index for "Person" - inlining $rank_5 - inlining $weighted_rank_6 - inlining $person_uuid_7 - inlining $concept_name_8 Query: xquery version "3.0"; declare namespace fingerprint="http://atira.dk/schemas/pure4/wsdl/template/fingerprint/current"; declare namespace concept="http://atira.dk/schemas/pure4/wsdl/template/concept/current"; declare namespace core="http://atira.dk/schemas/pure4/model/core/current"; declare namespace xsi="http://www.w3.org/2001/XMLSchema-instance"; declare namespace fincur="http://atira.dk/schemas/pure4/model/template/fingerprint/current"; declare namespace concur="http://atira.dk/schemas/pure4/model/template/concept/current"; declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization"; declare option output:method "csv"; declare option output:csv "header=yes, separator=comma"; let $coll := collection('pure_20151109') let $concept_contents := $coll/concept:GetConceptResponse/core:result/core:content let $fingerprint_contents := $coll/fingerprint:GetFingerprintResponse/core:result/core:content for $fin_con in $fingerprint_contents[fincur:contentFamily/text()="Person"] /fincur:rankedConcepts/fincur:rankedConcept let $concept_uuid := $fin_con/fincur:conceptUuid let $rank := $fin_con/fincur:rank let $weighted_rank := $fin_con/fincur:weightedRank let $person_uuid := $fin_con/ancestor::core:content[1]/@uuid let $concept_name := $concept_contents[@uuid=$concept_uuid]/concur:name/core:localizedString return <csv> <record> <person_uuid>{data($person_uuid)}</person_uuid> <concept_uuid>{$concept_uuid/text()}</concept_uuid> <rank>{$rank/text()}</rank> <weighted_rank>{$weighted_rank/text()}</weighted_rank> <concept_name>{$concept_name/text()}</concept_name> </record> </csv> Optimized Query: let 
$concept_contents_1 := (db:open-pre("pure_20151109",0), ...)/concept:GetConceptResponse/core:result/core:content for $fin_con_3 in db:text("pure_20151109", "Person")/parent::fincur:contentFamily/parent::core:content[parent::core:result/parent::fingerprint:GetFingerprintResponse/parent::document-node()]/fincur:rankedConcepts/fincur:rankedConcept let $concept_uuid_4 := $fin_con_3/fincur:conceptUuid return element csv { (element record { (element person_uuid { (data($fin_con_3/ancestor::core:content[position() = 1]/@uuid)) }, element concept_uuid { ($concept_uuid_4/text()) }, element rank { ($fin_con_3/fincur:rank/text()) }, element weighted_rank { ($fin_con_3/fincur:weightedRank/text()) }, element concept_name { (($concept_contents_1)[(@uuid = $concept_uuid_4)]/concur:name/core:localizedString/text()) }) }) } Query plan: <QueryPlan compiled="true"> <GFLWOR> <Let> <Var name="$concept_contents" id="1"/> <IterPath> <DBNodeSeq size="51261"> <DBNode name="pure_20151109" pre="0"/> <DBNode name="pure_20151109" pre="1603"/> <DBNode name="pure_20151109" pre="2781"/> <DBNode name="pure_20151109" pre="5227"/> <DBNode name="pure_20151109" pre="7212"/> </DBNodeSeq> <IterStep axis="child" test="concept:GetConceptResponse"/> <IterStep axis="child" test="core:result"/> <IterStep axis="child" test="core:content"/> </IterPath> </Let> <For> <Var name="$fin_con" id="3"/> <CachedPath> <ValueAccess data="pure_20151109" type="TEXT"> <Str value="Person" type="xs:string"/> </ValueAccess> <IterStep axis="parent" test="fincur:contentFamily"/> <IterStep axis="parent" test="core:content"> <CachedPath> <IterStep axis="parent" test="core:result"/> <IterStep axis="parent" test="fingerprint:GetFingerprintResponse"/> <IterStep axis="parent" test="document-node()"/> </CachedPath> </IterStep> <IterStep axis="child" test="fincur:rankedConcepts"/> <IterStep axis="child" test="fincur:rankedConcept"/> </CachedPath> </For> <Let> <Var name="$concept_uuid" id="4"/> <IterPath> <VarRef> <Var name="$fin_con" 
id="3"/> </VarRef> <IterStep axis="child" test="fincur:conceptUuid"/> </IterPath> </Let> <CElem> <QNm value="csv" type="xs:QName"/> <CElem> <QNm value="record" type="xs:QName"/> <CElem> <QNm value="person_uuid" type="xs:QName"/> <FnData name="data([items])"> <CachedPath> <VarRef> <Var name="$fin_con" id="3"/> </VarRef> <IterPosStep axis="ancestor" test="core:content"> <Pos min="1" max="1"/> </IterPosStep> <IterStep axis="attribute" test="uuid"/> </CachedPath> </FnData> </CElem> <CElem> <QNm value="concept_uuid" type="xs:QName"/> <CachedPath> <VarRef> <Var name="$concept_uuid" id="4"/> </VarRef> <IterStep axis="child" test="text()"/> </CachedPath> </CElem> <CElem> <QNm value="rank" type="xs:QName"/> <IterPath> <VarRef> <Var name="$fin_con" id="3"/> </VarRef> <IterStep axis="child" test="fincur:rank"/> <IterStep axis="child" test="text()"/> </IterPath> </CElem> <CElem> <QNm value="weighted_rank" type="xs:QName"/> <IterPath> <VarRef> <Var name="$fin_con" id="3"/> </VarRef> <IterStep axis="child" test="fincur:weightedRank"/> <IterStep axis="child" test="text()"/> </IterPath> </CElem> <CElem> <QNm value="concept_name" type="xs:QName"/> <CachedPath> <IterFilter> <VarRef> <Var name="$concept_contents" id="1"/> </VarRef> <CmpG op="="> <CachedPath> <IterStep axis="attribute" test="uuid"/> </CachedPath> <VarRef> <Var name="$concept_uuid" id="4"/> </VarRef> </CmpG> </IterFilter> <IterStep axis="child" test="concur:name"/> <IterStep axis="child" test="core:localizedString"/> <IterStep axis="child" test="text()"/> </CachedPath> </CElem> </CElem> </CElem> </GFLWOR> </QueryPlan> -------------------------
Thanks in advance for any advice! Chuck