Hello --
Using BaseX 9.6.1 on Linux.
So I'm trying to convert a compact-ish element pattern string, delimited with braces, back into something that looks like XML vocabulary for readability. (The pattern strings are being used to detect similar structure in the content set.)
The test query (below) works fine.
Stick the functions in a module namespace, and run the real content, and I get the "Stack Overflow: Try tail recursion?" error. (Query plan for this case attached.)
Nothing in the real data should be all that large. I suspect I'm being too clever for my own good somehow, but if anyone can tell me specifically where I'd appreciate it.
Thanks! Graydon
======
declare namespace fn = "http://www.w3.org/2005/xpath-functions";
declare variable $NMToken as xs:string := '[\p{L}\p{Nd}._-:]+';
declare function local:tag($names as xs:string*,$active as xs:string*,$event as xs:string*) {
let $do as xs:string := substring($event,1,1)
return if (not(normalize-space($do))) then () else if ($do eq '{') then (concat('<',head($names),'>'), local:tag(tail($names),(head($names),$active),substring($event,2))) else (concat('</',head($active),'>'), local:tag($names,tail($active),substring($event,2))) };
declare function local:emitPattern($in as xs:string) as xs:string {
let $chunked as element(fn:analyze-string-result) := analyze-string($in,$NMToken) let $names as xs:string+ := $chunked/fn:match/string() let $openClose as xs:string+ := $chunked/fn:non-match => string-join()
return string-join(local:tag($names,(),$openClose)) };
let $pattern as xs:string := "{subblock{sublabel}{p}}" let $yikes as xs:string := "{primaryblock{shortdesc}{table{title}{tgroup{colspec}{colspec}{colspec}{thead{row{entry{p}}{entry{p}}{entry{p}}}}{tbody{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}}}}{note{p}}}"
return ($pattern,$yikes) ! local:emitPattern(.)
Hi Graydon,
Sorry, I’m lazy. Could you share some minimized code with us that triggers the error?
Thanks in advance, Christian
On Sat, Sep 25, 2021 at 11:13 PM Graydon Saunders graydonish@gmail.com wrote:
Hello --
Using BaseX 9.6.1 on Linux.
So I'm trying to convert a compact-ish element pattern string, delimited with braces, back into something that looks like XML vocabulary for readability. (The pattern strings are being used to detect similar structure in the content set.)
The test query (below) works fine.
Stick the functions in a module namespace, and run the real content, and I get the "Stack Overflow: Try tail recursion?" error. (Query plan for this case attached.)
Nothing in the real data should be all that large. I suspect I'm being too clever for my own good somehow, but if anyone can tell me specifically where I'd appreciate it.
Thanks! Graydon
======
declare namespace fn = "http://www.w3.org/2005/xpath-functions";
declare variable $NMToken as xs:string := '[\p{L}\p{Nd}._-:]+';
declare function local:tag($names as xs:string*,$active as xs:string*,$event as xs:string*) {
let $do as xs:string := substring($event,1,1)
return if (not(normalize-space($do))) then () else if ($do eq '{') then (concat('<',head($names),'>'), local:tag(tail($names),(head($names),$active),substring($event,2))) else (concat('</',head($active),'>'), local:tag($names,tail($active),substring($event,2))) };
declare function local:emitPattern($in as xs:string) as xs:string {
let $chunked as element(fn:analyze-string-result) := analyze-string($in,$NMToken) let $names as xs:string+ := $chunked/fn:match/string() let $openClose as xs:string+ := $chunked/fn:non-match => string-join()
return string-join(local:tag($names,(),$openClose)) };
let $pattern as xs:string := "{subblock{sublabel}{p}}" let $yikes as xs:string := "{primaryblock{shortdesc}{table{title}{tgroup{colspec}{colspec}{colspec}{thead{row{entry{p}}{entry{p}}{entry{p}}}}{tbody{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}{row{entry{p}}{entry{p{xref}}}{entry{p}}}{row{entry{p}}{entry{p{em}}}{entry{p}}}}}}{note{p}}}"
return ($pattern,$yikes) ! local:emitPattern(.)
Hi Christian --
Apologies; I was hoping there was something in the structure of the function that was obviously daft by inspection.
On Mon, Sep 27, 2021 at 09:15:56AM +0200, Christian Grün scripsit:
Sorry, I’m lazy. Could you share some minimized code with us that triggers the error?
For certain narrowly defined values of lazy!
I've attached a zip.
The variable "awkward" has been assigned the string literal of the first pattern in the sorted list of patterns which triggers the error. Due to a large table in the markup, this pattern is about 75 kb long.
The rest of the query is a function call using this variable as the parameter. It fails with a "Stack Overflow: Try tail recursion?" error.
Thanks!
Graydon
Hi Graydon,
To understand you correctly: You indicated that you only observed the initial error if the code was moved into a module.
Did I get this right? If not, what did you mean by »Stick the functions in a module namespace«?
Thanks again, Christian
On Mon, Sep 27, 2021 at 3:20 PM Graydon graydonish@gmail.com wrote:
Hi Christian --
Apologies; I was hoping there was something in the structure of the function that was obviously daft by inspection.
On Mon, Sep 27, 2021 at 09:15:56AM +0200, Christian Grün scripsit:
Sorry, I’m lazy. Could you share some minimized code with us that triggers the error?
For certain narrowly defined values of lazy!
I've attached a zip.
The variable "awkward" has been assigned the string literal of the first pattern in the sorted list of patterns which triggers the error. Due to a large table in the markup, this pattern is about 75 kb long.
The rest of the query is a function call using this variable as the parameter. It fails with a "Stack Overflow: Try tail recursion?" error.
Thanks!
Graydon
Hi Christian --
On Mon, Sep 27, 2021 at 05:53:24PM +0200, Christian Grün scripsit:
To understand you correctly: You indicated that you only observed the initial error if the code was moved into a module.
That is factual, but it's an artifact of poor testing on my part. I had tested the function against some samples before inserting it in the module and did not take sufficient care to find an actually large pattern string.
As a result, the function worked fine with the test cases, and when it was put it in a module and called as part of processing the whole content set it failed. This is not plausibly a result of putting it in a module rather than a result of poorly chosen test cases.
With an actually large pattern string, the function fails when run by itself, outside of a module.
Did I get this right? If not, what did you mean by »Stick the functions in a module namespace«?
You have what I said correct, but I made a mistake when I said it.
I didn't see the failure until the function had been put in a module and run against the full content set, so I was wondering if that was a contributing factor. If I'd done a better job of picking test cases, I would have seen the failure with the stand-alone function, and do now see the failure with the stand-alone function in example2.zip.
Hope that helps! Graydon
On Mon, Sep 27, 2021 at 3:20 PM Graydon graydonish@gmail.com wrote:
Hi Christian --
Apologies; I was hoping there was something in the structure of the function that was obviously daft by inspection.
On Mon, Sep 27, 2021 at 09:15:56AM +0200, Christian Grün scripsit:
Sorry, I’m lazy. Could you share some minimized code with us that triggers the error?
For certain narrowly defined values of lazy!
I've attached a zip.
The variable "awkward" has been assigned the string literal of the first pattern in the sorted list of patterns which triggers the error. Due to a large table in the markup, this pattern is about 75 kb long.
The rest of the query is a function call using this variable as the parameter. It fails with a "Stack Overflow: Try tail recursion?" error.
Thanks!
Graydon
No problem, Graydon, thanks for the clarification!
You might need to rewrite your code as it’s not tail recursive (see [1] for some examples; you could also try your luck with fold-left). If you want to go the easy path, you can also increase the Java stack size when starting BaseX (by adding -Xss... to the startup script). Obviously, that’ll only postpone the problem ;·).
Cheers, Christian
[1] https://en.wikipedia.org/wiki/Tail_call
On Mon, Sep 27, 2021 at 6:05 PM Graydon graydonish@gmail.com wrote:
Hi Christian --
On Mon, Sep 27, 2021 at 05:53:24PM +0200, Christian Grün scripsit:
To understand you correctly: You indicated that you only observed the initial error if the code was moved into a module.
That is factual, but it's an artifact of poor testing on my part. I had tested the function against some samples before inserting it in the module and did not take sufficient care to find an actually large pattern string.
As a result, the function worked fine with the test cases, and when it was put it in a module and called as part of processing the whole content set it failed. This is not plausibly a result of putting it in a module rather than a result of poorly chosen test cases.
With an actually large pattern string, the function fails when run by itself, outside of a module.
Did I get this right? If not, what did you mean by »Stick the functions in a module namespace«?
You have what I said correct, but I made a mistake when I said it.
I didn't see the failure until the function had been put in a module and run against the full content set, so I was wondering if that was a contributing factor. If I'd done a better job of picking test cases, I would have seen the failure with the stand-alone function, and do now see the failure with the stand-alone function in example2.zip.
Hope that helps! Graydon
On Mon, Sep 27, 2021 at 3:20 PM Graydon graydonish@gmail.com wrote:
Hi Christian --
Apologies; I was hoping there was something in the structure of the function that was obviously daft by inspection.
On Mon, Sep 27, 2021 at 09:15:56AM +0200, Christian Grün scripsit:
Sorry, I’m lazy. Could you share some minimized code with us that triggers the error?
For certain narrowly defined values of lazy!
I've attached a zip.
The variable "awkward" has been assigned the string literal of the first pattern in the sorted list of patterns which triggers the error. Due to a large table in the markup, this pattern is about 75 kb long.
The rest of the query is a function call using this variable as the parameter. It fails with a "Stack Overflow: Try tail recursion?" error.
Thanks!
Graydon
On Mon, Sep 27, 2021 at 06:13:49PM +0200, Christian Grün scripsit:
No problem, Graydon, thanks for the clarification!
You're welcome!
Thank you for your prompt and useful feedback!
You might need to rewrite your code as it’s not tail recursive (see [1] for some examples; you could also try your luck with fold-left). If you want to go the easy path, you can also increase the Java stack size when starting BaseX (by adding -Xss... to the startup script). Obviously, that’ll only postpone the problem ;·).
For posterity:
Increasing the stack size to 1 GB worked.
Fold-left when I'm trying to manage two stacks as well as the accumulating thing seems like it would be more brave than sensible.
Thinking about the accumulating thing did cause the light bulb to flicker about having to not rely on the individual function call's accumulating return values if I wanted tail recursion to happen. The below works:
declare namespace fn = "http://www.w3.org/2005/xpath-functions";
declare variable $NMToken as xs:string := '[\p{L}\p{Nd}._-:]+';
declare function local:tag($nameList as xs:string*,$stack as xs:string*,$event as xs:string*,$built as xs:string*) {
if (empty($event)) then string-join($built) else if (head($event) eq '{') then local:tag(tail($nameList),(head($nameList),$stack),tail($event),($built,('<',head($nameList),'>'))) else local:tag($nameList,tail($stack),tail($event),($built,concat('</',head($stack),'>'))) };
declare function local:emitPattern($in as xs:string) {
let $chunked as element(fn:analyze-string-result) := analyze-string($in,$NMToken) let $names as array(xs:string) := array { $chunked/fn:match/string() } let $openClose as xs:string+ := ($chunked/fn:non-match => string-join() => string-to-codepoints()) ! codepoints-to-string(.)
return local:tag($names,(),$openClose,()) };
basex-talk@mailman.uni-konstanz.de