Writing more efficient XQuery code (avoiding redundant iteration)

Here is a simplified version of the problem I'm working on: I have a bunch of xml data that encodes information about people. Each person is uniquely identified by the id attribute, but can have many names. For example, in one document I could find

<person id="1">Paul Mcartney</person>
<person id="2">Ringo Starr</person>

And in another I can find:

<person id="1">Sir Paul McCartney</person>
<person id="2">Richard Starkey</person>

I want to use XQuery to create a new document that lists all the names associated with each identifier, i.e.:

<person id="1">
    <name>Paul McCartney</name>
    <name>Sir Paul McCartney</name>
    <name>James Paul McCartney</name>
</person>
<person id="2">
    ...
</person>

The way I'm doing it now in xquery looks like this (pseudocode-esque):

(: Pseudocode: the bracketed placeholders stand for the real path expressions. :)
let $ids := distinct-values( [all the id attributes on people] )
for $id in $ids
    return <person id="{$id}">
    {
    (: The inner FLWOR rescans every name once per id. :)
    for $unique-name in distinct-values
            (
            for $name in ( [all names] )
            where $name/@id=$id
            return $name
            )
        return <name>{$unique-name}</name>
    }
    </person>

, . , , ( 1200). (300 , 800 xml), 12 , , 1200 4 ( - 3 ). , , . Saxon, java 10 (!), , 6 .

Ideally, I would do something like this (in Python pseudocode):

# Map each id to the set of distinct names seen for it.
persons = {}
for id in ids:
    persons[id] = set()
# Single pass over the records: each name goes straight into its id's set.
for person in all_the_people_in_my_xml_document:
    persons[person.id].add(person.name)

, XML-. , - xquery? , , ( ). , , , Python, xquery ( ) .

- ? , - , xquery, ? , , , .

+3
4

, XQuery 1.0

XQuery 1.1 adds a group by clause, which lets you write:

(: Group the person elements by id and emit each distinct name once per group. :)
for $person in /person
let $id := $person/@id
group by $id
return  <people id="{$id}">{
          for $name in distinct-values($person)
          return <name>{$name}</name>
        }</people>

, XQuery 1.1 , group by.

XQSharp, , , XQuery 1.1 .

, :

(: Supplied by the caller; declared as a sequence of untyped person elements. :)
declare variable $people as element(person, xs:untyped)* external;

(: For each distinct id, wrap every person with that id in a name element. :)
for $id in distinct-values($people/@id)
return <people id="{$id}">{
          for $person in $people
          where $person/@id = $id
          (: NOTE(review): {$person} places the whole person element inside name, not only its text; confirm this is intended. :)
          return <name>{$person}</name>
       }</people>

, :

library http://www.w3.org/2005/xpath-functions external;
library http://www.w3.org/2001/XMLSchema external;
declare variable $people external;

for $distinct-person in $people
let $id := http://www.w3.org/2005/xpath-functions:data($distinct-person/attribute::id)
group by
  $id
aggregate
  element {name} { fs:item-sequence-to-node-sequence($distinct-person) }
as
  $:temp:19
return
  element {person} { (attribute {id} { $id } , fs:item-sequence-to-node-sequence($:temp:19)) }

, as element(person, xs:untyped)*, , ( ), , $person/@id . XQSharp , node . , n log n, , .

, ( ), , XQSharp ; . , - id .

, XQuery 1.0 , (, XQSharp) . , .

, XQSharp, .

+4

: .

(: Accumulate name elements per id in a map during one pass over the people.
   The map:* functions are presumably MarkLogic's map library; confirm. :)
let $map := map:map()
let $people :=
  for $person in $all-people
  (: Store the names already held under this id followed by this person's name. :)
  return map:put($map, $person/@id, 
    (map:get($map, $person/@id), <name>{$person/text()}</name>))
return
  (: Emit one person element per key in the map. :)
  for $id in map:keys($map)
  return 
    <person id="{$id}">{map:get($map, $id)}</person>
+1

, - , xquery, ? , .

This XSLT 2.0 transformation (the additional documents are represented by <xsl:variable>s):

<!-- Merges person records from the source document and two inline documents,
     grouping them by id and listing each record's text as a name element. -->
<xsl:stylesheet version="2.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>

 <!-- Second set of person records, held inline as a variable. -->
 <xsl:variable name="vDoc2">
  <persons>
   <person id="1">Sir Paul McCartney</person>
   <person id="2">Richard Starkey</person>
  </persons>
 </xsl:variable>

 <!-- Third set of person records, held inline as a variable. -->
 <xsl:variable name="vDoc3">
  <persons>
   <person id="1">James Paul McCartney</person>
   <person id="2">Richard Starkey - Ringo Starr</person>
  </persons>
 </xsl:variable>

 <!-- Select the person elements of all three documents and form one group per @id. -->
 <xsl:template match="/">
  <xsl:for-each-group group-by="@id" select=
   "(/ | $vDoc2 | $vDoc3)/*/person">

   <!-- One output person per group; every record in the group yields a name,
        so a name repeated under the same id is not removed. -->
   <person id="{current-grouping-key()}">
     <xsl:for-each select="current-group()">
       <name><xsl:sequence select="text()"/></name>
     </xsl:for-each>
   </person>

  </xsl:for-each-group>
 </xsl:template>
</xsl:stylesheet>

When this transformation is applied to the following XML document:

<persons>
    <person id="1">Paul Mcartney</person>
    <person id="2">Ringo Starr</person>
</persons>

it produces the following result:

<person id="1">
   <name>Paul Mcartney</name>
   <name>Sir Paul McCartney</name>
   <name>James Paul McCartney</name>
</person>
<person id="2">
   <name>Ringo Starr</name>
   <name>Richard Starkey</name>
   <name>Richard Starkey - Ringo Starr</name>
</person>
+1

XML, , eXist db, , Pythonesque, XML-, , , .

(: Fold the person records of p3.xml into the person-group elements of p2.xml. :)
let $persons := doc("/db/temp/p3.xml")/persons
let $person-groups := doc("/db/temp/p2.xml")/person-groups
for $person in $persons/person
let $name := element name {$person/text()}
(: The existing group with this person's id, if there is one. :)
let $person-group := $person-groups/person-group[@id=$person/@id]
return
   (: Add the name to the existing group, otherwise insert a new group for this id. :)
   if ($person-group) 
   then update insert $name into $person-group
   else update insert element person-group {attribute id {$person/@id}, $name} 
       into $person-groups

10000 100 eXist 100 .

, XQuery eXist , XQuery Update

0

Source: https://habr.com/ru/post/1745209/


All Articles