How to use Perl XML::XPath with element names that are not in English?

How can I work with XML::XPath when the names of some elements are not in English?

I am using Strawberry Perl.

I got employees.xml and train_xml.pl from the Internet, and they work well.

But when I add some Chinese characters, I get the following error:

Wide character in die at D:/Strawberry/perl/site/lib/XML/XPath/Parser.pm line 189.

Query:
/employees/employee[@age="30"]/工作...
..............................^^^
Invalid query somewhere around here (I think)

How can I solve this?

employees.xml:

<?xml version="1.0" encoding="utf-8" ?>
<employees>
    <employee age="30">
        <name>linux</name>
    <country>US</country>
    <工作>教师</工作>
    </employee>
    <employee age="10">
    <name>mac</name>
    <country>US</country>
    </employee>
    <employee age="20">
    <name>windows</name>
    <country>US</country>
    </employee>
</employees>

train_xml.pl:

# train_xml.pl as posted in the question: loads employees.xml and prints the
# value of one ASCII-named element and one Chinese-named element.
use Encode;  # imported, but not called in the lines shown here
# NOTE(review): `XML::XPath->new` is a method call, not a module name; this was
# presumably meant to be `use XML::XPath;` -- confirm.
use XML::XPath->new;
use utf8;  # the source contains the non-ASCII literal '工作' below
my $xp=XML::XPath->new(filename=>"employees.xml");
print $xp->findvalue('/employees/employee[@age="10"]/name'),"\n";
# The query with the Chinese element name; this is the one shown in the
# "Invalid query" error message quoted in the question.
my $path1 = '/employees/employee[@age="30"]/工作';
print $xp->findvalue($path1),"\n";
+4
source share
2 answers

You can use XML::LibXML:

#!/usr/bin/perl

# Prints the string value of two XPath queries against the XML file named by
# the first command-line argument, using XML::LibXML.

use strict;
use warnings;

use utf8;                              # the second query contains non-ASCII characters
use open ':std', ':encoding(UTF-8)';   # UTF-8 layer on the standard handles

use feature qw( say );

use XML::LibXML qw( );

# Queries are evaluated, and their values printed, in this order.
my @xpath_queries = (
   '/employees/employee[@age="10"]/name',
   '/employees/employee[@age="30"]/工作',
);

my $document = XML::LibXML->new()->parse_file($ARGV[0]);

say $document->findvalue($_) for @xpath_queries;

Output:

$ ./a a.xml
mac
教师

If you want to keep using the (buggy, slower, and far less widely used) XML::XPath, you can use the following:

#!/usr/bin/perl

# Prints the string value of two XPath queries against the XML file named by
# the first command-line argument, using XML::XPath after replacing the name
# patterns held in XML::XPath::Parser's package variables.

use strict;
use warnings;

use utf8;                              # the second query contains non-ASCII characters
use open ':std', ':encoding(UTF-8)';   # UTF-8 layer on the standard handles

use feature qw( say );

use XML::XPath qw( );

{ # Monkeypatch XML::XPath.
   package XML::XPath::Parser;

   # Pieces of a regex character class, kept as regex source text (hence the
   # single quotes, which leave the backslashes in place). The colon is
   # deliberately absent from the name-start set.
   my $name_start_class = join '',
      'a-zA-Z_',
      '\xC0-\xD6',         '\xD8-\xF6',         '\xF8-\x{2FF}',
      '\x{370}-\x{37D}',   '\x{37F}-\x{1FFF}',  '\x{200C}-\x{200D}',
      '\x{2070}-\x{218F}', '\x{2C00}-\x{2FEF}', '\x{3001}-\x{D7FF}',
      '\x{F900}-\x{FDCF}', '\x{FDF0}-\x{FFFD}', '\x{10000}-\x{EFFFF}';

   # Characters allowed after the first one: everything above plus these.
   my $name_class = join '',
      $name_start_class,
      '\-.0-9',
      '\xB7',
      '\x{300}-\x{36F}',
      '\x{203F}-\x{2040}';

   # One start character followed by any number of name characters.
   my $name_pattern = sprintf '(?:[%s][%s]*)', $name_start_class, $name_class;

   # Overwrite the parser's package variables with the wider patterns.
   $NCName = $name_pattern;
   $QName  = $NCName . '(?::' . $NCName . ')?';
   $NCWild = $NCName . ':\*';
}

my $xpath = XML::XPath->new(filename => $ARGV[0]);

# Queries are evaluated, and their values printed, in this order.
say $xpath->findvalue($_)
   for '/employees/employee[@age="10"]/name',
       '/employees/employee[@age="30"]/工作';

Output:

$ ./a a.xml
mac
教师
+4

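First, note that the following line in your script is not the right way to load the module; it should be `use XML::XPath;`: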

use XML::XPath->new;

The actual problem comes from the following definition in XML/XPath/Parser.pm:

$NCName = '([A-Za-z_][\w\\.\\-]*)';

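That is, an element name is only accepted if it starts with an ASCII letter or _. The following script demonstrates this: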

#!/usr/bin/env perl

# Runs two XPath queries against the XML document embedded after __DATA__
# and prints the value of each, one per line.

use v5.14;
use strict;
use warnings;

use utf8;
use open qw(:std :encoding(UTF-8));

use XML::XPath;

# Queries are evaluated in this order.
my @xpath_queries = (
    '/employees/employee[@age="30"]/yağcı',      # element name starts with an ASCII letter
    '/employees/employee[@age="30"]/şımarık',    # element name starts with a non-ASCII letter
);

my $xp = XML::XPath->new(ioref => \*DATA );

say $xp->findvalue($_) for @xpath_queries;

__DATA__
<?xml version="1.0" encoding="utf-8" ?>
<employees>
    <employee age="30">
        <şımarık>değil</şımarık>
        <yağcı>değil</yağcı>
    </employee>
</employees>

Output:

C:\...\> perl x.pl
değil
Query:
/employees/employee[@age="30"]/şımarık...
..............................^^^
Invalid query somewhere around here (I think)

Changing the definition to:

$NCName = '(\w[\w\\.\\-]*)';

gives:

C:\...\> perl x.pl
değil
değil

And, with the Chinese element name from the question, the output is:

değil
教师

.

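Note that `\w` is still not an exact match for what XML allows in a name. The XML specification defines names as follows: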

Name          ::= NameStartChar (NameChar)*

NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
                  [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
                  [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] |
                  [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
                  [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]

NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
                  [#x0300-#x036F] | [#x203F-#x2040]

.

This appears to have been fixed in XML::XPath version 1.41.

+3

Source: https://habr.com/ru/post/1682232/


All Articles