How can I split large xml into small pieces using VTDGenHuge?

I want to split a large XML file into small pieces. I use VTDGen to split the file, and it works fine for file sizes < 2 GB. However, VTD-XML parses the XML in memory, and I don't want to load the whole file into memory. Therefore, I am trying to use memory mapping with VTDGenHuge.

The code works fine with VTDGen, but when I switch to VTDGenHuge it does not work.

        String prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"+"\n";
    // Purpose: split the /Employees/Employee elements of Input_1.xml into output
    // files holding at most splitBy elements each; every output file starts with
    // prefix and ends with suffix.
    // NOTE(review): suffix closes </Employees>, but prefix never opens that element —
    // presumably the second literal in prefix was meant to be "<Employees>\n"; confirm.
    String suffix = "\n</Employees>\n";
    try {

        VTDGenHuge vg = new VTDGenHuge();
        // Parse the input in memory-mapped mode (VTDGenHuge.MEM_MAPPED) with the second argument set to true.
        if (vg.parseFile("C:\\Users\\abc\\Desktop\\latestxml\\Input_1.xml", true,VTDGenHuge.MEM_MAPPED)) {
            // Number of Employee elements written to each output file (see k % splitBy below).
            // NOTE(review): the initializer is missing, so this line does not compile as shown.
            int splitBy = ;
           System.out.println("Started time"+ new Date());
            VTDNavHuge vn = vg.getNav();               
            AutoPilotHuge ap = new AutoPilotHuge(vn);
            ap.selectXPath("/Employees/Employee");
            // Collects one fragment descriptor per matched element.
            FastLongBuffer flb = new FastLongBuffer(4);
            int i;
            // NOTE(review): according to the report that accompanies this snippet, getBytes()
            // returns null here when the file is parsed with MEM_MAPPED, so the
            // fos.write(xml, ...) call further down cannot work — confirm against the VTD-XML docs.
            byte[] xml = vn.getXML().getBytes();          
            // Visit every matching element and remember its fragment descriptor.
            while ((i = ap.evalXPath()) != -1) {  
                flb.append(vn.getElementFragment());
            }
            int size = flb.size();
            if (size != 0) {
                File fo = null;
                FileOutputStream fos = null;
                for (int k = 0; k < size; k++) {
                   // Every splitBy-th entry finishes the file currently open (if any).
                   if (k % splitBy == 0) {
                        if (fo != null) {
                            fos.write(suffix.getBytes());
                            fos.close();
                            fo = null;
                        }
                    }
                    // No file open: start a new one named after the index of its first entry.
                    if (fo == null) {
                        fo = new File("C:\\Users\\abc\\Desktop\\Test\\xml\\"+"out" + k + ".xml");
                        fos = new FileOutputStream(fo);
                        fos.write(prefix.getBytes());
                    }
                    fos.write("\n".getBytes());                       
                    // The lower 32 bits of the entry are used as the byte offset into xml and
                    // the upper 32 bits as the number of bytes to copy.
                    fos.write(xml, flb.lower32At(k), flb.upper32At(k));
                }
                // Finish the last (possibly partially filled) output file.
                if (fo != null) {                       
                    fos.write(suffix.getBytes());                  
                    fos.close();
                    fo = null;
                }
            }

        }
    } catch (Exception e) {
        // Any failure is only reported on stderr.
        // NOTE(review): there is no finally block, so an open FileOutputStream is not closed on this path.
        e.printStackTrace();
    }

I get a null value from "byte[] xml = vn.getXML().getBytes();". When I print vn.getXML() with System.out.println, I get an object reference, but getBytes() on it returns null, and I don't know why. However, if I call byteAt(x), where x is any long value, it does return a value.

My xml file:

<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
    <age>29</age>
    <name>Pankaj</name>
    <gender>Male</gender>
    <role>Java Developer</role>
</Employee>
<Employee id="2">
    <age>35</age>
    <name>Lisa</name>
    <gender>Female</gender>
    <role>CEO</role>
</Employee>
<Employee id="3">
    <age>40</age>
    <name>Tom</name>
    <gender>Male</gender>
    <role>Manager</role>
</Employee>
    <Employee id="1">
    <age>29</age>
    <name>Pankaj</name>
    <gender>Male</gender>
    <role>Java Developer</role>
</Employee>
<Employee id="2">
    <age>35</age>
    <name>Lisa</name>
    <gender>Female</gender>
    <role>CEO</role>
</Employee>
<Employee id="3">
    <age>40</age>
    <name>Tom</name>
    <gender>Male</gender>
    <role>Manager</role>
</Employee>
</Employees>

I want the output to look like this:

<?xml version="1.0" encoding="UTF-8"?>
 <Employees>
<Employee id="1">
    <age>29</age>
    <name>Pankaj</name>
    <gender>Male</gender>
    <role>Java Developer</role>
</Employee>
<Employee id="2">
    <age>35</age>
    <name>Lisa</name>
    <gender>Female</gender>
    <role>CEO</role>
</Employee>
<Employee id="3">
    <age>40</age>
    <name>Tom</name>
    <gender>Male</gender>
    <role>Manager</role>    
</Employee>
</Employees>

<?xml version="1.0" encoding="UTF-8"?>
<Employees> 
    <Employee id="1">
    <age>29</age>
    <name>Pankaj</name>
    <gender>Male</gender>
    <role>Java Developer</role>
</Employee>
<Employee id="2">
    <age>35</age>
    <name>Lisa</name>
    <gender>Female</gender>
    <role>CEO</role>
</Employee>
<Employee id="3">
    <age>40</age>
    <name>Tom</name>
    <gender>Male</gender>
    <role>Manager</role>
</Employee>
</Employees>
+4
2

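In extended VTD-XML, vn.getXML() returns an IByteBuffer, which is an interface defined by VTD-XML. Use a method of that interface, such as writeOutputToFile(), to write each fragment to its output file.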

+1

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;

namespace ConsoleApplication1
{
    /// <summary>
    /// Streams a large XML file and splits its <c>Employee</c> elements into
    /// several smaller documents, each wrapped in an <c>Employees</c> root.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";
        const int OUTPUT_ELEMENTS = 3;
        // Output file name pattern; {0} is the 1-based index of the output file.
        const string OUTPUT_FILENAME_FORMAT = @"c:\temp\test{0}.xml";

        /// <summary>
        /// Reads <c>FILENAME</c> with a forward-only reader, so the input is never
        /// loaded into memory as a whole, and writes every group of
        /// <c>OUTPUT_ELEMENTS</c> <c>Employee</c> elements to its own file. A final,
        /// smaller group is written to one more file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XmlReaderSettings settings = new XmlReaderSettings() { IgnoreWhitespace = true };

            // Dispose the reader so the input file handle is released even on failure.
            using (XmlReader reader = XmlReader.Create(FILENAME, settings))
            {
                int fileIndex = 0;
                int elementsInDoc = 0;
                XDocument doc = null;

                while (!reader.EOF)
                {
                    // Skip everything that is not the start tag of an Employee element
                    // (declaration, root start/end tags, comments, unrelated elements).
                    if (reader.NodeType != XmlNodeType.Element || reader.Name != "Employee")
                    {
                        reader.Read();
                        continue;
                    }

                    if (doc == null)
                    {
                        doc = CreateEmptyDocument();
                    }

                    // XNode.ReadFrom consumes the whole element and leaves the reader on
                    // the node that follows it, so Read() must not be called here.
                    doc.Root.Add(XNode.ReadFrom(reader));
                    elementsInDoc++;

                    if (elementsInDoc == OUTPUT_ELEMENTS)
                    {
                        fileIndex++;
                        SaveDocument(doc, fileIndex);
                        doc = null;
                        elementsInDoc = 0;
                    }
                }

                // Write the trailing partial group under the next index so that it
                // neither loses the .xml extension nor reuses the previous file's name.
                if (doc != null)
                {
                    fileIndex++;
                    SaveDocument(doc, fileIndex);
                }
            }
        }

        /// <summary>Creates a document with an XML declaration and an empty <c>Employees</c> root.</summary>
        /// <returns>The new, empty output document.</returns>
        private static XDocument CreateEmptyDocument()
        {
            return new XDocument(
                new XDeclaration("1.0", "UTF-8", null),
                new XElement("Employees"));
        }

        /// <summary>Saves <paramref name="doc"/> to the output file with the given index.</summary>
        /// <param name="doc">The document to write.</param>
        /// <param name="fileIndex">1-based index inserted into the output file name.</param>
        private static void SaveDocument(XDocument doc, int fileIndex)
        {
            doc.Save(string.Format(OUTPUT_FILENAME_FORMAT, fileIndex));
        }
    }
}
-1

Source: https://habr.com/ru/post/1608973/


All Articles