C# serializing data

I use BinaryFormatter to serialize data to disk, but it does not seem very scalable. I created a 200 MB data file but am unable to read it back in ("End of Stream encountered before parsing was completed"). It tries to deserialize for about 30 minutes and then gives up. This is on a fairly decent quad-core machine with 8 GB of RAM.

I am serializing a rather complex structure.

htCacheItems is a Hashtable of CacheItems. Each CacheItem has several simple members (strings, ints, etc.), and also contains a Hashtable and a custom implementation of a linked list. The sub-hashtable points to CacheItemValue structures, which are currently simple DTOs that contain a key and a value. The elements of the linked list are equally simple.

The data file that fails contains about 400,000 CacheItemValues.

Smaller datasets work well (although they take longer than I would expect to deserialize and use a hell of a lot of memory).

    /// <summary>
    /// Serializes <c>htCacheItems</c> to <paramref name="sBinaryFile"/> with a
    /// <see cref="BinaryFormatter"/>, creating the file or overwriting an existing one.
    /// </summary>
    /// <remarks>
    /// When the file name ends with "gz" the payload is written through a
    /// <see cref="GZipStream"/>. <c>Load</c> decompresses files with that suffix, so a
    /// "gz" file written uncompressed could never be read back by it.
    /// </remarks>
    /// <param name="sBinaryFile">Path of the file to create or overwrite.</param>
    /// <returns>
    /// <c>true</c> when the data was serialized and every stream was flushed and closed;
    /// <c>false</c> when any exception was raised (the failure is written to the trace output).
    /// </returns>
    public virtual bool Save(String sBinaryFile)
    {
        bool bSuccess = false;

        try
        {
            // The stream is opened inside the try block so that a bad path or a locked
            // file is reported through the return value like every other failure.
            using (FileStream fs = new FileStream(sBinaryFile, FileMode.Create))
            {
                // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data;
                // kept here because the on-disk format is shared with Load.
                BinaryFormatter formatter = new BinaryFormatter();

                if (sBinaryFile.EndsWith("gz", StringComparison.Ordinal))
                {
                    // Disposing the gzip stream flushes the compressed trailer before
                    // success is reported.
                    using (GZipStream gzfs = new GZipStream(fs, CompressionMode.Compress))
                    {
                        formatter.Serialize(gzfs, htCacheItems);
                    }
                }
                else
                {
                    formatter.Serialize(fs, htCacheItems);
                }
            }

            // Only reached once the streams have been closed without error.
            bSuccess = true;
        }
        catch (Exception e)
        {
            // Keep the best-effort bool contract, but leave a record of what went wrong
            // instead of discarding the exception.
            System.Diagnostics.Trace.TraceError("Failed to save cache to file " + sBinaryFile + ". Message is " + e.Message);
            bSuccess = false;
        }
        return bSuccess;
    }

    /// <summary>
    /// Replaces <c>htCacheItems</c> with the <see cref="Hashtable"/> deserialized from
    /// <paramref name="sBinaryFile"/> by a <see cref="BinaryFormatter"/>.
    /// </summary>
    /// <remarks>
    /// Files whose name ends with "gz" are read through a <see cref="GZipStream"/>.
    /// <c>AssemblyResolveEventHandler</c> is subscribed to
    /// <see cref="AppDomain.AssemblyResolve"/> only while deserialization is running.
    /// </remarks>
    /// <param name="sBinaryFile">Path of an existing file to read.</param>
    /// <returns>
    /// <c>true</c> when the cache was populated; <c>false</c> when any exception was raised
    /// (the failure is logged through <c>Logger</c>).
    /// </returns>
    public virtual bool Load(String sBinaryFile)
    {
        bool bSuccess = false;

        FileStream fs = null;
        GZipStream gzfs = null;

        try
        {
            // FileMode.Open, not OpenOrCreate: a missing file must fail right here.
            // OpenOrCreate silently created an empty file, which the formatter then
            // rejected with "End of Stream encountered before parsing was completed".
            // Read access is all that loading needs.
            fs = new FileStream(sBinaryFile, FileMode.Open, FileAccess.Read);

            if (sBinaryFile.EndsWith("gz", StringComparison.Ordinal))
            {
                gzfs = new GZipStream(fs, CompressionMode.Decompress);
            }

            // Add the event handler for the duration of the deserialization only.
            ResolveEventHandler resolveEventHandler = new ResolveEventHandler(AssemblyResolveEventHandler);
            AppDomain.CurrentDomain.AssemblyResolve += resolveEventHandler;
            try
            {
                // NOTE(review): BinaryFormatter is obsolete and must not be fed untrusted
                // files; deserialization can execute code chosen by the file's author.
                BinaryFormatter formatter = new BinaryFormatter();
                htCacheItems = (Hashtable)formatter.Deserialize(gzfs != null ? (Stream)gzfs : (Stream)fs);
            }
            finally
            {
                // Remove the event handler even when Deserialize throws, so failed
                // loads do not leave handlers accumulating on the AppDomain.
                AppDomain.CurrentDomain.AssemblyResolve -= resolveEventHandler;
            }

            bSuccess = true;
        }
        catch (Exception e)
        {
            Logger.Write(new ExceptionLogEntry("Failed to populate cache from file " + sBinaryFile + ". Message is " + e.Message));
            bSuccess = false;
        }
        finally
        {
            // Close the wrapper before the stream it wraps.
            if (gzfs != null)
            {
                gzfs.Close();
            }
            if (fs != null)
            {
                fs.Close();
            }
        }
        return bSuccess;
    }

The resolveEventHandler is just a workaround because I serialize the data in one application and load it into another ( http://social.msdn.microsoft.com/Forums/en-US/netfxbcl/thread/e5f0c371-b900-41d8-9a5b-1052739f2521 )

The question is, how can I improve this? Is data serialization always going to be this inefficient, or would I be better off writing my own routines?

+3
source share
3 answers

; . BinaryFormatter, DTO (dll), .

dll, IMO BinaryFormatter - , XmlSerializer DataContractSerializer, " " ( : ).

200MB , , . - ; .

, , .


, protobuf-net. , , , ; , , :

using System;
using System.Collections.Generic;
using System.IO;
using ProtoBuf;
/// <summary>
/// One cache entry: two integer members plus a string-keyed dictionary of
/// <see cref="CacheItemValue"/> and a reference to a <see cref="ListNode"/> chain.
/// Members tagged with <c>ProtoMember</c> carry field numbers 1-3.
/// </summary>
[ProtoContract]
class CacheItem
{
    [ProtoMember(1)]
    public int Id { get; set; }
    [ProtoMember(2)]
    public int AnotherNumber { get; set; }
    // Backing store for Data; created once per instance and never replaced.
    private readonly Dictionary<string, CacheItemValue> data
        = new Dictionary<string,CacheItemValue>();
    // Get-only: exposes the dictionary instance created above, there is no setter.
    [ProtoMember(3)]
    public Dictionary<string, CacheItemValue> Data { get { return data; } }

    // Nodes currently has no active ProtoMember attribute: the tag below is
    // disabled, as the original author's note says.
    //[ProtoMember(4)] // commented out while I investigate...
    public ListNode Nodes { get; set; }
}
/// <summary>
/// Node of a singly linked list of doubles: <see cref="Head"/> is this node's value and
/// <see cref="Tail"/> references another node. <c>Program.Main</c> builds lists by
/// setting <c>Tail</c> to the previously created node, starting from <c>null</c>.
/// </summary>
[ProtoContract]
class ListNode // I'd probably expose this as a simple list, though
{
    [ProtoMember(1)]
    public double Head { get; set; }
    [ProtoMember(2)]
    public ListNode Tail { get; set; }
}
/// <summary>
/// Simple key/value DTO stored in <c>CacheItem.Data</c>. <c>Program.Main</c> sets
/// <see cref="Key"/> to the same string it uses as the dictionary key.
/// </summary>
[ProtoContract]
class CacheItemValue
{
    [ProtoMember(1)]
    public string Key { get; set; }
    [ProtoMember(2)]
    public float Value { get; set; }
}
/// <summary>
/// Demo harness: builds a synthetic cache, writes it to "raw.bin" with protobuf-net,
/// reads it back, and prints a checksum before and after so the two can be compared.
/// </summary>
static class Program
{
    // 400 items x 1000 values each = the 400k CacheItemValue records being simulated.
    private const int ItemCount = 400;
    private const int ValuesPerItem = 1000;

    /// <summary>
    /// Generates the test data, round-trips it through "raw.bin" and writes both
    /// checksums to the console.
    /// </summary>
    static void Main()
    {
        // invent 400k CacheItemValue records
        Dictionary<string, CacheItem> htCacheItems = new Dictionary<string, CacheItem>();
        Random rand = new Random(123456); // fixed seed
        for (int i = 0; i < ItemCount; i++)
        {
            string key;
            CacheItem ci = new CacheItem {
                Id = rand.Next(10000),
                AnotherNumber = rand.Next(10000)
            };
            // Draw random keys until one is not already in use.
            do
            {
                key = rand.NextString();
            } while (htCacheItems.ContainsKey(key));
            htCacheItems.Add(key, ci);
            for (int j = 0; j < ValuesPerItem; j++)
            {
                do
                {
                    key = rand.NextString();
                } while (ci.Data.ContainsKey(key));
                ci.Data.Add(key,
                    new CacheItemValue {
                        Key = key,
                        Value = (float)rand.NextDouble()
                    });
                // NOTE(review): a new list is built and assigned on every pass, so only
                // the list from the final pass stays attached to the item. Left as-is so
                // the sequence of random draws (and therefore the data) is unchanged.
                int tail = rand.Next(1, 50);
                ListNode node = null;
                while (tail-- > 0)
                {
                    node = new ListNode
                    {
                        Tail = node,
                        Head = rand.NextDouble()
                    };
                }
                ci.Nodes = node;
            }
        }
        Console.WriteLine(GetChecksum(htCacheItems));
        using (Stream outfile = File.Create("raw.bin"))
        {
            Serializer.Serialize(outfile, htCacheItems);
        }
        // Drop the original graph so the second checksum can only come from the file.
        htCacheItems = null;
        using (Stream inFile = File.OpenRead("raw.bin"))
        {
            htCacheItems = Serializer.Deserialize<Dictionary<string, CacheItem>>(inFile);
        }
        Console.WriteLine(GetChecksum(htCacheItems));
    }

    /// <summary>
    /// Folds the item count, every item's key hash, <c>AnotherNumber</c> and <c>Id</c>,
    /// and every sub-value's key and value hashes into a single int.
    /// </summary>
    /// <param name="data">The cache to summarize.</param>
    /// <returns>The wrapping 32-bit sum of all contributions.</returns>
    static int GetChecksum(Dictionary<string, CacheItem> data)
    {
        // The sum of hundreds of thousands of hash codes is expected to wrap around.
        // Marking it unchecked keeps that behavior even when the project is compiled
        // with overflow checking, where it would otherwise throw OverflowException.
        unchecked
        {
            int chk = data.Count;
            foreach (var item in data)
            {
                chk += item.Key.GetHashCode()
                    + item.Value.AnotherNumber + item.Value.Id;
                foreach (var subItem in item.Value.Data.Values)
                {
                    chk += subItem.Key.GetHashCode()
                        + subItem.Value.GetHashCode();
                }
            }
            return chk;
        }
    }

    /// <summary>
    /// Returns a random string of 4 to 9 characters drawn from lowercase letters,
    /// digits and the space character.
    /// </summary>
    /// <param name="random">Random source to draw the length and characters from.</param>
    static string NextString(this Random random)
    {
        const string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789 ";
        int len = random.Next(4, 10); // upper bound is exclusive
        char[] buffer = new char[len];
        for (int i = 0; i < len; i++)
        {
            buffer[i] = alphabet[random.Next(0, alphabet.Length)];
        }
        return new string(buffer);
    }
}
+2

, .

, . , ( : #) - , .

, , - , . , ? ( , ..)

+2

-, , - .

mainHashtable.serialize(), XML, . everyItemInYourHashtable.serialize() ..

, "unserialize (String xml)", . ?

, , .

ISerializable, , . IMO, "Microsoft" ( DOM ..), , : cascade.

+1

Source: https://habr.com/ru/post/1712842/


All Articles