Problems processing 1GB json file using JSON.NET

I have an application whose input data has grown from 50 thousand location records to 1.1 million location records. This has caused serious problems, because the entire file was previously deserialized into a single object. For a production-sized file with 1.1 million records, that object is about 1 GB. Because of the garbage-collection problems that large objects cause, I want to keep each deserialized object below the 85K mark (the .NET large object heap threshold).

I am trying to parse and deserialize one location object at a time, so that I can control how many objects are deserialized at once and, in turn, the size of the resulting object. I am using the Json.NET library for this.

Below is a sample JSON file that I get as a stream to my application.

    {
    "Locations": [{
        "LocationId": "",
        "ParentLocationId": "",
        "DisplayFlag": "Y",
        "DisplayOptions": "",
        "DisplayName": "",
        "Address": "",
        "SecondaryAddress": "",
        "City": "",
        "State": "",
        "PostalCode": "",
        "Country": "",
        "Latitude": 40.59485,
        "Longitude": -73.96174,
        "LatLonQuality": 99,
        "BusinessLogoUrl": "",
        "BusinessUrl": "",
        "DisplayText": "",
        "PhoneNumber": "",
        "VenueGroup": 7,
        "VenueType": 0,
        "SubVenue": 0,
        "IndoorFlag": "",
        "OperatorDefined": "",
        "AccessPoints": [{
            "AccessPointId": "",
            "MACAddress": "",
            "DisplayFlag": "",
            "DisplayOptions": "",
            "Latitude": 40.59485,
            "Longitude": -73.96174,
            "Status": "Up",
            "OperatorDefined": "",
            "RoamingGroups": [{
                "GroupName": ""
            },
            {
                "GroupName": ""
            }],
            "Radios": [{
                "RadioId": "",
                "RadioFrequency": "",
                "RadioProtocols": [{
                    "Protocol": ""
                }],
                "WifiConnections": [{
                    "BSSID": "",
                    "ServiceSets": [{
                        "SSID": "",
                        "SSID_Broadcasted": ""
                    }]
                }]
            }]
        }]
    },
    {
        "LocationId": "",
        "ParentLocationId": "",
        "DisplayFlag": "Y",
        "DisplayOptions": "",
        "DisplayName": "",
        "Address": "",
        "SecondaryAddress": "",
        "City": "",
        "State": "",
        "PostalCode": "",
        "Country": "",
        "Latitude": 40.59485,
        "Longitude": -73.96174,
        "LatLonQuality": 99,
        "BusinessLogoUrl": "",
        "BusinessUrl": "",
        "DisplayText": "",
        "PhoneNumber": "",
        "VenueGroup": 7,
        "VenueType": 0,
        "SubVenue": 0,
        "IndoorFlag": "",
        "OperatorDefined": "",
        "AccessPoints": [{
            "AccessPointId": "",
            "MACAddress": "",
            "DisplayFlag": "",
            "DisplayOptions": "",
            "Latitude": 40.59485,
            "Longitude": -73.96174,
            "Status": "Up",
            "OperatorDefined": "",
            "RoamingGroups": [{
                "GroupName": ""
            },
            {
                "GroupName": ""
            }],
            "Radios": [{
                "RadioId": "",
                "RadioFrequency": "",
                "RadioProtocols": [{
                    "Protocol": ""
                }],
                "WifiConnections": [{
                    "BSSID": "",
                    "ServiceSets": [{
                        "SSID": "",
                        "SSID_Broadcasted": ""
                    }]
                }]
            }]
        }]
    }]
}

,

    {
    "LocationId": "",
    "ParentLocationId": "",
    "DisplayFlag": "Y",
    "DisplayOptions": "",
    "DisplayName": "",
    "Address": "",
    "SecondaryAddress": "",
    "City": "",
    "State": "",
    "PostalCode": "",
    "Country": "",
    "Latitude": 40.59485,
    "Longitude": -73.96174,
    "LatLonQuality": 99,
    "BusinessLogoUrl": "",
    "BusinessUrl": "",
    "DisplayText": "",
    "PhoneNumber": "",
    "VenueGroup": 7,
    "VenueType": 0,
    "SubVenue": 0,
    "IndoorFlag": "",
    "OperatorDefined": "",
    "AccessPoints": [{
        "AccessPointId": "",
        "MACAddress": "",
        "DisplayFlag": "",
        "DisplayOptions": "",
        "Latitude": 40.59485,
        "Longitude": -73.96174,
        "Status": "Up",
        "OperatorDefined": "",
        "RoamingGroups": [{
            "GroupName": ""
        },
        {
            "GroupName": ""
        }],
        "Radios": [{
            "RadioId": "",
            "RadioFrequency": "",
            "RadioProtocols": [{
                "Protocol": ""
            }],
            "WifiConnections": [{
                "BSSID": "",
                "ServiceSets": [{
                    "SSID": "",
                    "SSID_Broadcasted": ""
                }]
            }]
        }]
    }]
}

Json.NET JsonTextReader , - , "", , , , .

, , ,

// Walks the incoming stream token by token with a JsonTextReader and tries to
// deserialize one nested object at a time.
var ser = new JsonSerializer();
using (var reader = new JsonTextReader(new StreamReader(stream)))
{
    reader.SupportMultipleContent = true;

    while (reader.Read())
    {   
        // Only react to the start of an object found at depth 2.
        if (reader.TokenType == JsonToken.StartObject && reader.Depth == 2)
        {                            
            // NOTE(review): this loop advances the reader past the StartObject
            // token before Deserialize is called below, so the reader is no
            // longer positioned at the beginning of the object - this looks
            // like the source of the problem described in the post; confirm.
            do
            {
                reader.Read();                                
            } while (reader.TokenType != JsonToken.EndObject && reader.Depth == 2);

            // NOTE(review): the target type here is `Locations`, while later
            // snippets in this post use `Location` - possibly a typo; verify.
            var singleLocation = ser.Deserialize<Locations>(reader);
        }
    }
}

. , , , .

0
2

, , , - .

JObject, - , .

,

// Streams location objects one at a time, yielding the valid ones and
// recording an error entry for each invalid one.
while (reader.Read())
{
    // Skip every token that is not the start of an object at depth 2.
    if (reader.TokenType != JsonToken.StartObject || reader.Depth != 2)
    {
        continue;
    }

    // Materialize just this one object from the reader's current position.
    location = JObject.Load(reader).ToObject<Location>();

    var validator = new LocationValidator(location, FootprintInfo.OperatorId, FootprintInfo.RoamingGroups, true);
    var validation = validator.IsValid();
    if (!validation.Successful)
    {
        errors.Add(new Error(elNumber, location.LocationId, validation.Error.Field, validation.Error.Detail));

        // Stop the whole enumeration once the error budget is used up.
        if (errors.Count >= maxErrors)
        {
            yield break;
        }
    }
    else
    {
        yield return location;
    }

    // Counts every object examined, valid or not.
    ++elNumber;
}
0

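Setting aside what appears to be a typo in the type name (`Locations`), `ser.Deserialize<T>(reader)` can be called directly once the reader is positioned at the start of the object, without skipping ahead first. So, given a suitable `Location` class, the following method can be used: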

    /// <summary>
    /// Lazily deserializes the JSON objects found at a given nesting depth,
    /// one object per iteration, so the whole document never has to be
    /// materialized as a single object graph.
    /// </summary>
    /// <typeparam name="T">Type each nested object is deserialized into.</typeparam>
    /// <param name="textReader">Source of the JSON text. Must not be null.</param>
    /// <param name="depth">
    /// Reader depth at which objects are picked up. Defaults to 2, the value
    /// this method originally hard-coded.
    /// </param>
    /// <returns>A lazily evaluated sequence of deserialized items.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown at call time when <paramref name="textReader"/> is null.
    /// </exception>
    public static IEnumerable<T> DeserializeNestedItems<T>(TextReader textReader, int depth = 2)
    {
        // Validate here rather than inside the iterator: iterator bodies do
        // not run until the first MoveNext, which would delay the failure.
        if (textReader == null)
        {
            throw new System.ArgumentNullException(nameof(textReader));
        }

        return DeserializeNestedItemsIterator<T>(textReader, depth);
    }

    // Iterator half of DeserializeNestedItems: scans tokens and yields one
    // deserialized object for every StartObject token seen at the given depth.
    private static IEnumerable<T> DeserializeNestedItemsIterator<T>(TextReader textReader, int depth)
    {
        var ser = new JsonSerializer();
        using (var reader = new JsonTextReader(textReader))
        {
            reader.SupportMultipleContent = true;

            while (reader.Read())
            {
                if (reader.TokenType == JsonToken.StartObject && reader.Depth == depth)
                {
                    // The reader is sitting on the object's first token, so it
                    // is handed to the serializer without any skipping.
                    yield return ser.Deserialize<T>(reader);
                }
            }
        }
    }

Example usage:

        // The input is expected to stream back exactly two Location items.
        Debug.Assert(DeserializeNestedItems<Location>(new StringReader(json)).Count() == 2); // Passes: the assertion does not fire.

        // Flatten every location's access points and keep only their coordinates.
        var coordinates = DeserializeNestedItems<Location>(new StringReader(json))
            .SelectMany(location => location.AccessPoints)
            .Select(accessPoint => new { accessPoint.Latitude, accessPoint.Longitude })
            .ToList();

        // Print the projected coordinates as indented JSON.
        Debug.WriteLine(JsonConvert.SerializeObject(coordinates, Formatting.Indented));

Output:

[
  {
    "Latitude": 40.59485,
    "Longitude": -73.96174
  },
  {
    "Latitude": 40.59485,
    "Longitude": -73.96174
  }
]

The `Location` class was generated from the sample JSON using http://json2csharp.com/.

0

Source: https://habr.com/ru/post/1618191/


All Articles