Format specific JSON objects on one line

Consider the following code:

>>> import json
>>> data = {
...     'x': [1, {'$special': 'a'}, 2],
...     'y': {'$special': 'b'},
...     'z': {'p': True, 'q': False}
... }
>>> print(json.dumps(data, indent=2))
{
  "y": {
    "$special": "b"
  },
  "z": {
    "q": false,
    "p": true
  },
  "x": [
    1,
    {
      "$special": "a"
    },
    2
  ]
}

I want to format the JSON so that objects that have only the single property '$special' appear on one line, as shown below.

{
  "y": {"$special": "b"},
  "z": {
    "q": false,
    "p": true
  },
  "x": [
    1,
    {"$special": "a"},
    2
  ]
}

I experimented with implementing a JSONEncoder subclass and passing it to json.dumps as the cls argument, but neither of the two relevant JSONEncoder methods helps:

  • The JSONEncoder.default method is called for the parts of data that the encoder can't already handle, but its return value is not treated as a raw JSON string, so there seems to be no way to adjust its formatting (see the sketch after this list).

  • The JSONEncoder.encode method does return a raw JSON string, but it is called only once, for data as a whole.
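
For illustration, a minimal sketch of the kind of attempt that fails (SpecialEncoder is just an illustrative name):

import json

class SpecialEncoder(json.JSONEncoder):
    def default(self, o):
        # Never reached for plain dicts: default() is only invoked for
        # objects the encoder cannot already serialize. Even when it is
        # called, a returned string gets re-encoded (quoted) rather than
        # spliced in as raw JSON.
        return super().default(o)

json.dumps({'y': {'$special': 'b'}}, cls=SpecialEncoder, indent=2)
# default() never fires here, and nothing lets me change the layout.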

Is there any way to get JSONEncoder to do what I want?

One option is to post-process the string produced by json.dumps with a regular expression:

import json
import re
data = {
    'x': [1, {'$special': 'a'}, 2],
    'y': {'$special': 'b'},
    'z': {'p': True, 'q': False}
}
text = json.dumps(data, indent=2)
pattern = re.compile(r"""
{
\s*
"\$special"
\s*
:
\s*
"
((?:[^"]|\\"))*  # Captures zero or more NotQuote or EscapedQuote
"
\s*
}
""", re.VERBOSE)
print(pattern.sub(r'{"$special": "\1"}', text))

This prints:

{
  "x": [
    1,
    {"$special": "a"},
    2
  ],
  "y": {"$special": "b"},
  "z": {
    "q": false,
    "p": true
  }
}
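
Note that this is pure text manipulation, so in principle the pattern could also fire on a string value that happens to contain the same {"$special": "..."} text. Wrapping it in a small helper keeps it reusable (a sketch; collapse_special is my own name, and the character class is tightened to handle backslash escapes):

import json
import re

_SPECIAL = re.compile(r'{\s*"\$special"\s*:\s*"((?:[^"\\]|\\.)*)"\s*}')

def collapse_special(obj, indent=2):
    # Serialize normally, then collapse one-property $special objects
    # onto a single line.
    text = json.dumps(obj, indent=indent)
    return _SPECIAL.sub(r'{"$special": "\1"}', text)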

The json module doesn't expose the hooks needed for this, so another route is to post-process its output. Conveniently, pretty-printed JSON also happens to be a valid stream of Python tokens (true, false and null scan as names, and the braces keep everything inside an implicit continuation), so it can be rewritten with the tokenize module:

import tokenize
from io import BytesIO


def inline_special(json_data):
    def adjust(t, ld):
        """Adjust a token's line numbers by the offset ld."""
        (sl, sc), (el, ec) = t.start, t.end
        return t._replace(start=(sl + ld, sc), end=(el + ld, ec))

    def transform():
        with BytesIO(json_data.encode('utf8')) as b:
            held = []  # to defer newline tokens
            lastend = None  # to track the end pos of the prev token
            loffset = 0     # line offset to adjust tokens by
            tokens = tokenize.tokenize(b.readline)
            for tok in tokens:
                if tok.type == tokenize.NL:
                    # hold newlines until we know there's no special key coming
                    held.append(adjust(tok, loffset))
                elif (tok.type == tokenize.STRING and
                        tok.string == '"$special"'):
                    # special string, collate tokens until the next rbrace
                    # held newlines are discarded, adjust the line offset
                    loffset -= len(held)
                    held = []
                    text = [tok.string]
                    while tok.exact_type != tokenize.RBRACE:
                        tok = next(tokens)
                        if tok.type != tokenize.NL:
                            text.append(tok.string)
                            if tok.string in ':,':
                                text.append(' ')
                        else:
                            loffset -= 1  # following lines all shift
                    line, col = lastend
                    text = ''.join(text)
                    endcol = col + len(text)
                    yield tokenize.TokenInfo(
                        tokenize.STRING, text, (line, col), (line, endcol),
                        '')
                    # adjust any remaining tokens on this line
                    while tok.type != tokenize.NL:
                        tok = next(tokens)
                        yield tok._replace(
                            start=(line, endcol),
                            end=(line, endcol + len(tok.string)))
                        endcol += len(tok.string)
                else:
                    # uninteresting token, yield any held newlines
                    if held:
                        yield from held
                        held = []
                    # adjust and remember last position
                    tok = adjust(tok, loffset)
                    lastend = tok.end
                    yield tok

    return tokenize.untokenize(transform()).decode('utf8')

Demo:

import json

data = {
    'x': [1, {'$special': 'a'}, 2],
    'y': {'$special': 'b'},
    'z': {'p': True, 'q': False}
}

>>> print(inline_special(json.dumps(data, indent=2)))
{
  "x": [
    1,
    {"$special": "a"},
    2
  ],
  "y": {"$special": "b"},
  "z": {
    "p": true,
    "q": false
  }
}

You can also do this by copying and patching json.encoder.

Basically: copy _make_iterencode from json.encoder in its entirety and change it so that your special dictionary is printed without a newline and indentation. Then monkey-patch the json package to use your modified version, run the json dump, and undo the monkey-patch afterwards (if you want).

The _make_iterencode function is quite long, so only the parts that need to change are shown here.

import json
import json.encoder

def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
    ...
    def _iterencode_dict(dct, _current_indent_level):
        ...
        if _indent is not None:
            _current_indent_level += 1
            if '$special' in dct:
                newline_indent = ''
                item_separator = _item_separator
            else:
                newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
                item_separator = _item_separator + newline_indent
            yield newline_indent
        ...
        if newline_indent is not None:
            _current_indent_level -= 1
            if '$special' not in dct:
                yield '\n' + (' ' * (_indent * _current_indent_level))

def main():
    data = {
        'x': [1, {'$special': 'a'}, 2],
        'y': {'$special': 'b'},
        'z': {'p': True, 'q': False},
    }

    orig_make_iterencode = json.encoder._make_iterencode
    json.encoder._make_iterencode = _make_iterencode
    print(json.dumps(data, indent=2))
    json.encoder._make_iterencode = orig_make_iterencode
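
Since the patch is process-global, a small context manager makes the swap-and-restore safer (a sketch; patched_iterencode is my own name). Note that this approach only works for indented dumps anyway: json.dumps takes the pure-Python _make_iterencode path only when indent is not None, so with the default indent=None the C accelerator would bypass the patch.

import contextlib
import json
import json.encoder

@contextlib.contextmanager
def patched_iterencode(replacement):
    # Install the modified _make_iterencode, restoring the original
    # even if the dump raises.
    original = json.encoder._make_iterencode
    json.encoder._make_iterencode = replacement
    try:
        yield
    finally:
        json.encoder._make_iterencode = original

with patched_iterencode(_make_iterencode):
    print(json.dumps(data, indent=2))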

Source: https://habr.com/ru/post/1657652/

