How to use yaml.load_all with fileinput.input?

Without resorting to ''.join, is there a Pythonic way to use PyYAML's yaml.load_all with fileinput.input() to easily stream multiple documents from multiple sources?

I am looking for something like the following (non-working example):

# example.py
# Deliberately non-working illustration: the object returned by
# fileinput.input() is handed straight to yaml.load_all.
import fileinput

import yaml

# Fails with AttributeError, because the FileInput object has no read().
for doc in yaml.load_all(fileinput.input()):
    print(doc)

Expected Result:

$ cat >pre.yaml <<<'--- prefix-doc'
$ cat >post.yaml <<<'--- postfix-doc'
$ python example.py pre.yaml - post.yaml <<<'--- hello'
prefix-doc
hello
postfix-doc

Of course, yaml.load_all expects a string, bytes, or a file-like object, and fileinput.input() is none of these, so the above example does not work.

Actual output:

$ python example.py pre.yaml - post.yaml <<<'--- hello'
...
AttributeError: FileInput instance has no attribute 'read'

You can make the example work with ''.join, but that is cheating. I am looking for a way that does not immediately read the entire stream into memory.

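Question: is there a simple way — an adapter, for example — to do this? That is, something that gives yaml.load_all a stream it can read from piece by piece, without joining the input first.

I am looking for something like this: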

# minimal_adapter is the wished-for wrapper: it should turn the FileInput
# object into something yaml.load_all can read from.
for doc in yaml.load_all(minimal_adapter(fileinput.input())):
    print(doc)
+4
2

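minimal_adapter should take a fileinput.FileInput as its argument and return an object that load_all can use. load_all accepts either a string, which would require joining the input, or an object that has a read() method.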

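Since minimal_adapter has to keep some state, it is easiest to implement it as an instance of a class with a __call__ method, which stores its argument for later use and returns the instance. That class also needs a read() method, because that is what gets called once the instance has been handed to load_all: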

import fileinput
import ruamel.yaml


class MinimalAdapter:
    """Give an iterable of text lines the ``read()`` method ``load_all`` needs.

    An instance is callable: calling it with a line source (for example the
    object returned by ``fileinput.input()``) stores that source, resets the
    buffer and returns the instance itself, so the result of the call can be
    passed directly to ``load_all``.
    """

    def __init__(self):
        self._fip = None  # line source; set by __call__
        self._buf = None  # text already pulled from the source but not yet returned

    def __call__(self, fip):
        """Attach the line source *fip* and return this adapter."""
        self._fip = fip  # store for future use
        self._buf = ""
        return self

    def read(self, size=-1):
        """Return up to *size* characters taken from the attached source.

        Lines are pulled from the source only until *size* characters are
        available; whatever is left of the last line stays buffered for the
        next call. Fewer than *size* characters (eventually ``''``) are
        returned once the source is exhausted. A negative or ``None`` *size*
        returns everything that is left, as file objects do.
        """
        if size is None or size < 0:
            # Without this branch a negative size would slice the buffer
            # with [:size] and return truncated data.
            tmp, self._buf = self._buf + "".join(self._fip), ""
            return tmp
        if len(self._buf) < size:
            # Gather whole lines and join once instead of growing a string.
            parts = [self._buf]
            have = len(self._buf)
            for line in self._fip:
                parts.append(line)
                have += len(line)
                if have >= size:
                    # Enough to satisfy the request; do not pull another line.
                    break
            self._buf = "".join(parts)
        # If the source ran dry the buffer is shorter than size, so this
        # returns all of it and leaves the buffer empty.
        tmp, self._buf = self._buf[:size], self._buf[size:]
        return tmp


minimal_adapter = MinimalAdapter()

# Wrap the FileInput object first, then let load_all pull documents from it.
# NOTE(review): load_all is called without an explicit Loader here; which
# loader that selects depends on the installed ruamel.yaml version - confirm
# before feeding it untrusted input.
stream = minimal_adapter(fileinput.input())
for doc in ruamel.yaml.load_all(stream):
    print(doc)

Running this on the example invocation gives the expected output.

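This is probably only more memory-efficient for larger files. load_all tries to read blocks of 1024 characters at a time (easy to check by putting a print statement in MinimalAdapter.read()), and fileinput does some buffering of its own as well (use strace, if necessary, to find out how it behaves).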


This was done using ruamel.yaml, a YAML 1.2 parser. It should work with PyYAML as well, since ruamel.yaml is derived from it.

+3

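The problem with fileinput.input is that the object it returns has no read method, which is what yaml.load_all looks for. If you are willing to give up fileinput, you can write your own class that does what you want: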

import sys                                                                      
import yaml                                                                     

class BunchOFiles(object):
    """Present several files as one object with a ``read()`` method.

    The names are opened one at a time, in the order given; the name ``'-'``
    stands for ``sys.stdin``. Data is served from the current file until it
    is exhausted, then from the next one.
    """

    def __init__(self, *files):
        """Remember the file names and open the first one, if there is any."""
        self.files = files
        self.fditer = self._fditer()
        # The next() builtin works on Python 2.6+ and 3 alike; the default
        # leaves fd as None when no names were given instead of raising.
        self.fd = next(self.fditer, None)

    def _fditer(self):
        """Yield one open file object per name, closing each when advanced."""
        for fn in self.files:
            if fn == '-':
                # stdin is not ours to close, so keep it out of the with block
                yield sys.stdin
            else:
                with open(fn, 'r') as fd:
                    yield fd

    def read(self, size=-1):
        """Return up to *size* characters from the current file.

        When the current file has nothing left, the next one is opened and
        tried. An empty string is returned once every file is exhausted, and
        on every later call.
        """
        if size == 0:
            # An empty result here must not be mistaken for end-of-file.
            return ''
        data = ''
        while self.fd is not None:
            data = self.fd.read(size)
            if data:
                break
            # Current file is drained; move on, or to None after the last one.
            self.fd = next(self.fditer, None)
        return data

# Treat every command-line argument as a file name ('-' meaning stdin).
bunch = BunchOFiles(*sys.argv[1:])
# NOTE(review): yaml.load_all is called without an explicit Loader. Depending
# on the installed PyYAML version this may construct arbitrary objects or be
# rejected outright; consider yaml.safe_load_all for untrusted input - confirm.
for doc in yaml.load_all(bunch):
    # Function-call form prints the same on Python 2 and is valid on Python 3.
    print(doc)

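The BunchOFiles class gives you an object with a read method that works its way through the list of files until all of them are exhausted. With the code above and the sample input, you get exactly the expected output.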

+4

Source: https://habr.com/ru/post/1653847/


All Articles