Reading an HDF5 file into a numpy array

I have the following code to read an hdf5 file as a numpy array:

hf = h5py.File('path/to/file', 'r')
n1 = hf.get('dataset_name')
n2 = np.array(n1)

and when I print n2, I get the following:

Out[15]:
array([[<HDF5 object reference>, <HDF5 object reference>,
        <HDF5 object reference>, <HDF5 object reference>...

How can I read an HDF5 object reference to view the data stored in it?

+1
source share
4 answers

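The simplest way is to use the `.value` attribute of the HDF5 dataset (note: `.value` was removed in h5py 3.0; on current versions use `dataset[()]` instead).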

>>> hf = h5py.File('/path/to/file', 'r')
>>> data = hf.get('dataset_name').value # `data` is now an ndarray.

You can also slice the dataset that creates the actual ndarray with the requested data:

>>> hf['dataset_name'][:10] # produces ndarray as well

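In many ways, an h5py dataset behaves like an ndarray. So you can pass the dataset itself, unchanged, to many NumPy functions. For example, this works: np.mean(hf.get('dataset_name')).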

EDIT:

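I misunderstood the question at first. The problem is not loading numeric data: the dataset itself contains HDF5 object references. To get at the data you have to dereference them with h5py. Below I show this for a single reference.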

First, create a file and a dataset in it:

>>> f = h5py.File('tmp.h5', 'w')
>>> ds = f.create_dataset('data', data=np.zeros(10,))

Next, create references to that dataset and store them in a second dataset.

>>> ref_dtype = h5py.special_dtype(ref=h5py.Reference)
>>> ref_ds = f.create_dataset('data_refs', data=(ds.ref, ds.ref), dtype=ref_dtype)

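Then read one of the references back, in a roundabout way: get the name of the object it points to, and open that object by name.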

>>> name = h5py.h5r.get_name(ref_ds[0], f.id) # 2nd argument is the file identifier
>>> print(name)
b'/data'
>>> out = f[name]
>>> print(out.shape)
(10,)

, , , . TL; DR: .

:

h5py.h5r.dereference , . . , ( ). .

+3

, , hdf5, , .

with h5py.File('name-of-file.h5', 'r') as hf:
    data = hf['name-of-dataset'][:]
0

HDF5 ( , " " ) ( ). , .

". HDF5.

h5py - , ( ).

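Dereferencing is simple. Given a reference, ref, you obtain the object it points to by indexing the file with it: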

 my_object = my_file[ref]

: 1. 2.

# Open the file
hf = h5py.File('path/to/file', 'r')
# Obtain the dataset of references
n1 = hf['dataset_name']
# Obtain the dataset pointed to by the first reference
ds = hf[n1[0]]
# Obtain the data in ds
data = ds[:]

If the dataset of references is 2D, as in the question, index it with two indices:

ds = hf[n1[0,0]]

If the referenced dataset is a scalar, read its value with:

data = ds[()]

To obtain all the referenced datasets at once:

all_data = [hf[ref] for ref in n1[:]]

This works for a 1D n1. For a 2D dataset of references, iterate over both dimensions.

Finally, here is a complete example in two parts, "write" and "read":

import numpy as np
import h5py

# Open file                                                                                    
myfile = h5py.File('myfile.hdf5', 'w')

# Create dataset                                                                               
ds_0 = myfile.create_dataset('dataset_0', data=np.arange(10))
ds_1 = myfile.create_dataset('dataset_1', data=9-np.arange(10))

# Create a data                                                                                
ref_dtype = h5py.special_dtype(ref=h5py.Reference)

ds_refs = myfile.create_dataset('ref_to_dataset', shape=(2,), dtype=ref_dtype)

ds_refs[0] = ds_0.ref
ds_refs[1] = ds_1.ref

myfile.close()

import numpy as np
import h5py

# Open file                                                                                    
myfile = h5py.File('myfile.hdf5', 'r')

# Read the references                                                                          
ref_to_ds_0 = myfile['ref_to_dataset'][0]
ref_to_ds_1 = myfile['ref_to_dataset'][1]

# Read the dataset                                                                             
ds_0 = myfile[ref_to_ds_0]
ds_1 = myfile[ref_to_ds_1]

# Read the value in the dataset                                                                
data_0 = ds_0[:]
data_1 = ds_1[:]

myfile.close()

print(data_0)
print(data_1)

, NumPy . , HDF5 NumPy. .

0

Here is a direct approach to reading the hdf5 file as a numpy array:

import numpy as np
import h5py

hf = h5py.File('path/to/file.h5', 'r')
n1 = np.array(hf["dataset_name"][:]) #dataset_name is same as hdf5 object name 

print(n1)
0
source

Source: https://habr.com/ru/post/1629434/


All Articles