Using psycopg2 converter to extract bytea data from PostgreSQL

I want to store Numpy arrays in a PostgreSQL database in binary form (bytea). I can make this work fine in test #1 (see below), but I don't want to manipulate the data arrays by hand before every insert and after every select — I want psycopg2 adapters and converters to take care of it.

Here is what I have at the moment:

# Test script (Python 2): store numpy arrays as bytea in PostgreSQL.
# Test one converts by hand on both sides; test two relies on a registered
# adapter (python -> postgres) and typecaster (postgres -> python).
import numpy as np
import psycopg2, psycopg2.extras

def my_adapter(spectrum):
    # Adapt an ndarray for an INSERT: wrap its buffer as a bytea literal.
    return psycopg2.Binary(spectrum)

def my_converter(my_buffer, cursor):
    # Typecaster for SELECTed bytea columns.
    # NOTE(review): receives the raw wire representation, not a buffer —
    # this is the failure the question is about (see traceback below).
    return np.frombuffer(my_buffer)

class MyBinaryTest():
    # Connection info
    user = 'postgres'
    password = 'XXXXXXXXXX'
    host = 'localhost'
    database = 'test_binary'

    def __init__(self):
        pass

    def set_up(self):
        # Set up: (re)create the scratch database, then a table whose
        # "data" column holds the serialized array.
        connection = psycopg2.connect(host=self.host, user=self.user, password=self.password)
        connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = connection.cursor()
        try:
            # Clear out any old test database
            cursor.execute('drop database %s' % (self.database, ))
        except:
            pass
        cursor.execute('create database %s' % (self.database, ))
        cursor.close()
        connection.close()
        # Direct connectly to the database and set up our table
        self.connection = psycopg2.connect(host=self.host, user=self.user, password=self.password, database=self.database)
        self.cursor = self.connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
        self.cursor.execute('''CREATE TABLE spectrum ( "sid" integer not null primary key, "data" bytea not null ); CREATE SEQUENCE spectrum_id; ALTER TABLE spectrum ALTER COLUMN sid SET DEFAULT NEXTVAL('spectrum_id'); ''')
        self.connection.commit()

    def perform_test_one(self):
        # Lets do a test: manual psycopg2.Binary on the way in,
        # manual np.frombuffer on the way out. This path works.
        shape = (2, 100)
        data = np.random.random(shape)
        # Binary up the data
        send_data = psycopg2.Binary(data)
        self.cursor.execute('insert into spectrum (data) values (%s) returning sid;', [send_data])
        self.connection.commit()
        # Retrieve the data we just inserted
        query = self.cursor.execute('select * from spectrum')
        result = self.cursor.fetchall()
        print "Type of data retrieved:", type(result[0]['data'])
        # Convert it back to a numpy array of the same shape
        retrieved_data = np.frombuffer(result[0]['data']).reshape(*shape)
        # Ensure there was no problem
        assert np.all(retrieved_data == data)
        print "Everything went swimmingly in test one!"
        return True

    def perform_test_two(self):
        # Same round trip, but via the registered adapter/typecaster.
        if not self.use_adapters:
            return False
        # Lets do a test
        shape = (2, 100)
        data = np.random.random(shape)
        # No changes made to the data, as the adapter should take care of it (and it does)
        self.cursor.execute('insert into spectrum (data) values (%s) returning sid;', [data])
        self.connection.commit()
        # Retrieve the data we just inserted
        query = self.cursor.execute('select * from spectrum')
        result = self.cursor.fetchall()
        # No need to change the type of data, as the converter should take care of it
        # (But, we never make it here)
        retrieved_data = result[0]['data']
        # Ensure there was no problem
        assert np.all(retrieved_data == data.flatten())
        print "Everything went swimmingly in test two!"
        return True

    def setup_adapters_and_converters(self):
        # Set up test adapters
        psycopg2.extensions.register_adapter(np.ndarray, my_adapter)
        # Register our converter: learn the bytea OID from a probe query,
        # then bind my_converter to that OID for this connection.
        self.cursor.execute("select null::bytea;")
        my_oid = self.cursor.description[0][1]
        obj = psycopg2.extensions.new_type((my_oid, ), "numpy_array", my_converter)
        psycopg2.extensions.register_type(obj, self.connection)
        self.connection.commit()
        self.use_adapters = True

    def tear_down(self):
        # Tear down: close our handles, then drop the scratch database.
        self.cursor.close()
        self.connection.close()
        connection = psycopg2.connect(host=self.host, user=self.user, password=self.password)
        connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = connection.cursor()
        cursor.execute('drop database %s' % (self.database, ))
        cursor.close()
        connection.close()

test = MyBinaryTest()
test.set_up()
test.perform_test_one()
test.setup_adapters_and_converters()
test.perform_test_two()
test.tear_down()

Now test #1 works fine. When I take the code I used in test 1 and register the adapter and converter, it does not work (test 2). This is because the data supplied to the converter is no longer a buffer; it is the string representation of a PostgreSQL bytea. The output is as follows:

 In [1]: run -i test_binary.py Type of data retrieved: type 'buffer'> Everything went swimmingly in test one! ERROR: An unexpected error occurred while tokenizing input The following traceback may be corrupted or invalid The error message is: ('EOF in multi-line statement', (273, 0)) --------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/andycasey/thesis/scope/scope/test_binary.py in <module>() 155 test.perform_test_one() 156 test.setup_adapters_and_converters() --> 157 test.perform_test_two() 158 test.tear_down() 159 /Users/andycasey/thesis/scope/scope/test_binary.py in perform_test_two(self) 101 # Retrieve the data we just inserted 102 query = self.cursor.execute('select * from spectrum') --> 103 result = self.cursor.fetchall() 104 105 # No need to change the type of data, as the converter should take care of it /Library/Python/2.6/site-packages/psycopg2/extras.pyc in fetchall(self) 81 def fetchall(self): 82 if self._prefetch: ---> 83 res = _cursor.fetchall(self) 84 if self._query_executed: 85 self._build_index() /Users/andycasey/thesis/scope/scope/test_binary.py in my_converter(my_buffer, cursor) 7 8 def my_converter(my_buffer, cursor): ----> 9 return np.frombuffer(my_buffer) 10 11 ValueError: buffer size must be a multiple of element size WARNING: Failure executing file: <test_binary.py> In [2]: %debug > /Users/andycasey/thesis/scope/scope/test_binary.py(9)my_converter() 8 def my_converter(my_buffer, cursor): ----> 9 return np.frombuffer(my_buffer) 10 ipdb> my_buffer '\\' 

Does anyone know how I can (a) de-serialize the string representation returning to me in my_converter so that I return a Numpy array each time or (b) force the PostgreSQL / psycopg2 representation into the converter (which I can use) instead of the string representation ?

Thanks!

I'm on OS X 10.6.8 with Python 2.6.1 (r261: 67515), PostgreSQL 9.0.3 and psycopg2 2.4 (dt dec pq3 ext)

+6
source share
3 answers

The format you see in the debugger is easy to parse: it is PostgreSQL's hex format (http://www.postgresql.org/docs/9.1/static/datatype-binary.html). psycopg can parse this format and return a buffer containing the data; you can use that buffer to build an array. Instead of writing a typecaster from scratch, write one that invokes the original function and post-processes its result. Sorry, but I can't remember its name right now, and I'm writing from my mobile: you can get additional help from the mailing list.


Edit: complete solution.

The default bytea typecaster (the object that can parse the binary postgres representation and return a buffer object from it) is psycopg2.BINARY. We can use it to build our own typecaster on top:

 In [1]: import psycopg2 In [2]: import numpy as np In [3]: a = np.eye(3) In [4]: a Out[4]: array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]]) In [5]: cnn = psycopg2.connect('') # The adapter: converts from python to postgres # note: this only works on numpy version whose arrays # support the buffer protocol, # eg it works on 1.5.1 but not on 1.0.4 on my tests. In [12]: def adapt_array(a): ....: return psycopg2.Binary(a) ....: In [13]: psycopg2.extensions.register_adapter(np.ndarray, adapt_array) # The typecaster: from postgres to python In [21]: def typecast_array(data, cur): ....: if data is None: return None ....: buf = psycopg2.BINARY(data, cur) ....: return np.frombuffer(buf) ....: In [24]: ARRAY = psycopg2.extensions.new_type(psycopg2.BINARY.values, 'ARRAY', typecast_array) In [25]: psycopg2.extensions.register_type(ARRAY) # Now it works "as expected" In [26]: cur = cnn.cursor() In [27]: cur.execute("select %s", (a,)) In [28]: cur.fetchone()[0] Out[28]: array([ 1., 0., 0., 0., 1., 0., 0., 0., 1.]) 

As you know, np.frombuffer(a) loses the shape of the array, so you'll have to figure out how to preserve it.

+8
source

In the case of numpy arrays, you can avoid the buffer strategy with all its drawbacks, such as the loss of shape and data type. Following an earlier question (fooobar.com/questions/352504/...) about storing a numpy array in sqlite3, you can easily adapt the same approach for postgres.

 import os import psycopg2 as psql import numpy as np # converts from python to postgres def _adapt_array(text): out = io.BytesIO() np.save(out, text) out.seek(0) return psql.Binary(out.read()) # converts from postgres to python def _typecast_array(value, cur): if value is None: return None data = psql.BINARY(value, cur) bdata = io.BytesIO(data) bdata.seek(0) return np.load(bdata) con = psql.connect('') psql.extensions.register_adapter(np.ndarray, _adapt_array) t_array = psql.extensions.new_type(psql.BINARY.values, "numpy", _typecast_array) psql.extensions.register_type(t_array) cur = con.cursor() 

Now you can create and populate the table (with `a` defined as the numpy array in the previous answer):

 cur.execute("create table test (column BYTEA)") cur.execute("insert into test values(%s)", (a,)) 

And restore the numpy object

# Fetch the row back; the registered typecaster rebuilds the numpy array.
cur.execute("select * from test")
cur.fetchone()[0]

Result:

 array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]]) 
+2
source

I tried both of these answers and couldn't get them to work until I changed Daniel's code to use np.savetxt and np.loadtxt, and changed his typecaster to

# [1:-1] strips the first and last byte of the decoded value —
# presumably quoting/delimiters present in this setup; TODO confirm.
bdata = BytesIO(data[1:-1])

so the two functions now look like

# Text-based variant of the previous answer.
# NOTE(review): assumes `from io import BytesIO` and `import psycopg2 as pg2`
# elsewhere — neither import is shown in this snippet.
def _adapt_array(arr):
    # Serialize the array as text with np.savetxt.
    # NOTE(review): fmt='%.2f' keeps only two decimal places, so the
    # round trip is lossy; savetxt also handles only 1-D/2-D arrays.
    out = BytesIO()
    np.savetxt(out, arr, fmt='%.2f')
    out.seek(0)
    return pg2.Binary(out.read())

def _typecast_array(value, cur):
    # NULL columns pass through unchanged.
    if value is None:
        return None
    data = pg2.BINARY(value, cur)
    # [1:-1] drops the first and last byte — presumably delimiters
    # around the text value in this configuration; TODO confirm.
    bdata = BytesIO(data[1:-1])
    bdata.seek(0)
    return np.loadtxt(bdata)

pg2.extensions.register_adapter(np.ndarray, _adapt_array)
t_array = pg2.extensions.new_type(pg2.BINARY.values, 'numpy', _typecast_array)
pg2.extensions.register_type(t_array)

The error I received was `could not convert string to float: '[473.07'`. I suspect that this fix will only work for flat arrays, but my data happens to be structured so that it works for me.

0
source

Source: https://habr.com/ru/post/915367/


All Articles