import numpy as np
import numpy.lib.recfunctions as recfunctions
# Three structured arrays keyed by the 'pos' field. a1 and a2 share the same
# value columns (col1, col2); a3 carries different ones (col3, col4).
a1 = np.array(
    [
        ('2:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('2:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('2:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col1', '<f8'), ('col2', '<f8')],
)
a2 = np.array(
    [
        ('3:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('3:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('3:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col1', '<f8'), ('col2', '<f8')],
)
a3 = np.array(
    [
        ('2:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('2:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('2:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col3', '<f8'), ('col4', '<f8')],
)

# Fold the remaining arrays into the running result, one outer join at a time.
result = a1
for a in (a2, a3):
    # Join on every field the two arrays have in common. Walking
    # result.dtype.names (instead of intersecting sets) keeps the key order
    # deterministic from run to run.
    cols = [name for name in result.dtype.names if name in a.dtype.names]
    result = recfunctions.join_by(cols, result, a, jointype='outer')
print(result)
which gives:
[ ('2:21801', 1.98496509218366e-31, 0.99999999997999, 1.98496509218366e-31, 0.99999999997999)
('2:6506', 4.67259718014735e-25, 0.999999999950887, 4.67259718014735e-25, 0.999999999950887)
('2:6601', 2.24527453887999e-27, 0.999999999952706, 2.24527453887999e-27, 0.999999999952706)
('3:21801', 1.98496509218366e-31, 0.99999999997999, --, --)
('3:6506', 4.67259718014735e-25, 0.999999999950887, --, --)
('3:6601', 2.24527453887999e-27, 0.999999999952706, --, --)]
If you need SQL-like joins on NumPy arrays, consider using Pandas. Pandas is built on top of NumPy:
import numpy as np
import pandas as pd
# Three structured arrays keyed by the 'pos' field. a1 and a2 share the same
# value columns (col1, col2); a3 carries different ones (col3, col4).
a1 = np.array(
    [
        ('2:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('2:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('2:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col1', '<f8'), ('col2', '<f8')],
)
a2 = np.array(
    [
        ('3:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('3:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('3:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col1', '<f8'), ('col2', '<f8')],
)
a3 = np.array(
    [
        ('2:6506', 4.6725971801473496e-25, 0.99999999995088695),
        ('2:6601', 2.2452745388799898e-27, 0.99999999995270605),
        ('2:21801', 1.9849650921836601e-31, 0.99999999997999001),
    ],
    dtype=[('pos', '|S100'), ('col3', '<f8'), ('col4', '<f8')],
)

# Convert each structured array to a DataFrame (one column per field).
dfs = [pd.DataFrame.from_records(a) for a in (a1, a2, a3)]

# Fold the remaining frames into the running result, one outer merge at a time.
result = dfs[0]
for df in dfs[1:]:
    # Merge on every column the two frames have in common. Walking
    # result.columns (instead of intersecting sets) keeps the key order
    # deterministic from run to run.
    cols = [name for name in result.columns if name in df.columns]
    # The key names are identical on both sides, so `on=` is sufficient.
    result = pd.merge(result, df, how='outer', on=cols)
print(result)
pos col1 col2 col3 col4
0 2:6506 4.672597e-25 1 4.672597e-25 1
1 2:6601 2.245275e-27 1 2.245275e-27 1
2 2:21801 1.984965e-31 1 1.984965e-31 1
3 3:6506 4.672597e-25 1 NaN NaN
4 3:6601 2.245275e-27 1 NaN NaN
5 3:21801 1.984965e-31 1 NaN NaN
[6 rows x 5 columns]
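Pandas offers more than NumPy here. For example, Pandas represents missing values with NaN, which is simpler to work with than NumPy's ad hoc masked arrays.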
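Note that Pandas DataFrames have a `.values` attribute, which returns a NumPy array, and a `.to_records` method, which returns a NumPy record array. Conversely, as shown above, `DataFrame.from_records` converts NumPy structured arrays into DataFrames. So you can convert between DataFrames and NumPy arrays whenever you need to.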
, , - Pandas, , , .