pandas の HDFStore で保存したデータに、h5py.File で開いたときと同じ方法（data["tables"]["t1"] のようにキーを階層的にたどる方法）ではアクセスできない、という問題に遭遇しました。コードスニペットは次のとおりです。
In [1]: import pandas as pd
In [2]: import numpy as np
In [3]: import h5py as h5
In [4]: hdf = pd.HDFStore("tmp.h5")
In [5]: hdf.put('tables/t1', pd.DataFrame(np.random.rand(20,5)))
In [6]: hdf.put('t2', pd.DataFrame(np.random.rand(10,5)))
In [7]:
In [7]: hdf.close()
In [8]:
In [8]: ############ Read using pd.HDFStore ############
In [9]:
In [9]: data = pd.HDFStore ("tmp.h5")
In [10]: data["tables/t1"]
Out[10]:
0 1 2 3 4
0 0.384926 0.712066 0.022438 0.686217 0.942678
1 0.079548 0.466799 0.575394 0.276646 0.514414
2 0.672582 0.828567 0.801799 0.296046 0.124042
3 0.568058 0.931348 0.225348 0.547913 0.736184
4 0.496768 0.419699 0.724118 0.313427 0.353825
5 0.771868 0.963346 0.523821 0.793295 0.052085
6 0.358478 0.845149 0.334389 0.674448 0.239096
7 0.454559 0.604438 0.183654 0.027641 0.186922
8 0.776586 0.155783 0.253801 0.123986 0.560601
9 0.201239 0.932080 0.040997 0.119049 0.154076
10 0.753566 0.770133 0.123285 0.112419 0.353622
11 0.040959 0.384800 0.806119 0.247106 0.013442
12 0.739205 0.100547 0.855418 0.774874 0.710557
13 0.865856 0.565094 0.815860 0.816869 0.834415
14 0.251312 0.624995 0.976317 0.854855 0.744861
15 0.179678 0.435902 0.602303 0.118516 0.386935
16 0.452009 0.973729 0.067736 0.097811 0.292619
17 0.285994 0.569845 0.584602 0.001671 0.422877
18 0.727996 0.291086 0.736912 0.960595 0.132891
19 0.356397 0.747693 0.458485 0.100849 0.072220
In [11]: ## Success
In [12]: data ["tables"]["t1"]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-12-c7599d16a7b6> in <module>()
----> 1 data ["tables"]["t1"]
/usr/conda/lib/python2.7/site-packages/pandas/io/pytables.py in __getitem__(self, key)
415
416 def __getitem__(self, key):
--> 417 return self.get(key)
418
419 def __setitem__(self, key, value):
/usr/conda/lib/python2.7/site-packages/pandas/io/pytables.py in get(self, key)
632 if group is None:
633 raise KeyError('No object named %s in the file' % key)
--> 634 return self._read_group(group)
635
636 def select(self, key, where=None, start=None, stop=None, columns=None,
/usr/conda/lib/python2.7/site-packages/pandas/io/pytables.py in _read_group(self, group, **kwargs)
1268
1269 def _read_group(self, group, **kwargs):
-> 1270 s = self._create_storer(group)
1271 s.infer_axes()
1272 return s.read(**kwargs)
/usr/conda/lib/python2.7/site-packages/pandas/io/pytables.py in _create_storer(self, group, format, value, append, **kwargs)
1151 else:
1152 raise TypeError(
-> 1153 "cannot create a storer if the object is not existing "
1154 "nor a value are passed")
1155 else:
TypeError: cannot create a storer if the object is not existing nor a value are passed
In [13]:
In [13]: data.close()
In [14]:
In [14]: ########### Read using h5py.File ##############
In [15]:
In [15]: data = h5.File("tmp.h5","r")
In [16]:
In [16]: data["tables"]
Out[16]: <HDF5 group "/tables" (1 members)>
In [17]:
In [17]: data["tables"]["t1"]
Out[17]: <HDF5 group "/tables/t1" (4 members)>
In [18]:
In [18]: data['tables']['t1'].keys ()
Out[18]: [u'axis0', u'axis1', u'block0_items', u'block0_values']
In [19]: [u'axis0', u'axis1', u'block0_items', u'block0_values']
Out[19]: [u'axis0', u'axis1', u'block0_items', u'block0_values']
In [20]:
In [20]: data['tables']['t1']['block0_values'].value
Out[20]:
array([[ 0.38492571, 0.71206567, 0.02243773, 0.68621713, 0.9426783 ],
[ 0.07954806, 0.4667994 , 0.57539433, 0.27664603, 0.51441446],
[ 0.67258161, 0.82856681, 0.80179916, 0.29604625, 0.12404214],
[ 0.56805845, 0.93134797, 0.22534757, 0.54791294, 0.73618366],
[ 0.49676792, 0.41969943, 0.72411835, 0.31342698, 0.35382463],
[ 0.77186804, 0.96334586, 0.52382094, 0.7932945 , 0.05208528],
[ 0.3584784 , 0.84514863, 0.33438851, 0.6744483 , 0.23909552],
[ 0.45455901, 0.6044383 , 0.18365449, 0.02764097, 0.18692162],
[ 0.77658631, 0.15578276, 0.25380109, 0.12398617, 0.56060138],
[ 0.20123928, 0.93207974, 0.04099724, 0.11904895, 0.15407568],
[ 0.75356644, 0.77013349, 0.12328475, 0.11241904, 0.35362213],
[ 0.04095888, 0.38480023, 0.80611853, 0.24710571, 0.01344193],
[ 0.73920528, 0.1005474 , 0.85541761, 0.7748739 , 0.71055697],
[ 0.86585587, 0.5650938 , 0.81586031, 0.81686915, 0.83441517],
[ 0.25131205, 0.62499501, 0.97631707, 0.85485518, 0.74486096],
[ 0.17967805, 0.43590236, 0.60230302, 0.11851596, 0.38693535],
[ 0.4520091 , 0.97372923, 0.0677363 , 0.09781059, 0.29261929],
[ 0.28599448, 0.56984462, 0.5846021 , 0.00167063, 0.42287738],
[ 0.72799625, 0.29108631, 0.7369122 , 0.96059508, 0.13289119],
[ 0.35639696, 0.7476934 , 0.45848456, 0.10084881, 0.07221995]])
In [21]:
In [21]: ######################## End ###############
In [22]:
In [22]:
pandas の HDFStore でも data['tables']['t1'] という形でデータにアクセスしたいのですが、この問題のために行き詰まっています。調べたところ、pandas は各 DataFrame を HDF5 ファイル内にデータセットではなくグループとして書き込んでいるようです。データに簡単にアクセスできるように、DataFrame をデータセットとして書き込みたいと考えています。