0

私は巨大なデータセットに取り組んでおり、数値配列をスパースに変換する際に問題があります。

import pandas as pd
l = pd.read_csv('merge_from_ofoct.csv')
l.drop('Unnamed: 12', axis=1, inplace=True)
l.drop('CRS_ARR_TIME', axis=1, inplace=True)
l.drop('CRS_DEP_TIME', axis=1, inplace=True)
l = l[(l.T != 0).any()]
count_nan = len(l) - l.count()  #gives the no. of rows with no values in it
l_no_missing = l.dropna()  #Dropping the rows with missing values
f = l_no_missing  #final dataframe with no missing values
count_nan = len(f) - f.count()  #verifying if the missing vaules are removed
count_nan
airport_data = pd.read_csv('Airport_data.csv', 
                               header = 0) 
training.drop(training.columns[0], axis = 1, inplace = True)
f['CARRIER'] = f['UNIQUE_CARRIER']
f["CARRIER"] = pd.factorize(f["CARRIER"])[0]
CARRIER = f[['UNIQUE_CARRIER', 'CARRIER']].drop_duplicates()
training = f
training.drop('UNIQUE_CARRIER', axis = 1, inplace = True)
scalingDF = training[['DISTANCE']] # Numerical features
categDF = training[['MONTH', 'DAY_OF_MONTH', 'ORIGIN_AIRPORT_ID', 
                   'DEST_AIRPORT_ID', 
                   'CARRIER', 'DAY_OF_WEEK']] # Categorical features


from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder() # Create encoder object
categDF_encoded = encoder.fit_transform(categDF) 

type(categDF_encoded) 
from scipy import sparse # Need this to create a sparse array
scalingDF_sparse = sparse.csr_matrix(scalingDF) #can't convert numerical array to sparse

TypeError Traceback (most recent call last) in () 1 from scipy import sparse # スパース配列を作成するにはこれが必要 ----> 2 scalingDF_sparse = sparse.csr_matrix(scalingDF)

/Users/nikhil_maladkar/anaconda/lib/python2.7/site-packages/scipy/sparse/compressed.pyc in init (self、arg1、shape、dtype、copy) 67 self.format) 68 from .coo import coo_matrix -- -> 69 self._set_self(self. class (coo_matrix(arg1, dtype=dtype))) 70 71 # 指定された行列の次元があれば読み取ります

/Users/nikhil_maladkar/anaconda/lib/python2.7/site-packages/scipy/sparse/compressed.pyc in init (self、arg1、shape、dtype、copy) 29 arg1 = arg1.copy() 30 その他: -- -> 31 arg1 = arg1.asformat(self.format) 32 self._set_self(arg1) 33

/Users/nikhil_maladkar/anaconda/lib/python2.7/site-packages/scipy/sparse/base.pyc in asformat(self, format) 218 return self 219 else: --> 220 return getattr(self,'to' +フォーマット)() 221 222 ############################################## ########################

/Users/nikhil_maladkar/anaconda/lib/python2.7/site-packages/scipy/sparse/coo.pyc in tocsr(self) 328 indptr = np.empty(M + 1, dtype=idx_dtype) 329 インデックス = np.empty (self.nnz, dtype=idx_dtype) --> 330 データ = np.empty(self.nnz, dtype=upcast(self.dtype)) 331 332 coo_tocsr(M, N, self.nnz,

/Users/nikhil_maladkar/anaconda/lib/python2.7/site-packages/scipy/sparse/sputils.pyc in upcast(*args) 55 return t 56 ---> 57 raise TypeError('no supported conversion for types: % r' % (args,)) 58 59

TypeError: タイプの変換がサポートされていません: (dtype('O'),)

4

0 に答える 0