python - 可変サイズの複数の NetCDF Rainfall ファイルを読み取る Python

Question

私が抱えている問題は、オーストラリア気象局から提供された降雨データファイルに関するものです。これらのファイルには、稼働中のすべての雨量計（ゲージ）について 30 分ごとに記録された降雨量が含まれています。問題は、1 日あたり 48 個の 30 分ファイルがあることです。特定のゲージの時系列を作成したいと考えています。つまり、48 個のファイルすべてを読み取ってゲージ ID を検索し、ある 30 分間にそのゲージが何も記録していなかった場合でも処理が失敗しないようにする必要があります。ファイル形式の例へのリンクは次のとおりです。

https://dl.dropboxusercontent.com/u/15223371/14/gauge_30min_20100214_000000.nc

https://dl.dropboxusercontent.com/u/15223371/14/gauge_30min_20100214_003000.nc

https://dl.dropboxusercontent.com/u/15223371/14/gauge_30min_20100214_010000.nc

これは私がこれまでに試したことです：

"""
This script is used to read a directory of raingauge data from a Data Directory





"""
# Script flow: ask for a directory through a GUI dialog, open every .nc file
# found there, look up one gauge's precipitation in each file and draw the
# resulting series as a bar chart.
from anuga.file.netcdf import NetCDFFile
from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a, \
                            netcdf_float
import os
import glob
from easygui import *
import string
import numpy
# The bare string literal below holds a disabled prompt for a file extension;
# it has no effect at run time.
"""
print 'Default file Extension...'
msg="Enter 3 letter extension."
title = "Enter the 3 letter file extension to search for in DIR "
default = "csv"
file_extension = enterbox(msg,title,default)
"""


# Let the user choose the directory that holds the rainfall files.
print 'Present Directory Open...'
title = "Select Directory to Read Multiple rainfall .nc files"
msg = "This is a test of the diropenbox.\n\nPick the directory that you wish to open."
d = diropenbox(msg, title)
fromdir = d

# Every .nc file in that directory, in sorted file-name order.
filtered_list = glob.glob(os.path.join(fromdir, '*.nc'))
filtered_list.sort()

nf = len(filtered_list)
print nf

# Duplicate of the numpy import above.
import numpy

# One rainfall slot per input file; t holds the matching bar positions.
rain = numpy.zeros(nf,'float')
t = numpy.arange(nf)

# NOTE(review): this file is opened for writing, but nothing in the visible
# script writes to it or closes it.
Stn_Loc_File='Station_Location.csv'
outfid = open(Stn_Loc_File, 'w')

# NOTE(review): prec is not referenced again in the visible script; 1752 is
# presumably a station count -- TODO confirm.
prec = numpy.zeros((nf,1752),numpy.float)

# Gauge ids of interest, stored as strings.
gauge_id_list = ['570002','570021','570025','570028','570030','570032','570031','570035','570036',
                 '570047','570772','570781','570910','570903','570916','570931','570943','570965',
                 '570968','570983','570986','70214','70217','70349','70351']
# The bare string literal below holds a disabled gauge chooser; it has no
# effect at run time.
"""
title = "Select Gauge to plot"
msg = "Select Gauge"
gauge_id = int(choicebox(msg=msg,title=title, choices=gauge_id_list))
"""
# NOTE(review): with the chooser above and the loop below both disabled,
# gauge_id is not assigned anywhere in the active code, so the lookup inside
# the file loop raises NameError, which the outer bare except then hides.
#for gauge_id in gauge_id_list:
#    gauge_id = int(gauge_id)
try:    

    for i, infile in enumerate(filtered_list):

        infilenet = NetCDFFile(infile, netcdf_mode_r)
        print infilenet.variables
        # Pauses for user input once per file.
        raw_input('Hold.... check variables...')
        stn_lats = infilenet.variables['latitude']
        stn_longs = infilenet.variables['longitude']
        stn_ids = infilenet.variables['station_id']
        stn_rain = infilenet.variables['precipitation']

        print stn_ids.shape
        #print stn_lats.shape
        #print stn_longs.shape
        #print infile.dimensions
        stn_ids = numpy.array(stn_ids)

        # Positions in this file whose station id equals the wanted gauge.
        l_id = numpy.where(stn_ids == gauge_id)
        # NOTE(review): this tests the whole id array against a list of
        # strings rather than testing a single gauge id -- probably not the
        # intended condition; confirm.
        if stn_ids in gauge_id_list:
            try:
                # First matching position; indexing fails when there is no match.
                l_id = l_id[0][0]
                rain[i] = stn_rain[l_id]
            except:
                # Any failure here is recorded as a missing value for this file.
                rain[i] = numpy.nan
    print 'End for i...'            
    #print rain

    import pylab as pl

    pl.bar(t,rain)
    pl.title('Rain Gauge data')
    pl.xlabel('time steps')
    pl.ylabel('rainfall (mm)')
    pl.show()
# NOTE(review): this bare except silently discards every error raised above,
# so a failing run ends without any message.
except:
    pass 
raw_input('END....')

score 1 · Accepted Answer

OK、必要以上に複雑な形式でデータを受け取ったようですね。提供元は 1 日分を 1 つの netCDF ファイルに簡単にまとめられたはずです。実際、これを解決する方法の 1 つは、たとえば NCO コマンドラインツールを使用して、すべてのファイルを時間次元で 1 つに結合することです。

しかし、ここでは scipy の netcdf モジュールを使用する解決策を示します。このモジュールは非推奨だと思います。私自身は NetCDF4 ライブラリのほうが好みです。主な手順は次のとおりです。出力データ構造を np.nan で初期化します。入力ファイルをループして、降水量とステーション ID を取得します。対象のステーション ID ごとにインデックスを求め、そのインデックスの降水量を取得して、出力構造に格納します。(タイムスタンプの抽出は行っていません。そこはお任せします。)

import glob
import numpy as np
from scipy.io import netcdf

# Build a per-gauge rainfall time series from a set of half-hourly netCDF
# files. Each file contributes one time step; a gauge that is absent from a
# file keeps NaN for that step.

# Load the data file names; sorted file names give the time order.
stationdata = sorted(glob.glob('gauge*.nc'))

# Gauging stations of interest, as strings (field names) and as integers
# (for matching against the ids stored in the files).
gauge_id_list = ['570002','570021','570025','570028','570030','570032','570031','570035','570036',
                 '570047','570772','570781','570910','570903','570916','570931','570943','570965',
                 '570968','570983','570986','70214','70217','70349','70351']
gauge_ids = np.array(gauge_id_list).astype('int32')
ngauges = len(gauge_ids)
# A day holds 48 half-hour steps; grow beyond that when more files are
# present so that indexing by file position can never run off the end.
ntimesteps = max(48, len(stationdata))

# Output: a structured array with one float32 field per gauge id.
# list(...) keeps the dtype specification valid on Python 3, where zip()
# returns an iterator.
dtypes = list(zip(gauge_id_list, ['float32'] * ngauges))
timeseries_per_station = np.empty((ntimesteps,), dtype=dtypes)
# Pre-fill every field explicitly so that "no reading" is always NaN.
for gauge_id in gauge_id_list:
    timeseries_per_station[gauge_id] = np.nan

# Instead of using the index, you could extract the datetime stamp
for timestep, datafile in enumerate(stationdata):
    # mmap=False reads the variables into memory, so closing the file does
    # not conflict with arrays that still reference its data.
    data = netcdf.netcdf_file(datafile, 'r', mmap=False)
    try:
        precip = data.variables['precip'].data
        stid = data.variables['stid'].data
        # Indices of the entries in this file that belong to a wanted gauge.
        idx = np.where(np.isin(stid, gauge_ids))[0]
        for i in idx:
            # int() normalises the id so the field name matches even if the
            # file stores station ids as floating-point values.
            timeseries_per_station[str(int(stid[i]))][timestep] = precip[i]
    finally:
        # Always release the file, even if a variable is missing.
        data.close()

np.set_printoptions(precision=1)
for gauge_id in gauge_id_list:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Station %s:" % gauge_id)
    print(timeseries_per_station[gauge_id])

出力は次のようになります。

Station 570002:
[ 1.9  0.3  0.   nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan]
Station 570021:
[  0.   0.   0.  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan  nan
  nan  nan  nan]
...

(明らかに、ファイルは 3 つしかありませんでした。)

編集： OP から、彼のファイルでは変数名が「precipitation」と「station_id」であるため、このコードはエラーなしでは実行できなかったとの指摘がありました。このコードは、彼が投稿したファイルでは動作します。当然ながら、提供されたファイルで実際に使用されている変数名を使う必要があります。これらは彼のためにカスタム作成されたファイルのようなので、作成者が変数の命名に一貫性を持たせていない可能性があります。

python - 可変サイズの複数の NetCDF Rainfall ファイルを読み取る Python

1 に答える 1

Related

Reference