私は、Tranquility を介してリアルタイムで CSV データを取り込むユースケースで Druid を評価しています。以下はサーバー構成です:
{
  "dataSources": {
    "audience": {
      "spec": {
        "dataSchema": {
          "dataSource": "audience",
          "parser": {
            "type": "string",
            "parseSpec": {
              "format": "csv",
              "timestampSpec": {
                "column": "timestamp",
                "format": "posix"
              },
              "columns": ["timestamp", "partner_id", "event_id", "product_id", "device_id", "count"],
              "dimensionsSpec": {
                "dimensions": ["partner_id", "event_id", "product_id", "device_id"]
              }
            }
          },
          "metricsSpec": [
            { "type": "longSum", "name": "total", "fieldName": "count" }
          ],
          "granularitySpec": {
            "segmentGranularity": "HOUR",
            "queryGranularity": "HOUR",
            "intervals": ["2013-08-31/2013-09-01"]
          }
        },
        "ioConfig": {
          "type": "realtime"
        },
        "tuningConfig": {
          "type": "realtime",
          "maxRowsInMemory": "100000",
          "intermediatePersistPeriod": "PT10M",
          "windowPeriod": "PT10M"
        }
      },
      "properties": {
        "task.partitions": "1",
        "task.replicants": "1"
      }
    }
  },
  "properties": {
    "zookeeper.connect": "localhost",
    "druid.discovery.curator.path": "/druid/discovery",
    "druid.selectors.indexing.serviceName": "druid/overlord",
    "http.port": "8200",
    "http.threads": "8"
  }
}
データは次のように Python スクリプトによってランダムに生成されます:
1471336991,1,960,136,3ZLA7,1
1471336991,1,369,367,8MP2B,1
1471336991,2,544,550,C9ZG8,1
1471336991,1,135,394,XFX31,1
1471336991,2,590,552,VXMTL,1
1471336991,1,493,615,0C2HR,1
1471336991,2,435,710,HKYP0,1
1471336991,1,394,483,V2HP9,1
1471336991,2,441,376,J1LYO,1
次のコマンドでデータを送信すると、{"result":{"received":1000,"sent":0}} が返されます:
# Pipe the generated CSV rows to the Tranquility server's HTTP endpoint for the "audience" data source.
python createData.py | curl -XPOST -H'Content-Type: text/plain' --data-binary @- http://localhost:8200/v1/post/audience