次のようなデータがあるとします。
AC-057|Ethanol CBOT (Pit) Liq Cont|20050329|0.121|0.123|0.121|0.123|47|233|32|219
AC-057|Ethanol CBOT (Pit) Liq Cont|20050330|0.124|0.124|0.122|0.122|68|233|0|219
AC-057|Ethanol CBOT (Pit) Liq Cont|20050331|0.123|0.123|0.123|0.123|68|246|57|226
AC-057|Ethanol CBOT (Pit) Liq Cont|20050401|0.122|0.122|0.122|0.122|5|241|5|221
AC-057|Ethanol CBOT (Pit) Liq Cont|20050404|0.12|0.12|0.12|0.12|1|240|0|220
AC-057|Ethanol CBOT (Pit) Liq Cont|20050405|0.12|0.12|0.12|0.12|5|241|0|220
AC-057|Ethanol CBOT (Pit) Liq Cont|20050406|0.12|0.12|0.12|0.12|4|241|2|220
AC-057|Ethanol CBOT (Pit) Liq Cont|20050407|0.119|0.119|0.116|0.116|30|233|23|209
AC-057|Ethanol CBOT (Pit) Liq Cont|20050408|0.115|0.115|0.115|0.115|35|217|34|194
AC-057|Ethanol CBOT (Pit) Liq Cont|20050411|0.117|0.117|0.117|0.117|5|217|0|194
AC-057|Ethanol CBOT (Pit) Liq Cont|20050412|0.117|0.117|0.117|0.117|5|217|2|194
AC-057|Ethanol CBOT (Pit) Liq Cont|20050413|0.117|0.117|0.117|0.117|9|217|0|194
AC-057|Ethanol CBOT (Pit) Liq Cont|20050414|0.117|0.117|0.117|0.117|9|217|0|194
AC-057|Ethanol CBOT (Pit) Liq Cont|20050415|0.117|0.117|0.117|0.117|9|218|4|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050418|0.117|0.117|0.117|0.117|5|218|0|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050419|0.119|0.119|0.119|0.119|5|218|5|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050420|0.119|0.119|0.119|0.119|0|218|0|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050421|0.119|0.119|0.119|0.119|5|218|0|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050422|0.119|0.119|0.119|0.119|5|223|0|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050425|0.119|0.119|0.119|0.119|0|223|0|190
AC-057|Ethanol CBOT (Pit) Liq Cont|20050426|0.119|0.119|0.119|0.119|0|223|0|190
SYMBOL|DESCRIPTION |yyyymmdd|OPEN |HIGH |LOW |CLOSE|.|. |.|...
……このようなデータが、さまざまな種類のシンボルについて存在します。
そしてこのようなスキーマ:
-- Flat price table. The import script fills it from the first seven fields
-- of each pipe-delimited input row, splitting the yyyymmdd field into the
-- separate year / month / day columns.
CREATE TABLE IF NOT EXISTS ma (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol TEXT,
description TEXT,
year INTEGER,
month INTEGER,
day INTEGER,
open REAL,
high REAL,
low REAL,
close REAL
);
-- One single-column index per queried column.
-- NOTE(review): in SQLite an INTEGER PRIMARY KEY column is an alias for the
-- rowid, so ma_id_idx looks redundant -- confirm before dropping it.
-- NOTE(review): every index has to be maintained on each INSERT; if the bulk
-- import is slow, consider creating the indexes after loading -- verify which
-- of them the queries actually use.
CREATE INDEX ma_id_idx ON ma(id);
CREATE INDEX ma_sym_idx ON ma(symbol);
CREATE INDEX ma_yea_idx ON ma(year);
CREATE INDEX ma_mon_idx ON ma(month);
CREATE INDEX ma_day_idx ON ma(day);
CREATE INDEX ma_open_idx ON ma(open);
CREATE INDEX ma_high_idx ON ma(high);
CREATE INDEX ma_low_idx ON ma(low);
CREATE INDEX ma_close_idx ON ma(close);
そして、このデータを次のPythonスクリプトでデータベースにインポートしています。
import csv
import sqlite3 as lite

__infile__ = 'ma.csv'
__outfile__ = 'ma3.db'

# Keep the rollback journal in memory while bulk loading.
ssql = """
PRAGMA JOURNAL_MODE = MEMORY;
"""

# One placeholder per column of the ``ma`` table (the id is auto-assigned).
isql = """
INSERT INTO ma (
symbol,
description,
year,
month,
day,
open,
high,
low,
close
) VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?
)
"""


def _to_ma_tuple(row):
    """Map one pipe-delimited CSV row onto the columns of the ``ma`` table.

    ``row[2]`` is the ``yyyymmdd`` stamp; it is sliced into year, month and
    day. Fields after the close price (``row[7:]``) are not imported.
    """
    stamp = row[2]
    return (row[0], row[1], stamp[0:4], stamp[4:6], stamp[6:8],
            row[3], row[4], row[5], row[6])


conn = lite.connect(__outfile__)
try:
    conn.executescript(ssql)
    # 'rb' is the mode the Python 2 csv module expects; on Python 3 open the
    # file in text mode with newline='' instead.
    with open(__infile__, 'rb') as handle:
        reader = csv.reader(handle, delimiter='|')
        # csv.reader yields [] for blank lines; skip them instead of
        # failing on row[2]. executemany keeps the loop inside sqlite3.
        conn.executemany(isql, (_to_ma_tuple(row) for row in reader if row))
    # Single commit for the whole load.
    conn.commit()
finally:
    # Closing without a commit discards a partially loaded file.
    conn.close()
このとき、次のスキーマを満たすレコード群を得るには、どのように集計すればよいでしょうか。
-- Target table the question asks how to populate: one row per symbol and date.
-- NOTE(review): per the surrounding text, p1 / p20 / p50 / p100 / p200 are
-- meant to hold closing-price-based values for the 1-, 20-, 50-, 100- and
-- 200-day points; whether that means a lagged close or a moving average is
-- not specified here -- confirm with the author.
CREATE TABLE trends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol TEXT,
date DATE,
p1 REAL,
p20 REAL,
p50 REAL,
p100 REAL,
p200 REAL
);
各シンボルの各日付時点ごとに、上記のレコードを1件ずつ求めたいのです。
いろいろな方法を試しました。特に最後に挙げる方法は実行にいつまでも時間がかかるため、正しく動くのかどうかさえまだ分かりません(計算に1週間かかるので、実用にはなりません)。元のCSVデータは現在250メガバイト程度ですが、将来的には2.5ギガバイト以上になる見込みで、おそらくもっと大規模なデータベースを使う必要があるでしょう。
ほかに試した(あるいは試している最中の)ものは次のとおりです。
ma.sql
__________________________
-- Lookup table: one row per distinct symbol string.
-- UNIQUE ... ON CONFLICT IGNORE makes an insert of an existing value a
-- silent no-op instead of an error.
CREATE TABLE symbols (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol TEXT,
UNIQUE(symbol) ON CONFLICT IGNORE
);
-- Lookup table: one row per distinct description string (same duplicate
-- handling as symbols).
CREATE TABLE descriptions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
description TEXT,
UNIQUE(description) ON CONFLICT IGNORE
);
-- Lookup table: one row per distinct date value (same duplicate handling).
-- The update_entry_format trigger rewrites `entry` after each insert.
CREATE TABLE dates (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entry DATE,
UNIQUE(entry) ON CONFLICT IGNORE
);
-- For every stored date, expose the date itself (p1) plus the same date
-- shifted back by 7, 14, 20, 50, 100 and 200 days.
-- The offsets are calendar days as computed by SQLite's date() modifiers,
-- not trading days, and date() needs `entry` in YYYY-MM-DD form (the
-- update_entry_format trigger rewrites it to that form on insert).
-- The original statement stopped after the p200 column with a dangling
-- comma and no FROM clause, so it could not be parsed; it is completed here.
CREATE VIEW trend_dates AS
SELECT
    id AS id,
    entry AS p1,
    date(entry, '-7 day') AS p7,
    date(entry, '-14 day') AS p14,
    date(entry, '-20 day') AS p20,
    date(entry, '-50 day') AS p50,
    date(entry, '-100 day') AS p100,
    date(entry, '-200 day') AS p200
FROM dates;
-- After each insert into dates, rewrite the new row's `entry` from the
-- compact yyyymmdd form into yyyy-mm-dd by slicing it with substr().
-- NOTE(review): the UNIQUE(entry) check on insert then compares a raw
-- yyyymmdd value against already-rewritten yyyy-mm-dd rows, so a repeated
-- date may not be recognised as a duplicate at insert time -- confirm.
CREATE TRIGGER update_entry_format AFTER INSERT ON dates
BEGIN
UPDATE dates SET entry =
(SELECT
substr(entry, 1, 4) || '-' ||
substr(entry, 5, 2) || '-' ||
substr(entry, 7, 2)
)
WHERE rowid = new.rowid;
END;
-- Result table filled by the trends script: `symbol` and `entry` receive
-- symbols.id and dates.id values, p1..p200 receive close prices.
CREATE TABLE trends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol INTEGER,
entry INTEGER,
p1 REAL,
p20 REAL,
p50 REAL,
p100 REAL,
p200 REAL
);
-- Normalised fact table: SYMBOL, DESCRIPTION and ENTRY hold the ids of rows
-- in symbols, descriptions and dates (the `ma` view joins them back); the
-- remaining columns mirror the numeric fields of the input file.
-- NOTE(review): no foreign-key or uniqueness constraint ties these columns
-- to the lookup tables -- confirm whether that is intended.
CREATE TABLE master (
id INTEGER PRIMARY KEY AUTOINCREMENT,
SYMBOL INTEGER,
DESCRIPTION INTEGER,
ENTRY INTEGER,
OPEN REAL,
HIGH REAL,
LOW REAL,
CLOSE REAL,
VOLUME INTEGER,
OPEN_INTEREST INTEGER,
CONTRACT_VOLUME INTEGER,
CONTRACT_OPEN_INTEREST INTEGER
);
-- Indexes on the three lookup tables: by value, by (id, value) and by id.
-- NOTE(review): the UNIQUE constraints on symbol / description / entry
-- already give SQLite an automatic index on those columns, and `id` is the
-- rowid, so most of these nine indexes look redundant -- confirm before
-- dropping any.
CREATE INDEX symbols_index ON symbols(symbol);
CREATE INDEX descriptions_index ON descriptions(description);
CREATE INDEX dates_index ON dates(entry);
CREATE INDEX symbols_index2 ON symbols(id, symbol);
CREATE INDEX descriptions_index2 ON descriptions(id, description);
CREATE INDEX dates_index2 ON dates(id, entry);
CREATE INDEX symbols_index3 ON symbols(id);
CREATE INDEX descriptions_index3 ON descriptions(id);
CREATE INDEX dates_index3 ON dates(id);
-- Multi-column index over id through VOLUME.
-- NOTE(review): because it leads with the unique `id`, it is unlikely to
-- help lookups by symbol or entry -- verify with EXPLAIN QUERY PLAN.
CREATE INDEX master_index ON master(
id,
SYMBOL,
DESCRIPTION,
ENTRY,
OPEN,
HIGH,
LOW,
CLOSE,
VOLUME
);
-- Single-column indexes on master.
-- NOTE(review): the trends script looks rows up by symbol AND entry
-- together; a composite index on (symbol, entry) may serve that query
-- better than master_index3 / master_index4 -- verify with EXPLAIN QUERY PLAN.
CREATE INDEX master_index2 ON master(id);
CREATE INDEX master_index3 ON master(symbol);
CREATE INDEX master_index4 ON master(entry);
CREATE INDEX master_index5 ON master(close);
-- Read view over master that resolves the SYMBOL, DESCRIPTION and ENTRY ids
-- back to the text / date values stored in the lookup tables.
-- Only master rows whose three ids all match a lookup row are returned.
CREATE VIEW ma AS
SELECT
    master.id,
    symbols.symbol,
    descriptions.description,
    dates.entry,
    master.OPEN,
    master.HIGH,
    master.LOW,
    master.CLOSE,
    master.VOLUME,
    master.OPEN_INTEREST,
    master.CONTRACT_VOLUME,
    master.CONTRACT_OPEN_INTEREST
FROM master
    INNER JOIN symbols
        ON master.SYMBOL = symbols.id
    INNER JOIN descriptions
        ON master.DESCRIPTION = descriptions.id
    INNER JOIN dates
        ON master.entry = dates.id
;
-- Makes the `ma` view insertable: each INSERT on the view first registers
-- the symbol, description and date in their lookup tables (existing values
-- are skipped by the tables' ON CONFLICT IGNORE constraints), then stores
-- the row in master with those three values replaced by the lookup ids.
CREATE TRIGGER update_master INSTEAD OF INSERT ON ma
BEGIN
INSERT INTO symbols(symbol) VALUES (new.SYMBOL);
INSERT INTO descriptions(description) VALUES (new.DESCRIPTION);
INSERT INTO dates(entry) VALUES (new.ENTRY);
-- NOTE(review): master has no unique constraint other than its
-- auto-assigned id, so OR REPLACE appears to behave like a plain INSERT
-- here -- confirm.
INSERT OR REPLACE INTO MASTER(
SYMBOL,
DESCRIPTION,
ENTRY,
OPEN,
HIGH,
LOW,
CLOSE,
VOLUME,
OPEN_INTEREST,
CONTRACT_VOLUME,
CONTRACT_OPEN_INTEREST
)
VALUES(
-- Each coalesce() stores the lookup id, falling back to the raw incoming
-- value when no lookup row matches.
coalesce(
( SELECT id FROM symbols
WHERE symbol = new.SYMBOL
),
new.SYMBOL
),
coalesce(
( SELECT id FROM descriptions
WHERE description = new.DESCRIPTION
),
new.DESCRIPTION
),
-- NOTE(review): update_entry_format rewrites dates.entry to yyyy-mm-dd, so
-- this lookup by the raw new.ENTRY value may find no row and fall back to
-- storing the raw date instead of an id -- verify against real data.
coalesce(
( SELECT id FROM dates
WHERE entry = new.ENTRY
),
new.ENTRY
),
new.OPEN,
new.HIGH,
new.LOW,
new.CLOSE,
new.VOLUME,
new.OPEN_INTEREST,
new.CONTRACT_VOLUME,
new.CONTRACT_OPEN_INTEREST
);
END;
-- Trailing average of the close price: for every row of `ma`, averages the
-- closes of the same symbol whose date lies between 20 days before that
-- row's date and the date itself (both ends inclusive).
-- The window is measured in calendar days by date(), not in trading days.
--
-- Fixes relative to the earlier draft:
--   * the window start is derived from a.ENTRY (the date); the draft passed
--     a.CLOSE, a price, to date()
--   * rows are only averaged with rows of the same symbol; the draft mixed
--     the closes of every symbol
-- `symbol` is appended as the last column so the original three columns keep
-- their positions and names.
CREATE VIEW sma
AS SELECT
    a.ENTRY,
    a.CLOSE,
    AVG(b.close),
    a.symbol
FROM
    ma AS a
    JOIN ma AS b
        ON  b.symbol = a.symbol
        AND a.ENTRY >= b.ENTRY
        AND b.ENTRY >= date(a.ENTRY, '-20 day')
GROUP BY a.symbol, a.ENTRY, a.CLOSE
ORDER BY 1
;
ma.py
----------------------
import sqlite3 as lite
import csv
import glob

# The eleven placeholders line up one-to-one with the pipe-delimited fields
# of each CSV row; the insert targets the `ma` view, whose INSTEAD OF trigger
# routes the values into the underlying tables.
isql = 'insert into ma(SYMBOL, DESCRIPTION, ENTRY, OPEN, HIGH, LOW, CLOSE, VOLUME, OPEN_INTEREST, CONTRACT_VOLUME, CONTRACT_OPEN_INTEREST) values (?,?,?,?,?,?,?,?,?,?,?)'

print('connecting...')
conn = lite.connect('MA.db')
try:
    # SQLite cannot change journal_mode while a transaction is active, so the
    # pragma is issued before any insert. No explicit BEGIN is needed: the
    # sqlite3 module opens a transaction implicitly before the first INSERT.
    conn.execute('PRAGMA JOURNAL_MODE = MEMORY')
    print('inserting data...')
    # 'rb' is the mode the Python 2 csv module expects; on Python 3 open the
    # file in text mode with newline='' instead.
    with open('MA.CSV', 'rb') as handle:
        infile = csv.reader(handle, delimiter='|', quotechar=r'"')
        # Blank lines come back as [] and would not bind; skip them.
        conn.executemany(isql, (row for row in infile if row))
    conn.commit()
finally:
    # Closing without a commit discards a partially loaded file.
    conn.close()
import sqlite3 as lite

# Positions, counted back from the current date in the newest-first date
# list, whose close prices fill p1, p20, p50, p100 and p200.
OFFSETS = (0, 20, 50, 100, 200)

tsql = 'SELECT close FROM master WHERE symbol = ? AND entry = ?'
trend_sql = 'INSERT INTO trends(symbol, entry, p1, p20, p50, p100, p200) VALUES(?, ?, ?, ?, ?, ?, ?)'


def _closes_for(conn, symbol_id, date_ids):
    """Return the close of ``symbol_id`` on each of ``date_ids``, in order.

    Returns None as soon as one date has no row for the symbol, because a
    trends row is only written when all lookback points exist.
    """
    closes = []
    for date_id in date_ids:
        found = conn.execute(tsql, (symbol_id, date_id)).fetchone()
        if found is None:
            return None
        closes.append(found[0])
    return closes


# NOTE(review): the loader script connects to 'MA.db' while this one opens
# 'ma.db'; on a case-sensitive filesystem those are different files -- confirm.
conn = lite.connect('ma.db')
try:
    dates = [r[0] for r in conn.execute('SELECT id FROM dates ORDER BY entry DESC')]
    symbols = [r[0] for r in conn.execute('SELECT id FROM symbols ORDER BY symbol ASC')]
    print('building trends...')
    conn.execute('PRAGMA synchronous=OFF')
    conn.execute('PRAGMA journal_mode=MEMORY')
    # No explicit BEGIN: the sqlite3 module opens a transaction implicitly
    # before the first INSERT, and everything is committed once at the end.
    for pos, date_id in enumerate(dates):
        # Dates with fewer than 200 older dates behind them cannot produce a
        # complete row; skip the lookups instead of relying on a swallowed
        # IndexError.
        if pos + OFFSETS[-1] < len(dates):
            lookback = [dates[pos + off] for off in OFFSETS]
            for symbol_id in symbols:
                closes = _closes_for(conn, symbol_id, lookback)
                if closes is not None:
                    conn.execute(trend_sql, [symbol_id, date_id] + closes)
        print("done: " + repr(date_id))
    conn.commit()
finally:
    conn.close()
ありがとう!
tl;dr: 元のリストの各エントリについて、終値をもとに、各シンボル・各日付の7日、14日、20日、50日、100日、200日の価格を求め、それをテーブルに格納したいと考えています。できれば純粋なSQLで実現したいのですが、Pythonでも構いません。