DBに接続して重複エントリをチェックし、重複を削除して元のレコードの同じテキストの数を更新するプログラムがあります。
私の問題は、わずか 100000 レコードで数時間かかり、プログラムが「ヒープスペースが必要です」というエラーで終了することです。
このプログラムを最適化して、タスクを非常に迅速に実行するにはどうすればよいですか?
public class Main {
// serveropsdb.properties file location = /opt/serverops/scripts/serverops.properties
private final static String PROPERTIES_FILENAME=File.separatorChar+"opt"+File.separatorChar+"serverops"+File.separatorChar+"scripts"+File.separatorChar+"serveropsdb.properties";
private final static String DATE_FORMAT = "yyyy/MM/dd HH:mm:ss";
private final static String PROPERTY_NAME_STARTTIME= "start.time";
private final static String PROPERTY_NAME_ENDTIME= "end.time";
private final static String PROPERTY_NAME_DB_TABLENAME= "db.tablename";
private final static String PROPERTY_NAME_DB_USERNAME= "db.username";
private final static String PROPERTY_NAME_DB_PASSWORD= "db.password";
private final static String PROPERTY_NAME_DB_NAME= "db.name";
public static void main(String[] args) {
System.out.println("DB consolidation started");
Properties properties = new Properties();
try { //loading serverops.properties file
properties.load(new FileInputStream(PROPERTIES_FILENAME));
} catch (IOException e) {
System.out.println("Error loading "+PROPERTIES_FILENAME+" properties!");
e.printStackTrace();
}
try { // loading jdbc driver
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
System.out.println(" JDBC Driver not found");
e.printStackTrace();
return;
}
System.out.println("MySQL JDBC Driver found! Connecting to Database");
Connection connection = null;
ResultSet resultSet = null;
ResultSet findResultSet = null;
PreparedStatement allStatement, findStatement, updateStatement;
//date formate used yyyy/MM/dd HH:mm:ss
DateTimeFormatter dateFromatter= DateTimeFormat.forPattern(DATE_FORMAT);
DateTime startDate = dateFromatter.parseDateTime(properties.getProperty(PROPERTY_NAME_STARTTIME));
DateTime endDate= dateFromatter.parseDateTime( properties.getProperty( PROPERTY_NAME_ENDTIME));
Timestamp t1 = new Timestamp(startDate.getMillis());
Timestamp t2 = new Timestamp(endDate.getMillis());
StringBuilder sql;
String toBeRemovedIndex ;
int updateNumEntries;
String toBeRemovedLog;
String toBeRemovedHostname;
boolean updateDB=false;
try { // connecting to DB with credentials from the properties file
connection = DriverManager.getConnection(
"jdbc:mysql://localhost:3306/"+properties.getProperty(PROPERTY_NAME_DB_NAME),
properties.getProperty(PROPERTY_NAME_DB_USERNAME),
properties.getProperty(PROPERTY_NAME_DB_PASSWORD));
// getting all record between start time and end time
sql = new StringBuilder( "SELECT * FROM"
+ properties.getProperty(PROPERTY_NAME_DB_TABLENAME)
+ " WHERE last_seen BETWEEN ? AND ?");
allStatement = connection.prepareStatement(sql.toString());
allStatement.setTimestamp(1, t1);
allStatement.setTimestamp(2,t2);
resultSet = allStatement.executeQuery( );
while (resultSet.next()) {
toBeRemovedIndex = resultSet.getString(1);
updateNumEntries = resultSet.getInt(2);
toBeRemovedLog = resultSet.getString(3);
toBeRemovedHostname = resultSet.getString(5);
// selecting the duplicate entries with logmessage , id and hostname
sql = new StringBuilder( "SELECT * FROM "
+ properties.getProperty(PROPERTY_NAME_DB_TABLENAME)
+ " where log_message=? and id <> ? and hostname = ?" );
findStatement = connection.prepareStatement(sql.toString());
findStatement.setString(1, toBeRemovedLog );
findStatement.setString(2, toBeRemovedIndex );
findStatement.setString(3, toBeRemovedHostname);
findResultSet = findStatement.executeQuery();
String newId="";
while( findResultSet.next()) {
newId = findResultSet.getString(1);
updateNumEntries +=findResultSet.getInt(2);
updateDB = true;
}
if(updateDB ) { // if duplicate entry found - deleting it from the db
sql = new StringBuilder( "DELETE FROM"
+ properties.getProperty(PROPERTY_NAME_DB_TABLENAME)
+ " where id = ?" );
updateStatement = connection.prepareStatement(sql.toString());
updateStatement.setString(1, toBeRemovedIndex );
updateStatement.executeUpdate();
// updating similar entry with number of records
sql = new StringBuilder( "Update "
+ properties.getProperty(PROPERTY_NAME_DB_TABLENAME)
+ " set number_of_entries = ? where id = ? " );
updateStatement = connection.prepareStatement(sql.toString());
updateStatement.setLong(1, updateNumEntries );
updateStatement.setString(2, newId );
updateStatement.executeUpdate();
updateDB = false;
updateStatement=null;
}
}
} catch (SQLException e) {
System.out.println("Connection Failed! Check output console");
e.printStackTrace();
return;
}
DateTime now = new DateTime();
if(connection != null) {
try {
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
System.out.println( "Current time = "
+ now.toString()
+ " DB consoladiation done successfully between "
+ t1.toString() + " and "+ t2.toString());
}
}
テーブル構造:
CREATE TABLE IF NOT EXISTS `syslog_parsed` (
`id` char(36) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
`number_of_entries` int(11) NOT NULL,
`log_message` text CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
`last_seen` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`hostname` varchar(200) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
KEY `ID` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;