最近、Ganglia はほとんどの Hadoop サーバーを赤色で表示します。
atop、htop、... を調べると、jobtracker と namenode のプロセスが ~300% の CPU を消費していることがわかります:
30858 adtech adtech 42 15.54s 13.17s 0K 0K 0K 0K -- - S 6 332% java
31066 adtech adtech 24 6.18s 8.86s 0K 0K 0K 0K -- - S 7 174% java
31164 adtech adtech 44 5.66s 8.38s 0K 0K 0K 4K -- - S 5 162% java
関連する構成ファイルは次のとおりです。
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!-- core-site.xml: overrides a single property, fs.default.name, which is
     set to an hdfs:// URI naming host hmaster90 and port 9000. -->
<configuration>
<property>
<name>fs.default.name</name>
<!-- NOTE(review): the value is a full hdfs:// URI, while the description
     below only mentions the literal "local" or a host:port pair. The
     description text looks stale relative to the value; confirm against
     the documentation of the Hadoop release in use. -->
<value>hdfs://hmaster90:9000</value>
<description>
The name of the default file system. Either the literal string
"local" or a host:port for NDFS.
</description>
</property>
</configuration>
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!-- hdfs-site.xml: sets the replication factor and three directory
     locations. All three directories live under /data/hdfs11/dfs. -->
<configuration>
<!-- Replication value configured here is 2. -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- Name, data and tmp directories below share the /data/hdfs11/dfs parent.
     NOTE(review): presumably this is a single volume; verify on the hosts,
     since that would make all three compete for the same disk. -->
<property>
<name>dfs.name.dir</name>
<value>/data/hdfs11/dfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/data/hdfs11/dfs/data</value>
</property>
<!-- NOTE(review): hadoop.tmp.dir is usually set in core-site.xml rather
     than hdfs-site.xml; confirm that every daemon that needs this value
     actually loads it from this file. -->
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hdfs11/dfs/tmp</value>
</property>
</configuration>
mapred-site.xml
<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!-- mapred-site.xml: sets the jobtracker address, the MapReduce working
     directories (all under /data/hdfs11/mapreduce), the per-tasktracker
     slot limits, and the default map and reduce task counts. -->
<configuration>
<!-- Jobtracker address: host hmaster90, port 9001. -->
<property>
<name>mapred.job.tracker</name>
<value>hmaster90:9001</value>
</property>
<!-- System, local and temp directories share the /data/hdfs11/mapreduce parent. -->
<property>
<name>mapred.system.dir</name>
<value>/data/hdfs11/mapreduce/system</value>
</property>
<property>
<name>mapred.local.dir</name>
<value>/data/hdfs11/mapreduce/local</value>
</property>
<property>
<name>mapred.temp.dir</name>
<value>/data/hdfs11/mapreduce/temp</value>
</property>
<!-- Slot limits: 7 map plus 7 reduce tasks, so up to 14 concurrent tasks
     per tasktracker.
     NOTE(review): compare this total with the core count of each node;
     the core count is not visible in this file. -->
<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value>7</value>
<description>
The maximum number of map tasks that will be run simultaneously by a task tracker.
</description>
</property>
<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value>7</value>
<description>
The maximum number of reduce tasks that will be run simultaneously by a task tracker.
</description>
</property>
<!-- NOTE(review): the description below asks for a prime number, but the
     configured value 56 (7 x 8) is not prime. Either the value or the
     description should be revisited. -->
<property>
<name>mapred.map.tasks</name>
<value>56</value>
<description>
This should be a prime number larger than multiple number of slave hosts,
e.g. for 3 nodes set this to 17
</description>
</property>
<!-- NOTE(review): same mismatch here; the description asks for a prime
     number, but the configured value 21 (3 x 7) is not prime. -->
<property>
<name>mapred.reduce.tasks</name>
<value>21</value>
<description>
This should be a prime number close to a low multiple of slave hosts,
e.g. for 3 nodes set this to 7
</description>
</property>
</configuration>
jobtracker.log : https://gist.github.com/3032037
まずどこを見ればいいですか?
@J-16SDiZ に返信:
# su - adtech
$ jstack -J-d64 -m 3135
Attaching to process ID 3135, please wait...
Exception in thread "main" java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:616)
at sun.tools.jstack.JStack.runJStackTool(JStack.java:136)
at sun.tools.jstack.JStack.main(JStack.java:102)
Caused by: sun.jvm.hotspot.runtime.VMVersionMismatchException: Supported versions are 14.0-b16. Target VM is 20.5-b03
at sun.jvm.hotspot.runtime.VM.checkVMVersion(VM.java:223)
at sun.jvm.hotspot.runtime.VM.<init>(VM.java:286)
at sun.jvm.hotspot.runtime.VM.initialize(VM.java:344)
at sun.jvm.hotspot.bugspot.BugSpotAgent.setupVM(BugSpotAgent.java:594)
at sun.jvm.hotspot.bugspot.BugSpotAgent.go(BugSpotAgent.java:494)
at sun.jvm.hotspot.bugspot.BugSpotAgent.attach(BugSpotAgent.java:332)
at sun.jvm.hotspot.tools.Tool.start(Tool.java:163)
at sun.jvm.hotspot.tools.JStack.main(JStack.java:86)