HttpClient クラスを使って、多数の用語について Google のヒット数を連続して取得したいのですが、Google のサーバーはこの操作を繰り返し実行することを許可していません。以下が私のプログラムです。パラメーター Concept は検索したい用語です。
/**
 * Requests the Google search result page for a term and extracts the hit
 * count shown in the {@code <div id="resultStats">} element.
 *
 * <p>The term is URL-encoded (UTF-8) before it is placed in the query string.
 * The response is read line by line; the first line containing the
 * {@code resultStats} marker is parsed and the scan stops there.
 *
 * @param Concept the search term
 * @return the parsed hit count, or {@code 0} when the request does not return
 *         HTTP 200, the marker is not found, the count cannot be parsed, or an
 *         I/O error occurs (errors are reported on the standard streams)
 */
public static double extractGoogleCount(String Concept)
{
    double temp = 0;
    HttpClient httpClient = new HttpClient();
    GetMethod getMethod = null;
    try
    {
        // Encode the term so that spaces, '&', '#', '+' or non-ASCII text
        // cannot corrupt the query string.
        String url = "http://www.google.com/search?hl=en&newwindow=1&q="
                + java.net.URLEncoder.encode(Concept, "UTF-8")
                + "&aq=f&aqi=&aql=&oq=&gs_rfai=";
        getMethod = new GetMethod(url);
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler());

        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != HttpStatus.SC_OK)
        {
            System.err.println("Method failed: "
                    + getMethod.getStatusLine() + url);
            // Do not try to parse a page returned with a non-200 status.
            return temp;
        }

        InputStream responseBody = getMethod.getResponseBodyAsStream();
        if (responseBody == null)
        {
            // No response body available, so there is nothing to parse.
            return temp;
        }

        // Decode with the charset announced by the response instead of the
        // deprecated, byte-oriented DataInputStream.readLine().
        java.io.BufferedReader reader = new java.io.BufferedReader(
                new java.io.InputStreamReader(responseBody,
                        getMethod.getResponseCharSet()));
        try
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                int index = line.indexOf("<div id=\"resultStats\">");
                if (index == -1)
                {
                    continue;
                }
                // Clamp the window: the line may hold fewer than 100
                // characters after the marker.
                int end = Math.min(line.length(), index + 100);
                temp = parseResultStats(line.substring(index, end));
                break;
            }
        } finally
        {
            reader.close();
        }
        return temp;
    } catch (HttpException e)
    {
        System.out.println("Please check your provided http address!");
        e.printStackTrace();
    } catch (IOException e)
    {
        e.printStackTrace();
    } catch (RuntimeException e)
    {
        // Covers parse failures (e.g. NumberFormatException) on unexpected markup.
        e.printStackTrace();
    } finally
    {
        httpClient.getState().clear();
        if (getMethod != null)
        {
            getMethod.releaseConnection();
        }
    }
    // Reached only after a handled exception: report "no count".
    return temp;
}

/**
 * Parses the hit count out of a fragment that starts at the
 * {@code <div id="resultStats">} marker.
 *
 * <p>The fragment is split on single spaces. When it contains "About", the
 * number is taken from the third token; otherwise it is the text after
 * {@code '>'} in the second token. Thousands separators (commas) are removed
 * in both cases before parsing.
 *
 * @param sub the page fragment beginning at the marker
 * @return the parsed count
 * @throws NumberFormatException if the fragment has too few tokens or the
 *         selected token is not a number
 */
private static double parseResultStats(String sub)
{
    String[] result = sub.split(" ");
    String number;
    if (sub.indexOf("About") >= 0)
    {
        if (result.length < 3)
        {
            throw new NumberFormatException("Unexpected resultStats format: " + sub);
        }
        number = result[2].replaceAll(",", "");
    } else
    {
        if (result.length < 2)
        {
            throw new NumberFormatException("Unexpected resultStats format: " + sub);
        }
        number = result[1].substring(result[1].indexOf(">") + 1)
                .replaceAll(",", "");
        System.out.println("number:" + number);
    }
    return Double.parseDouble(number);
}