また、次のようにUTF-8エンコーディングを指定してHttpConnectionの入力ストリームを開くのも有効です。
// Wrap the input stream in a reader that decodes its bytes as UTF-8, using an
// explicitly named charset.
// NOTE(review): `in` is not declared in this snippet; per the surrounding text it is
// presumably the InputStream opened from the HttpConnection — confirm at the call site.
String encoding = "UTF-8";
Reader reader = new InputStreamReader(in, encoding);
以下の一連のStringUtilsメソッドを使用すると、クリーンで適切にフォーマットされたテキストを取得できます:
/**
 * Removes HTML tags from the given string and decodes the character
 * references handled by {@code decodeEntities}.
 *
 * <p>If the input contains no {@code <} at all, it is only entity-decoded
 * (not trimmed). Otherwise {@code </p>}, {@code <br/>} and {@code <br>} are
 * turned into {@code "\r\n"}, every remaining {@code <...>} span is dropped,
 * the result is trimmed and then entity-decoded. A {@code <} that is never
 * closed by a {@code >} is kept as literal text.
 *
 * @param text input that may contain HTML tags (e.g. {@code <b>cat</b>}); may be null
 * @return the text without HTML tags (e.g. {@code cat}); null if {@code text} is null;
 *         the unmodified input if an unexpected exception occurs while processing
 */
public static String removeHtml(String text) {
    if (text == null) {
        return null;
    }
    try {
        if (text.indexOf("<") == -1) {
            // No markup at all: only the entities need decoding.
            return decodeEntities(text);
        }

        // Turn paragraph ends and line breaks into real line breaks before the
        // generic tag stripping below would silently swallow them.
        String htmlText = StringUtils.replace(text, "</p>", "\r\n");
        htmlText = StringUtils.replace(htmlText, "<br/>", "\r\n");
        htmlText = StringUtils.replace(htmlText, "<br>", "\r\n");

        StringBuffer plainText = new StringBuffer(htmlText.length());
        int pos = 0;
        // Search only after the replacements: they change the string, so an
        // index computed on the original text would point at the wrong place.
        int tagStart = htmlText.indexOf("<", pos);
        while (tagStart >= 0) {
            int tagEnd = htmlText.indexOf(">", tagStart);
            if (tagEnd == -1) {
                // Unterminated '<': stop stripping and keep the rest verbatim
                // instead of looping forever on the same position.
                break;
            }
            plainText.append(htmlText.substring(pos, tagStart));
            pos = tagEnd + 1;
            tagStart = htmlText.indexOf("<", pos);
        }
        // Keep whatever follows the last complete tag.
        plainText.append(htmlText.substring(pos));

        return decodeEntities(plainText.toString().trim());
    } catch (Exception e) {
        // Best effort: report the problem and hand back the input untouched.
        System.err.println("Error while removing HTML: " + e.toString());
        return text;
    }
}
public static String decodeEntities(String html) {
String result = StringUtils.replace(html, "<", "<");
result = StringUtils.replace(result, ">", ">");
result = StringUtils.replace(result, " ", " ");
result = StringUtils.replace(result, "&", "&");
result = StringUtils.replace(result, "ä", "ä");
result = StringUtils.replace(result, "ö", "ö");
result = StringUtils.replace(result, """, "'");
result = StringUtils.replace(result, "&lquot;", "'");
result = StringUtils.replace(result, "&rquot;", "'");
result = StringUtils.replace(result, "
", "\r");
return result;
}
/**
 * Replaces all instances of a String in a String.
 *
 * <p>Occurrences are matched left to right and do not overlap; text inserted
 * as a replacement is never searched again.
 *
 * @param s String to alter; returned as-is when null.
 * @param f String to look for; when null or empty, {@code s} is returned unchanged.
 * @param r String to replace it with, or null to just remove it.
 * @return {@code s} with every occurrence of {@code f} replaced by {@code r}
 */
public static String replace(String s, String f, String r) {
    // An empty search string matches at every index, which would never terminate.
    if (s == null || f == null || f.length() == 0) {
        return s;
    }
    int match = s.indexOf(f);
    if (match == -1) {
        return s;
    }
    String replacement = (r == null) ? "" : r;
    // StringBuffer (not StringBuilder) so this also compiles on pre-Java-5 runtimes —
    // presumably why this helper is hand-rolled at all; confirm the target platform.
    StringBuffer out = new StringBuffer(s.length());
    int pos = 0;
    while (match != -1) {
        out.append(s.substring(pos, match)).append(replacement);
        pos = match + f.length();
        match = s.indexOf(f, pos);
    }
    // Copy the tail that follows the last occurrence.
    out.append(s.substring(pos));
    return out.toString();
}
/**
 * Re-decodes a string through a UTF-8 reader.
 *
 * <p>The input is turned into bytes with the platform default charset
 * ({@code getBytes()} without an argument), those bytes are read back as
 * UTF-8 characters, only the low 8 bits of each decoded character are kept,
 * and the resulting bytes are converted to a String again with the platform
 * default charset ({@code toString()} without an argument). The result
 * therefore depends on the default charset of the running JVM.
 *
 * @param str the string to process; a null value raises a
 *            NullPointerException because it is dereferenced before the try block
 * @return the re-decoded string, or {@code str} itself if any exception occurs
 *         while reading (the stack trace is printed in that case)
 */
public static String cleanEncodedString(String str) {
    final String charsetName = "UTF-8";
    // Deliberately outside the try block, as in the original control flow.
    InputStream source = new ByteArrayInputStream(str.getBytes());
    try {
        InputStreamReader decoder = new InputStreamReader(source, charsetName);
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        for (int ch = decoder.read(); ch != -1; ch = decoder.read()) {
            // Narrowing to byte keeps just the low 8 bits of the decoded char.
            sink.write((byte) ch);
        }
        return sink.toString();
    } catch (Exception failure) {
        failure.printStackTrace();
        return str;
    }
}