8

以下のようなExcelシートにHTMLタグ付きのデータがあります。

<b>This is test data<br>Nice
<div> Go on this is next Cell
Very goood <b>.....</b>

つまり、基本的には、Excelシート内のすべてのHTMLタグを削除するか、スペースに置き換えたいです。

4

3 に答える 3

39

パターン <*> を指定して「すべて置換 (Replace All)」を実行します:

タグのパターンを置き換える

これを開くには、リボンの [ホーム] > [検索と選択] > [置換...] (Home > Find & Select > Replace...) に移動するか、単に CTRL+H を押します。

余分なスペースは、TRIM関数を使用してさらに削除できます。幸運を!

2013-02-05T10:59:48.867 に回答
5

Excel で VBA エディターを開き (Alt + F11)、右側のプロジェクト エクスプローラーでプロジェクト名 (スプレッドシート名) をクリックします。[挿入] -> [新しいモジュール] を選択します。以下のユーザー定義関数をモジュール ウィンドウに貼り付けます。マクロ有効ブック (.xlsm) として保存します。

データがセル A2 にあると仮定して、関数「=StripHTML(A2)」を入力します。ここから実際の例をダウンロードすることもできます。

http://jfrancisconsulting.com/how-to-strip-html-tags-in-excel/

Function StripHTML(cell As Range) As String
    ' Returns the contents of `cell` as plain text: HTML tags are removed and
    ' common HTML character entities are decoded.
    '
    ' Parameters:
    '   cell - the worksheet cell whose text should be cleaned. Intended for a
    '          single cell, e.g. =StripHTML(A2).
    ' Returns:
    '   The cleaned text. </p> becomes two line feeds, <br> becomes one line
    '   feed and <li> becomes "-"; every other tag is dropped.
    '
    ' Processing order matters:
    '   1. Tags are removed BEFORE entities are decoded, so that escaped text
    '      such as "&lt;b&gt;" survives as the literal text "<b>" instead of
    '      being deleted as if it were a tag.
    '   2. "&amp;" is decoded LAST, so that "&amp;lt;" yields "&lt;" and is not
    '      decoded twice into "<".
    Dim RegEx As Object
    Dim sInput As String
    Dim entityMap As String
    Dim pairs() As String
    Dim parts() As String
    Dim i As Long

    ' Range.Text is the *displayed* text (it can be "####"), so prefer the
    ' stored value when the cell holds a string. Non-string cells (numbers,
    ' dates, errors) keep the original behaviour of using the displayed text.
    If VarType(cell.Value) = vbString Then
        sInput = cell.Value
    Else
        sInput = cell.Text
    End If

    ' Normalise line endings. VBA string literals have no "\x.." escapes, so
    ' the actual control characters are used here.
    sInput = Replace(sInput, vbCrLf, vbLf)
    sInput = Replace(sInput, vbNullChar, vbLf)

    Set RegEx = CreateObject("vbscript.regexp")
    With RegEx
        .Global = True
        .IgnoreCase = True      ' HTML tag names are case-insensitive
        .MultiLine = True

        ' End of paragraph -> blank line.
        .Pattern = "</p\s*>"
        sInput = .Replace(sInput, vbLf & vbLf)

        ' <br>, <br/> and <br /> -> line break.
        .Pattern = "<br\s*/?>"
        sInput = .Replace(sInput, vbLf)

        ' List items -> dashes. The \s guard keeps tags such as <link> from matching.
        .Pattern = "<li(\s[^>]*)?>"
        sInput = .Replace(sInput, "-")

        ' Remove all the remaining HTML tags.
        .Pattern = "<[^>]+>"
        sInput = .Replace(sInput, "")
    End With
    Set RegEx = Nothing

    ' Entity name -> Unicode code point. Code points are used instead of literal
    ' characters so the module does not depend on the code page of the VBA editor.
    ' NOTE(review): "#39", "acute" and "#96" replace three statements whose search
    ' string was empty (and therefore did nothing); they are presumably the
    ' entities the author intended for "'", the acute accent and the backtick.
    ' "amp" must stay last (see above).
    entityMap = _
        "ndash=8211 mdash=8212 iexcl=161 iquest=191 quot=34 ldquo=8220 rdquo=8221 #39=39 " & _
        "lsquo=8216 rsquo=8217 laquo=171 raquo=187 nbsp=32 cent=162 copy=169 divide=247 " & _
        "gt=62 lt=60 micro=181 middot=183 para=182 plusmn=177 euro=8364 pound=163 reg=174 " & _
        "sect=167 trade=8482 yen=165 aacute=225 Aacute=193 agrave=224 Agrave=192 acirc=226 " & _
        "Acirc=194 aring=229 Aring=197 atilde=227 Atilde=195 auml=228 Auml=196 aelig=230 " & _
        "AElig=198 ccedil=231 Ccedil=199 eacute=233 Eacute=201 egrave=232 Egrave=200 " & _
        "ecirc=234 Ecirc=202 euml=235 Euml=203 iacute=237 Iacute=205 igrave=236 Igrave=204 " & _
        "icirc=238 Icirc=206 iuml=239 Iuml=207 ntilde=241 Ntilde=209 oacute=243 Oacute=211 " & _
        "ograve=242 Ograve=210 ocirc=244 Ocirc=212 oslash=248 Oslash=216 otilde=245 " & _
        "Otilde=213 ouml=246 Ouml=214 szlig=223 uacute=250 Uacute=218 ugrave=249 Ugrave=217 " & _
        "ucirc=251 Ucirc=219 uuml=252 Uuml=220 yuml=255 acute=180 #96=96 amp=38"

    pairs = Split(entityMap, " ")
    For i = LBound(pairs) To UBound(pairs)
        If Len(pairs(i)) > 0 Then
            parts = Split(pairs(i), "=")
            ' Binary comparison: entity names are case-sensitive
            ' (&Aacute; and &aacute; are different characters).
            sInput = Replace(sInput, "&" & parts(0) & ";", ChrW(CLng(parts(1))), 1, -1, vbBinaryCompare)
        End If
    Next i

    StripHTML = sInput
End Function
2013-06-18T15:21:08.973 に回答
0

上記のマクロがうまくいかなかったので、自分で修正しました。これは私の最初のスクリプトです。皆さんがそれを改善し、より速くし、さらに追加することができれば、大歓迎です!

私にはこれまでプログラミングの経験がほとんどありませんでした (6 年前にごく基本的な Java を触った程度です) が、いくつかの助けを借り、多くの推測を重ねて (実際には数時間かかりました)、このスクリプトを作成することができました。このスクリプトは <> タグや &#8217; のようなテキストをすべて削除しますが、<BR> は改行には置き換えられません (これは CTRL + H で行えます。「検索: <br>」「置換: (Alt を押したままテンキーで 0010 と入力します。置換ウィンドウで小さなドットが点滅するはずです)」と指定し、「すべて置換」を押します)。

以下のコードをユーザー モジュールに貼り付けます (alt + f11、シート 1 を右クリック -> 挿入 -> モジュール -> コードの貼り付け)

ボタンを作成するには、まず [ファイル] -> [オプション] -> [リボンのカスタマイズ] -> [開発] チェックボックスをオンにします。次に、[開発] タブ -> [挿入] -> [ボタン] に移動し、ボタンを配置して右クリック -> [マクロの登録] -> RemoveTags を選択します。

Sub RemoveTags()
    ' Strips HTML tags from every cell in the current selection and replaces a
    ' fixed list of numeric character references with a single space.
    '
    ' Side effects: the whole selection is formatted as Text, and every
    ' processed cell is overwritten with its cleaned value (a formula in such a
    ' cell is therefore replaced by its cleaned result).
    '
    ' Each cell is cleaned in one pass. Repeating the tag strip after each entity
    ' replacement is unnecessary: once the tags are gone, swapping an entity for
    ' a space cannot create a new tag.
    Dim r As Range
    Dim target As Range
    Dim entities As Variant
    Dim cleaned As String
    Dim i As Long

    ' Selection may be a chart, shape, etc.; there is nothing to clean then.
    If TypeName(Selection) <> "Range" Then Exit Sub

    Selection.NumberFormat = "@"  'set cells to text numberformat

    ' Only cells inside the used range can hold data; this avoids walking over
    ' a million blank cells when a whole column or row is selected.
    Set target = Intersect(Selection, Selection.Worksheet.UsedRange)
    If target Is Nothing Then Exit Sub

    ' NOTE(review): "&#146;s" also removes the trailing "s" (e.g. "John&#146;s"
    ' becomes "John "). Kept exactly as written -- confirm this is intended.
    entities = Array("&#8217;", "&#8211;", "&#8216;", "&#8232;", "&#8233;", "&#146;s")

    With CreateObject("vbscript.regexp")
        ' [^>]* instead of .*? so that a tag containing an in-cell line break
        ' (Alt+Enter) is matched too; "." does not match line feeds.
        .Pattern = "<[^>]*>"
        .Global = True

        For Each r In target.Cells
            ' Error values (#N/A, #VALUE!, ...) cannot be converted to text and
            ' empty cells have nothing to clean.
            If Not IsError(r.Value) Then
                If Not IsEmpty(r.Value) Then
                    cleaned = .Replace(r.Value, "")
                    For i = LBound(entities) To UBound(entities)
                        cleaned = Replace(cleaned, entities(i), " ")
                    Next i
                    r.Value = cleaned
                End If
            End If
        Next r
    End With
End Sub


Private Sub CommandButton1_Click()
    ' Empty procedure: it contains no statements, so running it does nothing.
    ' NOTE(review): presumably an auto-generated Click event stub for a control
    ' named CommandButton1 -- confirm whether it is still needed.

End Sub
2016-01-14T08:02:12.163 に回答