ここでのアドバイスの後、VB で次の関数を考案しました。これは、常に完璧ではありませんが (会社名とスイート ラインが指定されている場合は、スイートと都市を組み合わせて) 使用可能なデータを作成します。私自身のルールを破ったことなどについて、気軽にコメント/リファクタリング/怒鳴ってください。
Public Function parseAddress(ByVal input As String) As Collection
input = input.Replace(",", "")
input = input.Replace(" ", " ")
Dim splitString() As String = Split(input)
Dim streetMarker() As String = New String() {"street", "st", "st.", "avenue", "ave", "ave.", "blvd", "blvd.", "highway", "hwy", "hwy.", "box", "road", "rd", "rd.", "lane", "ln", "ln.", "circle", "circ", "circ.", "court", "ct", "ct."}
Dim address1 As String
Dim address2 As String = ""
Dim city As String
Dim state As String
Dim zip As String
Dim streetMarkerIndex As Integer
zip = splitString(splitString.Length - 1).ToString()
state = splitString(splitString.Length - 2).ToString()
streetMarkerIndex = getLastIndexOf(splitString, streetMarker) + 1
Dim sb As New StringBuilder
For counter As Integer = streetMarkerIndex To splitString.Length - 3
sb.Append(splitString(counter) + " ")
Next counter
city = RTrim(sb.ToString())
Dim addressIndex As Integer = 0
For counter As Integer = 0 To streetMarkerIndex
If IsNumeric(splitString(counter)) _
Or splitString(counter).ToString.ToLower = "po" _
Or splitString(counter).ToString().ToLower().Replace(".", "") = "po" Then
addressIndex = counter
Exit For
End If
Next counter
sb = New StringBuilder
For counter As Integer = addressIndex To streetMarkerIndex - 1
sb.Append(splitString(counter) + " ")
Next counter
address1 = RTrim(sb.ToString())
sb = New StringBuilder
If addressIndex = 0 Then
If splitString(splitString.Length - 2).ToString() <> splitString(streetMarkerIndex + 1) Then
For counter As Integer = streetMarkerIndex To splitString.Length - 2
sb.Append(splitString(counter) + " ")
Next counter
End If
Else
For counter As Integer = 0 To addressIndex - 1
sb.Append(splitString(counter) + " ")
Next counter
End If
address2 = RTrim(sb.ToString())
Dim output As New Collection
output.Add(address1, "Address1")
output.Add(address2, "Address2")
output.Add(city, "City")
output.Add(state, "State")
output.Add(zip, "Zip")
Return output
End Function
Private Function getLastIndexOf(ByVal sArray As String(), ByVal checkArray As String()) As Integer
Dim sourceIndex As Integer = 0
Dim outputIndex As Integer = 0
For Each item As String In checkArray
For Each source As String In sArray
If source.ToLower = item.ToLower Then
outputIndex = sourceIndex
If item.ToLower = "box" Then
outputIndex = outputIndex + 1
End If
End If
sourceIndex = sourceIndex + 1
Next
sourceIndex = 0
Next
Return outputIndex
End Function
parseAddress
関数「AP Croll & Son 2299 Lewes-Georgetown Hwy, Georgetown, DE 19947」を渡すと、次のように返されます。
2299 Lewes-Georgetown Hwy
A. P. Croll & Son
Georgetown
DE
19947