まずはデータの正規化（都市名と州名を別々の列に分けて保存すること）を検討してください。
それができない場合は、州名を格納したテーブル（state_names）を用意し、次のクエリで都市名と州名を分離します。
-- Split a free-text "<city> <state>" value from bad_data.col into separate
-- city and state columns (MySQL; relies on SUBSTRING_INDEX).
--   1. candidates: for two-word values the last word is the state candidate;
--      for longer values the last TWO words are taken so that names such as
--      'New York' can be recognised.
--   2. resolved: when the candidate is listed in state_names, the spelling
--      stored in state_names is used; otherwise the state falls back to the
--      candidate's last word.
--   3. city is whatever precedes the resolved state in col.
-- A value without any space yields NULL for both city and state.
SELECT
    resolved.col,
    -- CHAR_LENGTH, not LENGTH: SUBSTR counts characters while LENGTH counts
    -- bytes, so multi-byte values (e.g. 'Cañon City Colorado') would be cut
    -- at the wrong position.
    TRIM(
        SUBSTR(
            resolved.col,
            1,
            CHAR_LENGTH(resolved.col) - CHAR_LENGTH(resolved.state)
        )
    ) AS city,
    resolved.state AS state
FROM (
    SELECT
        candidates.col,
        COALESCE(
            known.state,
            SUBSTRING_INDEX(candidates.state, ' ', -1)
        ) AS state
    FROM (
        SELECT
            col,
            -- (number of spaces + 1) is used as the word count.
            CASE
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) + 1 = 2
                    THEN SUBSTRING_INDEX(col, ' ', -1)
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) + 1 > 2
                    THEN SUBSTRING_INDEX(col, ' ', -2)
                ELSE NULL
            END AS state
        FROM bad_data
    ) AS candidates
    LEFT JOIN state_names AS known
        ON known.state = candidates.state
) AS resolved;
結果：

| col                  | city           | state      |
|----------------------|----------------|------------|
| Spring Texas         | Spring         | Texas      |
| Corpus Christi Texas | Corpus Christi | Texas      |
| Orange California    | Orange         | California |
| New York New York    | New York       | New York   |
州名テーブルを用意できない場合は、州名の一覧をクエリ内に直接埋め込んだ次のクエリを使用してください。
-- Split a free-text "<city> <state>" value from bad_data.col into separate
-- city and state columns without needing a lookup table: the known state
-- names are supplied inline as a derived table (MySQL; relies on
-- SUBSTRING_INDEX).
--   1. candidates: for two-word values the last word is the state candidate;
--      for longer values the last TWO words are taken so that names such as
--      'New York' can be recognised.
--   2. resolved: when the candidate matches an inline state name, that
--      spelling is used; otherwise the state falls back to the candidate's
--      last word.
--   3. city is whatever precedes the resolved state in col.
-- A value without any space yields NULL for both city and state.
SELECT
    resolved.col,
    -- CHAR_LENGTH, not LENGTH: SUBSTR counts characters while LENGTH counts
    -- bytes, so multi-byte values (e.g. 'Cañon City Colorado') would be cut
    -- at the wrong position.
    TRIM(
        SUBSTR(
            resolved.col,
            1,
            CHAR_LENGTH(resolved.col) - CHAR_LENGTH(resolved.state)
        )
    ) AS city,
    resolved.state AS state
FROM (
    SELECT
        candidates.col,
        COALESCE(
            known.state,
            SUBSTRING_INDEX(candidates.state, ' ', -1)
        ) AS state
    FROM (
        SELECT
            col,
            -- (number of spaces + 1) is used as the word count.
            CASE
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) + 1 = 2
                    THEN SUBSTRING_INDEX(col, ' ', -1)
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) + 1 > 2
                    THEN SUBSTRING_INDEX(col, ' ', -2)
                ELSE NULL
            END AS state
        FROM bad_data
    ) AS candidates
    LEFT JOIN (
        -- Inline list of state/territory names, alphabetical. The multi-word
        -- entries are the ones that change the split; single-word names also
        -- resolve through the last-word fallback above. 'Alaska' was missing
        -- from the original list and has been added.
        SELECT 'Alabama' AS state
        UNION ALL SELECT 'Alaska'
        UNION ALL SELECT 'Arizona'
        UNION ALL SELECT 'Arkansas'
        UNION ALL SELECT 'California'
        UNION ALL SELECT 'Colorado'
        UNION ALL SELECT 'Connecticut'
        UNION ALL SELECT 'Delaware'
        UNION ALL SELECT 'Florida'
        UNION ALL SELECT 'Georgia'
        UNION ALL SELECT 'Guam'
        UNION ALL SELECT 'Hawaii'
        UNION ALL SELECT 'Idaho'
        UNION ALL SELECT 'Illinois'
        UNION ALL SELECT 'Indiana'
        UNION ALL SELECT 'Iowa'
        UNION ALL SELECT 'Kansas'
        UNION ALL SELECT 'Kentucky'
        UNION ALL SELECT 'Louisiana'
        UNION ALL SELECT 'Maine'
        UNION ALL SELECT 'Maryland'
        UNION ALL SELECT 'Massachusetts'
        UNION ALL SELECT 'Michigan'
        UNION ALL SELECT 'Minnesota'
        UNION ALL SELECT 'Mississippi'
        UNION ALL SELECT 'Missouri'
        UNION ALL SELECT 'Montana'
        UNION ALL SELECT 'Nebraska'
        UNION ALL SELECT 'Nevada'
        UNION ALL SELECT 'New Hampshire'
        UNION ALL SELECT 'New Jersey'
        UNION ALL SELECT 'New Mexico'
        UNION ALL SELECT 'New York'
        UNION ALL SELECT 'North Carolina'
        UNION ALL SELECT 'North Dakota'
        UNION ALL SELECT 'Ohio'
        UNION ALL SELECT 'Oklahoma'
        UNION ALL SELECT 'Oregon'
        UNION ALL SELECT 'Pennsylvania'
        UNION ALL SELECT 'Puerto Rico'
        UNION ALL SELECT 'Rhode Island'
        UNION ALL SELECT 'South Carolina'
        UNION ALL SELECT 'South Dakota'
        UNION ALL SELECT 'Tennessee'
        UNION ALL SELECT 'Texas'
        UNION ALL SELECT 'Utah'
        UNION ALL SELECT 'Vermont'
        UNION ALL SELECT 'Virginia'
        UNION ALL SELECT 'Washington'
        UNION ALL SELECT 'West Virginia'
        UNION ALL SELECT 'Wisconsin'
        UNION ALL SELECT 'Wyoming'
    ) AS known
        ON known.state = candidates.state
) AS resolved;