CloudSearch のドメインを空にするために、Python 用ライブラリ boto を使って次のような処理を行っています。きれいな方法ではありませんが、目的は果たせます。難しいのは、一度に取得するデータ量を CloudSearch の 5 MB の制限内に収めることです。
# Empty the CloudSearch domain: repeatedly fetch a page of documents, request
# a delete for each one, commit, and re-count until nothing is left.
adaptor = CloudSearchAdaptor.Instance()
count = adaptor.get_total_documents()
# get_total_documents() returns None when the count query fails, so check for
# that explicitly instead of comparing None with an integer.
while count is not None and count > 0:
    # "term OR NOT term" under the 'simple' parser, i.e. intended to match
    # every document; at most 1000 results are requested per pass.
    results = adaptor.search("lolzcat|-lolzcat", 'simple', 1000)
    if results is None:
        # search() has already reported the failure; stop rather than
        # dereferencing a missing result set.
        break
    for doc in results.docs:
        adaptor.delete(doc['id'])
    adaptor.commit()
    # Add a delay here if CloudSearch takes too long to propagate the deletes.
    count = adaptor.get_total_documents()
CloudSearch アダプタークラスは次のようになります。
from boto.cloudsearch2.layer2 import Layer2
from singleton import Singleton
@Singleton
class CloudSearchAdaptor:
    """Thin wrapper around the document and search services of one domain.

    The class is wrapped by the ``Singleton`` decorator; code in this file
    reaches the shared object through ``CloudSearchAdaptor.Instance()``.

    Every operation is best-effort: a failure is reported on stdout together
    with the underlying exception, and the method returns ``None`` instead
    of raising.
    """

    def __init__(self):
        """Connect to CloudSearch and cache the domain's service handles."""
        # NOTE(review): the credential, region and domain strings look like
        # placeholders to be replaced with real values -- confirm.
        layer2 = Layer2(
            aws_access_key_id='AWS_ACCESS_KEY_ID',
            aws_secret_access_key='AWS_SECRET_ACCESS_KEY',
            region='AWS_REGION'
        )
        self.domain = layer2.lookup('AWS_DOMAIN')
        self.doc_service = self.domain.get_document_service()
        self.search_service = self.domain.get_search_service()

    @staticmethod
    def delete(id):
        """Ask the document service to delete the document with this id.

        Callers in this file follow a series of deletes with ``commit()``.
        Returns ``None``; errors are printed, not raised.
        """
        instance = CloudSearchAdaptor.Instance()
        try:
            instance.doc_service.delete(id)
        except Exception as e:
            print('Error deleting to CloudSearch: %s' % (e,))

    @staticmethod
    def search(query, parser='structured', size=1000):
        """Run ``query`` against the search service.

        ``parser`` and ``size`` are passed straight through to the service's
        ``search`` call. Returns the service's result object, or ``None`` if
        the search raised (the error is printed).
        """
        instance = CloudSearchAdaptor.Instance()
        try:
            return instance.search_service.search(
                q=query, parser=parser, size=size)
        except Exception as e:
            print('Error searching CloudSearch: %s' % (e,))

    @staticmethod
    def get_total_documents():
        """Return the hit count of a ``matchall`` query.

        The query asks for zero results (``size=0``) and only reads ``hits``
        from the response. Returns ``None`` if the query raised (the error
        is printed).
        """
        instance = CloudSearchAdaptor.Instance()
        try:
            results = instance.search_service.search(
                q='matchall', parser='structured', size=0)
            return results.hits
        except Exception as e:
            print('Error getting total documents from CloudSearch: %s' % (e,))

    @staticmethod
    def commit():
        """Commit the document service's pending batch, then clear its SDF.

        ``clear_sdf()`` is only reached when ``commit()`` did not raise.
        Returns ``None``; errors are printed, not raised.
        """
        try:
            doc_service = CloudSearchAdaptor.Instance().doc_service
            doc_service.commit()
            doc_service.clear_sdf()
        except Exception as e:
            print('Error committing to CloudSearch: %s' % (e,))