Index: haystack/backends/whoosh_backend.py =================================================================== --- haystack/backends/whoosh_backend.py (wersja 353) +++ haystack/backends/whoosh_backend.py (kopia robocza) @@ -6,6 +6,7 @@ from haystack.backends import BaseSearchBackend, BaseSearchQuery from haystack.exceptions import MissingDependency, SearchBackendError from haystack.models import SearchResult +from threading import Lock try: from whoosh import store from whoosh.fields import Schema, ID, STORED, TEXT, KEYWORD @@ -35,6 +36,7 @@ def __init__(self, site=None): super(SearchBackend, self).__init__(site) self.setup_complete = False + self.lock = Lock() if not hasattr(settings, 'HAYSTACK_WHOOSH_PATH'): raise ImproperlyConfigured('You must specify a HAYSTACK_WHOOSH_PATH in your settings.') @@ -43,28 +45,32 @@ """ Defers loading until needed. """ - new_index = False - - # Make sure the index is there. - if not os.path.exists(settings.HAYSTACK_WHOOSH_PATH): - os.makedirs(settings.HAYSTACK_WHOOSH_PATH) - new_index = True - - self.storage = store.FileStorage(settings.HAYSTACK_WHOOSH_PATH) - self.content_field_name, fields = self.site.build_unified_schema() - self.schema = self.build_schema(fields) - self.parser = QueryParser(self.content_field_name, schema=self.schema) - - if new_index is True: - self.index = index.create_in(settings.HAYSTACK_WHOOSH_PATH, self.schema) - else: - try: - self.index = index.Index(self.storage, schema=self.schema) - except index.EmptyIndexError: + self.lock.acquire() + try: + new_index = False + + # Make sure the index is there. + if not os.path.exists(settings.HAYSTACK_WHOOSH_PATH): + os.makedirs(settings.HAYSTACK_WHOOSH_PATH) + new_index = True + + self.storage = store.FileStorage(settings.HAYSTACK_WHOOSH_PATH) + self.content_field_name, fields = self.site.build_unified_schema() + self.schema = self.build_schema(fields) + self.parser = QueryParser(self.content_field_name, schema=self.schema) + + if new_index is True: self.index = index.create_in(settings.HAYSTACK_WHOOSH_PATH, self.schema) - - self.setup_complete = True - + else: + try: + self.index = index.Index(self.storage, schema=self.schema) + except index.EmptyIndexError: + self.index = index.create_in(settings.HAYSTACK_WHOOSH_PATH, self.schema) + + self.setup_complete = True + finally: + self.lock.release() + def build_schema(self, fields): schema_fields = { 'id': ID(stored=True, unique=True), @@ -99,35 +105,44 @@ if not self.setup_complete: self.setup() - writer = self.index.writer() - - for obj in iterable: - doc = {} - doc['id'] = force_unicode(self.get_identifier(obj)) - doc['django_ct_s'] = force_unicode("%s.%s" % (obj._meta.app_label, obj._meta.module_name)) - doc['django_id_s'] = force_unicode(obj.pk) - other_data = index.prepare(obj) + self.lock.acquire() + try: + writer = self.index.writer() - # Really make sure it's unicode, because Whoosh won't have it any - # other way. - for key in other_data: - other_data[key] = force_unicode(other_data[key]) + for obj in iterable: + doc = {} + doc['id'] = force_unicode(self.get_identifier(obj)) + doc['django_ct_s'] = force_unicode("%s.%s" % (obj._meta.app_label, obj._meta.module_name)) + doc['django_id_s'] = force_unicode(obj.pk) + other_data = index.prepare(obj) + + # Really make sure it's unicode, because Whoosh won't have it any + # other way. + for key in other_data: + other_data[key] = force_unicode(other_data[key]) + + doc.update(other_data) + writer.update_document(**doc) - doc.update(other_data) - writer.update_document(**doc) - - # For now, commit no matter what, as we run into locking issues otherwise. - writer.commit() + # For now, commit no matter what, as we run into locking issues otherwise. + writer.commit() + finally: + self.lock.release() def remove(self, obj, commit=True): if not self.setup_complete: self.setup() whoosh_id = self.get_identifier(obj) - self.index.delete_by_query(q=self.parser.parse('id:"%s"' % whoosh_id)) - # For now, commit no matter what, as we run into locking issues otherwise. - self.index.commit() + self.lock.acquire() + try: + self.index.delete_by_query(q=self.parser.parse('id:"%s"' % whoosh_id)) + + # For now, commit no matter what, as we run into locking issues otherwise. + self.index.commit() + finally: + self.lock.release() def clear(self, models=[], commit=True): if not self.setup_complete: @@ -141,7 +156,11 @@ for model in models: models_to_delete.append("django_ct_s:%s.%s" % (model._meta.app_label, model._meta.module_name)) - self.index.delete_by_query(q=self.parser.parse(" OR ".join(models_to_delete))) + self.lock.acquire() + try: + self.index.delete_by_query(q=self.parser.parse(" OR ".join(models_to_delete))) + finally: + self.lock.release() # For now, commit no matter what, as we run into locking issues otherwise. self.index.commit() @@ -149,14 +168,18 @@ def delete_index(self): # Per the Whoosh mailing list, if wiping out everything from the index, # it's much more efficient to simply delete the index files. - if os.path.exists(settings.HAYSTACK_WHOOSH_PATH): - index_files = os.listdir(settings.HAYSTACK_WHOOSH_PATH) + self.lock.acquire() + try: + if os.path.exists(settings.HAYSTACK_WHOOSH_PATH): + index_files = os.listdir(settings.HAYSTACK_WHOOSH_PATH) + + for index_file in index_files: + os.remove(os.path.join(settings.HAYSTACK_WHOOSH_PATH, index_file)) + + os.removedirs(settings.HAYSTACK_WHOOSH_PATH) + finally: + self.lock.release() - for index_file in index_files: - os.remove(os.path.join(settings.HAYSTACK_WHOOSH_PATH, index_file)) - - os.removedirs(settings.HAYSTACK_WHOOSH_PATH) - # Recreate everything. self.setup() @@ -164,7 +187,11 @@ if not self.setup_complete: self.setup() - self.index.optimize() + self.lock.acquire() + try: + self.index.optimize() + finally: + self.lock.release() def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, @@ -202,16 +229,20 @@ # kwargs['fq'] = list(narrow_queries) pass - if self.index.doc_count: - searcher = self.index.searcher() - # DRL_TODO: Ignoring offsets for now, as slicing caused issues with pagination. - raw_results = searcher.search(self.parser.parse(query_string), sortedby=sort_by, reverse=reverse) - return self._process_results(raw_results, highlight=highlight, query_string=query_string) - else: - return { - 'results': [], - 'hits': 0, - } + self.lock.acquire() + try: + if self.index.doc_count: + searcher = self.index.searcher() + # DRL_TODO: Ignoring offsets for now, as slicing caused issues with pagination. + raw_results = searcher.search(self.parser.parse(query_string), sortedby=sort_by, reverse=reverse) + return self._process_results(raw_results, highlight=highlight, query_string=query_string) + else: + return { + 'results': [], + 'hits': 0, + } + finally: + self.lock.release() def more_like_this(self, model_instance): warnings.warn("Whoosh does not handle More Like This.", Warning, stacklevel=2)