Skip to content
Snippets Groups Projects
Commit 08db0789 authored by juanf's avatar juanf
Browse files

SP-782 BIS-497 Search, improved scoring.

SVN: 29582
parent 5cc203c7
No related branches found
No related tags found
No related merge requests found
...@@ -12,6 +12,7 @@ from ch.systemsx.cisd.openbis.generic.shared.managed_property import ManagedProp ...@@ -12,6 +12,7 @@ from ch.systemsx.cisd.openbis.generic.shared.managed_property import ManagedProp
import codecs import codecs
import time import time
import re
# #
# BEGIN Infrastructure # BEGIN Infrastructure
...@@ -679,37 +680,43 @@ class YeastLabSearchRequestHandler(SearchRequestHandler): ...@@ -679,37 +680,43 @@ class YeastLabSearchRequestHandler(SearchRequestHandler):
self.enzymes = samplesByType.getSamples('ENZYME') self.enzymes = samplesByType.getSamples('ENZYME')
self.westernBlottings = samplesByType.getSamples('WESTERN_BLOTTING') self.westernBlottings = samplesByType.getSamples('WESTERN_BLOTTING')
def calculate_score(self, sample, matches_exactly_terms, matches_wildcards_patterns):
    """Compute a relevance score for ``sample`` against the search terms.

    ``matches_exactly_terms`` and ``matches_wildcards_patterns`` are parallel
    sequences: entry *i* of each is derived from the same search term (the
    term with wildcards stripped, and the compiled wildcard pattern).

    Points added for every search term:
      * sample code matches the wildcard pattern ... 100000
      * sample code equals the term ................ 1000000
      * a property value matches the pattern ....... 100 (per property)
      * a property value equals the term ........... 10000 (per property)
      * sample type equals the term ................ 1000

    Returns the summed integer score. The score is also printed as a
    debug line.
    """
    # NOTE(review): the nesting of the sample-type check (per term, outside the
    # property loop) and of the debug print (once per sample) was reconstructed
    # from a rendering that lost indentation -- confirm against the real file.
    score = 0
    code = sample.getCode()  # loop-invariant, fetched once
    for exact_term, wildcard_pattern in zip(matches_exactly_terms, matches_wildcards_patterns):
        if self.matches_with_wildcards(code, wildcard_pattern):
            score += 100000
        if self.matches_exactly(code, exact_term):
            score += 1000000
        for sample_property in sample.sample.getProperties():
            value = sample_property.getValue()
            if self.matches_with_wildcards(value, wildcard_pattern):
                score += 100
            if self.matches_exactly(value, exact_term):
                score += 10000
        if self.matches_exactly(sample.getSampleType(), exact_term):
            score += 1000
    # A single parenthesised argument prints identically under Python 2 / Jython
    # and is also valid Python 3.
    print("For Sample: %s Score: %d" % (code, score))
    return score
def prepare_search_term_without_wildcards(self, search_term):
    """Return ``search_term`` with every ``*`` and ``?`` wildcard character removed."""
    stripped = search_term
    for wildcard in ("*", "?"):
        stripped = stripped.replace(wildcard, "")
    return stripped
def prepare_search_pattern_with_wildcards(self, search_term):
    """Compile a user search term into a case-insensitive regular expression.

    ``*`` is translated to ``.*`` and ``?`` to ``.?``. Every other character
    is matched literally: the literal pieces are escaped so that regex
    metacharacters typed by the user (``+``, ``(``, ``.``, ``[`` ...) neither
    raise ``re.error`` nor change the meaning of the pattern.

    Returns the compiled pattern object.
    """
    wildcard_to_regex = {"*": ".*", "?": ".?"}
    pieces = []
    # The capturing group makes re.split keep the wildcard characters as
    # separate items, so each item is either one wildcard or literal text.
    for piece in re.split(r"([*?])", search_term):
        pieces.append(wildcard_to_regex.get(piece) or re.escape(piece))
    return re.compile("".join(pieces), re.IGNORECASE)
def matches_exactly(self, string, search_term):
    """Return True when both arguments are present and equal ignoring case.

    Returns False if either ``string`` or ``search_term`` is None.
    """
    if string is None or search_term is None:
        return False
    return string.lower() == search_term.lower()
def matches_with_wildcards(self, string, searchPattern):
    """Return True when the compiled ``searchPattern`` is found anywhere in ``string``.

    Returns False if either argument is None.
    """
    if string is None or searchPattern is None:
        return False
    found = searchPattern.search(string)
    return found is not None
...@@ -717,22 +724,27 @@ class YeastLabSearchRequestHandler(SearchRequestHandler): ...@@ -717,22 +724,27 @@ class YeastLabSearchRequestHandler(SearchRequestHandler):
all_samples_sc = SearchCriteria(); all_samples_sc = SearchCriteria();
all_samples_sc.setOperator(SearchOperator.MATCH_ALL_CLAUSES); all_samples_sc.setOperator(SearchOperator.MATCH_ALL_CLAUSES);
search_parameters = self.parameters['searchtext'].split(); search_parameters = self.parameters['searchtext'].split();
matches_exactly_terms = []
matches_wildcards_patterns = []
for search_parameter in search_parameters: for search_parameter in search_parameters:
matches_exactly_terms.append(self.prepare_search_term_without_wildcards(search_parameter));
matches_wildcards_patterns.append(self.prepare_search_pattern_with_wildcards(search_parameter));
all_samples_sc.addMatchClause(SearchCriteria.MatchClause.createAnyFieldMatch( "*" + search_parameter + "*")); all_samples_sc.addMatchClause(SearchCriteria.MatchClause.createAnyFieldMatch( "*" + search_parameter + "*"));
all_unsorted_samples = self.searchService.searchForSamples(all_samples_sc); all_unsorted_samples = self.searchService.searchForSamples(all_samples_sc);
#Custom Sorting #Custom Sorting
# start = int(round(time.time() * 1000)); start = int(round(time.time() * 1000));
samples_with_scores = []; samples_with_scores = [];
for sample in all_unsorted_samples: for sample in all_unsorted_samples:
score = self.calculate_score(sample, search_parameters); score = self.calculate_score(sample, matches_exactly_terms, matches_wildcards_patterns);
samples_with_scores.append([sample, score]); samples_with_scores.append([sample, score]);
samples_with_scores = sorted(samples_with_scores, key=lambda sample_with_score: sample_with_score[1], reverse=True); #Sorting the list using the score samples_with_scores = sorted(samples_with_scores, key=lambda sample_with_score: sample_with_score[1], reverse=True); #Sorting the list using the score
self.samples = [sample_with_score[0] for sample_with_score in samples_with_scores[0:100]]; #Get first 100 results for the ipad self.samples = [sample_with_score[0] for sample_with_score in samples_with_scores[0:100]]; #Get first 100 results for the ipad
# total = int(round(time.time() * 1000)) - start; total = int(round(time.time() * 1000)) - start;
# print "Time To Sort: %d" % total; print "Time To Sort: %d" % total;
# Children of the results # Children of the results
self.sort_samples_by_type(self.samples) self.sort_samples_by_type(self.samples)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment