|
Package sensei ::
Module sensei_client_lib
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14 """Python client library for Sensei
15 """
16
17
18 import urllib
19 import urllib2
20 import json
21 import sys
22 import logging
23 import datetime
24 from datetime import datetime
25 import time
26 import re
27
28
logger = logging.getLogger("sensei_client_lib")


# ---------------------------------------------------------------------------
# Parameter names used when a request is encoded as URL parameters
# ---------------------------------------------------------------------------

PARAM_OFFSET = "start"
PARAM_COUNT = "rows"
PARAM_QUERY = "q"
PARAM_QUERY_PARAM = "qparam"
PARAM_SORT = "sort"
PARAM_SORT_ASC = "asc"
PARAM_SORT_DESC = "desc"
PARAM_SORT_SCORE = "relevance"
PARAM_SORT_SCORE_REVERSE = "relrev"
PARAM_SORT_DOC = "doc"
PARAM_SORT_DOC_REVERSE = "docrev"
PARAM_FETCH_STORED = "fetchstored"
PARAM_SHOW_EXPLAIN = "showexplain"
PARAM_ROUTE_PARAM = "routeparam"
PARAM_GROUP_BY = "groupby"
PARAM_MAX_PER_GROUP = "maxpergroup"
PARAM_SELECT = "select"
PARAM_SELECT_VAL = "val"
PARAM_SELECT_NOT = "not"
PARAM_SELECT_OP = "op"
PARAM_SELECT_OP_AND = "and"
PARAM_SELECT_OP_OR = "or"
PARAM_SELECT_PROP = "prop"
PARAM_FACET = "facet"
PARAM_DYNAMIC_INIT = "dyn"
PARAM_PARTITIONS = "partitions"

# Per-facet sub-parameters (combined as "facet.<name>.<sub-parameter>").
PARAM_FACET_EXPAND = "expand"
PARAM_FACET_MAX = "max"
PARAM_FACET_MINHIT = "minhit"
PARAM_FACET_ORDER = "order"
PARAM_FACET_ORDER_HITS = "hits"
PARAM_FACET_ORDER_VAL = "val"

# Facet initialisation ("dyn") sub-parameters and their value types.
PARAM_DYNAMIC_TYPE = "type"
PARAM_DYNAMIC_TYPE_STRING = "string"
PARAM_DYNAMIC_TYPE_BYTEARRAY = "bytearray"
PARAM_DYNAMIC_TYPE_BOOL = "boolean"
PARAM_DYNAMIC_TYPE_INT = "int"
PARAM_DYNAMIC_TYPE_LONG = "long"
PARAM_DYNAMIC_TYPE_DOUBLE = "double"
PARAM_DYNAMIC_VAL = "vals"

# ---------------------------------------------------------------------------
# Field names read from a search result
# ---------------------------------------------------------------------------

PARAM_RESULT_PARSEDQUERY = "parsedquery"
PARAM_RESULT_HIT_STORED_FIELDS = "stored"
PARAM_RESULT_HIT_STORED_FIELDS_NAME = "name"
PARAM_RESULT_HIT_STORED_FIELDS_VALUE = "val"
PARAM_RESULT_HIT_EXPLANATION = "explanation"
PARAM_RESULT_FACETS = "facets"

PARAM_RESULT_TID = "tid"
PARAM_RESULT_TOTALDOCS = "totaldocs"
PARAM_RESULT_NUMHITS = "numhits"
PARAM_RESULT_HITS = "hits"
PARAM_RESULT_HIT_UID = "uid"
PARAM_RESULT_HIT_DOCID = "docid"
PARAM_RESULT_HIT_SCORE = "score"
PARAM_RESULT_HIT_SRC_DATA = "srcdata"
PARAM_RESULT_TIME = "time"

# ---------------------------------------------------------------------------
# Field names of the system-information document
# (presumably the JSON served under "/sysinfo" -- verify against the server)
# ---------------------------------------------------------------------------

PARAM_SYSINFO_NUMDOCS = "numdocs"
PARAM_SYSINFO_LASTMODIFIED = "lastmodified"
PARAM_SYSINFO_VERSION = "version"
PARAM_SYSINFO_FACETS = "facets"
PARAM_SYSINFO_FACETS_NAME = "name"
PARAM_SYSINFO_FACETS_RUNTIME = "runtime"
PARAM_SYSINFO_FACETS_PROPS = "props"
PARAM_SYSINFO_CLUSTERINFO = "clusterinfo"
PARAM_SYSINFO_CLUSTERINFO_ID = "id"
PARAM_SYSINFO_CLUSTERINFO_PARTITIONS = "partitions"
PARAM_SYSINFO_CLUSTERINFO_NODELINK = "nodelink"
PARAM_SYSINFO_CLUSTERINFO_ADMINLINK = "adminlink"

# Score-explanation fields of a hit.
PARAM_RESULT_HITS_EXPL_VALUE = "value"
PARAM_RESULT_HITS_EXPL_DESC = "description"
PARAM_RESULT_HITS_EXPL_DETAILS = "details"

# Fields of a single facet entry in a result.
PARAM_RESULT_FACET_INFO_VALUE = "value"
PARAM_RESULT_FACET_INFO_COUNT = "count"
PARAM_RESULT_FACET_INFO_SELECTED = "selected"

# ---------------------------------------------------------------------------
# Keys used when a request is encoded as a JSON document
# ---------------------------------------------------------------------------

JSON_PARAM_COLUMNS = "columns"
JSON_PARAM_EXPLAIN = "explain"
JSON_PARAM_FACETS = "facets"
JSON_PARAM_FACET_INIT = "facetInit"
JSON_PARAM_FETCH_STORED = "fetchStored"
JSON_PARAM_FETCH_TERM_VECTORS = "fetchTermVectors"
JSON_PARAM_FILTER = "filter"
JSON_PARAM_FROM = "from"
JSON_PARAM_GROUPBY = "groupBy"
JSON_PARAM_PARTITIONS = "partitions"
JSON_PARAM_QUERY = "query"
JSON_PARAM_QUERY_STRING = "query_string"
JSON_PARAM_ROUTEPARAM = "routeParam"
JSON_PARAM_SELECTIONS = "selections"
JSON_PARAM_SIZE = "size"
JSON_PARAM_SORT = "sort"
JSON_PARAM_TOP = "top"
JSON_PARAM_VALUES = "values"
JSON_PARAM_EXCLUDES = "excludes"
JSON_PARAM_OPERATOR = "operator"
JSON_PARAM_NO_OPTIMIZE = "_noOptimize"

# Keys that mark a grouped hit.
GROUP_VALUE = "groupvalue"
GROUP_HITS = "grouphits"

# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------

DEFAULT_REQUEST_OFFSET = 0
DEFAULT_REQUEST_COUNT = 10
DEFAULT_REQUEST_MAX_PER_GROUP = 10
DEFAULT_FACET_MINHIT = 1
DEFAULT_FACET_MAXHIT = 10
DEFAULT_FACET_ORDER = PARAM_FACET_ORDER_HITS
154
155
156
157
158
159 -def print_line(keys, max_lens, char='-', sep_char='+'):
160 sys.stdout.write(sep_char)
161 for key in keys:
162 sys.stdout.write(char * (max_lens[key] + 2) + sep_char)
163 sys.stdout.write('\n')
164
166 print_line(keys, max_lens, char=char, sep_char=sep_char)
167 sys.stdout.write('|')
168 for key in keys:
169 sys.stdout.write(' %s%s |' % (key, ' ' * (max_lens[key] - len(key))))
170 sys.stdout.write('\n')
171 print_line(keys, max_lens, char=char, sep_char=sep_char)
172
175
177 """Return the byte string representation of obj."""
178 try:
179 return str(obj)
180 except UnicodeEncodeError:
181
182 return unicode(obj).encode("unicode_escape")
183
186 """Exception raised for all errors related to Sensei client."""
187
190
192 return repr(self.value)
193
198
200 self.facets[facet_name]={"max":maxCounts, "minCount":minHits, "expand":expand, "order":orderBy }
201 return self
202
205
209 self.type = type;
210 self.selection = {}
211
214
216 return self.selection
217
222
def __init__(self, column, values, excludes, operator):
    """Create a "terms" selection on ``column``.

    The selection document stored on ``self.selection`` has the form
    ``{"terms": {column: {"values": ..., "excludes": ..., "operator": ...}}}``
    with the three arguments stored unchanged.
    """
    SenseiSelection.__init__(self, "terms")
    term_spec = {"values": values, "excludes": excludes, "operator": operator}
    self.selection = {"terms": {column: term_spec}}
228
def __init__(self, column, from_str="*", to_str="*", include_lower=True, include_upper=True):
    """Create a "range" selection on ``column``.

    ``from_str`` / ``to_str`` are the range bounds (both default to "*") and
    ``include_lower`` / ``include_upper`` are stored alongside them; the
    resulting document is kept on ``self.selection``.
    """
    SenseiSelection.__init__(self, "range")
    bounds = {
        "to": to_str,
        "from": from_str,
        "include_lower": include_lower,
        "include_upper": include_upper,
    }
    self.selection = {"range": {column: bounds}}
234
236 - def __init__(self, column, value, strict=False, depth=1):
239
243 self.type = type
244 self.query = {}
245
248
251
256
258 target = (self.query)["match_all"]
259 target["boost"]=boost
260 return self
261
264 SenseiQuery.__init__(self, "ids")
265 self.query={"ids" : {"values" : [], "excludes":[], "boost":1.0}}
266 if isinstance(values, list) and isinstance(excludes, list):
267 self.query = {"ids" : {"values" : values, "excludes":excludes, "boost":1.0}}
268
270 if self.query.has_key("ids"):
271 values_excludes = self.query["ids"]
272 if values_excludes.has_key("values"):
273 orig_values = values_excludes["values"]
274 orig_set = set(orig_values)
275 for new_value in values:
276 if new_value not in orig_set:
277 orig_values.append(new_value)
278 return self
279
281 if self.query.has_key("ids"):
282 values_excludes = self.query["ids"]
283 if values_excludes.has_key("excludes"):
284 orig_excludes = values_excludes["excludes"]
285 orig_set = set(orig_excludes)
286 for new_value in excludes:
287 if new_value not in orig_set:
288 orig_excludes.append(new_value)
289 return self
290
292 target = (self.query)["ids"]
293 target["boost"]=boost
294 return self
295
298 SenseiQuery.__init__(self, "query_string")
299 self.query={"query_string":{"query":query,
300 "default_field":"contents",
301 "default_operator":"OR",
302 "allow_leading_wildcard":True,
303 "lowercase_expanded_terms":True,
304 "enable_position_increments":True,
305 "fuzzy_prefix_length":0,
306 "fuzzy_min_sim":0.5,
307 "phrase_slop":0,
308 "boost":1.0,
309 "auto_generate_phrase_queries":False,
310 "fields":[],
311 "use_dis_max":True,
312 "tie_breaker":0
313 }}
314
316 self.query["query_string"]["default_field"]=field
317 return self
318
320 self.query["query_string"]["default_operator"]=operator
321 return self
322
324 self.query["query_string"]["allow_leading_wildcard"]=allow_leading_wildcard
325 return self
326
328 self.query["query_string"]["lowercase_expanded_terms"]=lowercase_expanded_terms
329 return self
330
332 self.query["query_string"]["enable_position_increments"]=enable_position_increments
333 return self
334
336 self.query["query_string"]["fuzzy_prefix_length"]=fuzzy_prefix_length
337 return self
338
340 self.query["query_string"]["fuzzy_min_sim"]=fuzzy_min_sim
341 return self
342
344 self.query["query_string"]["phrase_slop"]=phrase_slop
345 return self
346
348 self.query["query_string"]["boost"]=boost
349 return self
350
352 self.query["query_string"]["auto_generate_phrase_queries"]=auto_generate_phrase_queries
353 return self
354
356 if isinstance(fields, list):
357 self.query["query_string"]["fields"]=fields
358 return self
359
361 self.query["query_string"]["use_dis_max"]=use_dis_max
362 return self
363
365 self.query["query_string"]["tie_breaker"]=tie_breaker
366 return self
367
368
class SenseiQueryText(SenseiQuery):
    """Query of kind "text"."""

    def __init__(self, message, operator, type):
        """Store ``message``, ``operator`` and ``type`` as the "text" query.

        The query document kept on ``self.query`` is
        ``{"text": {"message": ..., "operator": ..., "type": ...}}``.
        """
        SenseiQuery.__init__(self, "text")
        text_spec = {"message": message, "operator": operator, "type": type}
        self.query = {"text": text_spec}
373
378
380 target = (self.query)["term"]
381 for column, desc in target.iterms():
382 desc["boost"]=boost
383 return self
384
388 self.type = type
389 self.filter = {}
390
393
396
400 SenseiFilter.__init__(self, "ids")
401 self.filter={"ids" : {"values" : [], "excludes":[]}}
402 if isinstance(values, list) and isinstance(excludes, list):
403 self.filter = {"ids" : {"values" : values, "excludes":excludes}}
404
406 if self.filter.has_key("ids"):
407 values_excludes = self.filter["ids"]
408 if values_excludes.has_key("values"):
409 orig_values = values_excludes["values"]
410 orig_set = set(orig_values)
411 for new_value in values:
412 if new_value not in orig_set:
413 orig_values.append(new_value)
414 return self
415
417 if self.filter.has_key("ids"):
418 values_excludes = self.filter["ids"]
419 if values_excludes.has_key("excludes"):
420 orig_excludes = values_excludes["excludes"]
421 orig_set = set(orig_excludes)
422 for new_value in excludes:
423 if new_value not in orig_set:
424 orig_excludes.append(new_value)
425 return self
426
def __init__(self, must_filter=None, must_not_filter=None, should_filter=None):
    """Create a "bool" filter from optional must / must_not / should parts.

    must_filter     -- a SenseiFilter; anything else is ignored.
    must_not_filter -- a SenseiFilter; anything else is ignored.
    should_filter   -- a list of filters; anything that is not a list is
                       ignored.

    Parts that are ignored keep their initial value, an empty dict.
    """
    SenseiFilter.__init__(self, "bool")
    clauses = {"must": {}, "must_not": {}, "should": {}}
    self.filter = {"bool": clauses}

    # isinstance() is already False for None, so no separate None test.
    if isinstance(must_filter, SenseiFilter):
        clauses["must"] = must_filter.get_filter()
    if isinstance(must_not_filter, SenseiFilter):
        clauses["must_not"] = must_not_filter.get_filter()
    if isinstance(should_filter, list):
        clauses["should"] = [item.get_filter() for item in should_filter]
443
446 SenseiFilter.__init__(self, "and")
447 self.filter={"and":[]}
448 old_filter_list = (self.filter)["and"]
449 if isinstance(filter_list, list):
450 for new_filter in filter_list:
451 if isinstance(new_filter, SenseiFilter):
452 old_filter_list.append(new_filter.get_filter())
453
456 SenseiFilter.__init__(self, "or")
457 self.filter={"or":[]}
458 old_filter_list = (self.filter)["or"]
459 if isinstance(filter_list, list):
460 for new_filter in filter_list:
461 if isinstance(new_filter, SenseiFilter):
462 old_filter_list.append(new_filter.get_filter())
463
465 - def __init__(self, column, value, noOptimize=False):
468
def __init__(self, column, values=None, excludes=None, operator="or", noOptimize=False):
    """Create a "terms" filter on ``column``.

    * ``values`` is not a list: the filter stays ``{"terms": {}}``.
    * ``values`` and ``excludes`` are both lists: the full form with
      "values", "excludes", "operator" and "_noOptimize" is stored.
    * only ``values`` is a list: the short form ``{column: values}`` is
      stored.

    NOTE(review): the source this was recovered from had lost its
    indentation; the ``else`` is taken to pair with the ``excludes`` test --
    confirm against the upstream file.
    """
    SenseiFilter.__init__(self, "terms")
    self.filter = {"terms": {}}
    if not isinstance(values, list):
        return

    if isinstance(excludes, list):
        column_spec = {
            "values": values,
            "excludes": excludes,
            "operator": operator,
            "_noOptimize": noOptimize,
        }
    else:
        column_spec = values
    self.filter = {"terms": {column: column_spec}}
480
def __init__(self, column, from_val, to_val):
    """Create a "range" filter on ``column`` from ``from_val`` to ``to_val``.

    "_noOptimize" starts out as False in the stored filter document.
    """
    SenseiFilter.__init__(self, "range")
    bounds = {"from": from_val, "to": to_val, "_noOptimize": False}
    self.filter = {"range": {column: bounds}}
485
487 range = (self.filter)["range"]
488 for key, value in range.items():
489 if value is not None:
490 value["_type"] = type
491 value["_noOptimize"] = True
492 if type == "date" and date_format is not None:
493 value["_date_format"]=date_format
494 return self
495
502
509
512 - def __init__(self, field, reverse=False):
523
526
528 if self.dir:
529 return self.field + ":" + self.dir
530 else:
531 return self.field
532
534 if self.dir:
535 return {self.field: self.dir}
536 elif self.field == PARAM_SORT_SCORE:
537 return "_score"
538 else:
539 return self.field
540
544
def add_facet_init(self, facet_name, param_name, param_values, param_type="string"):
    """Register an initialisation parameter for a facet.

    facet_name   -- facet the parameter belongs to.
    param_name   -- name of the parameter; an existing entry is replaced.
    param_values -- list of values; a non-list argument is ignored.
    param_type   -- type label stored with the values.

    Returns ``self`` so that calls can be chained.
    """
    if isinstance(param_values, list):
        # Create the per-facet mapping on first use, then add or overwrite.
        facet_params = self.facet_init.setdefault(facet_name, {})
        facet_params[param_name] = {"type": param_type, "values": param_values}
    return self
556
558 return self.facet_init
559
562
def __init__(self, name, runtime=False, props=None):
    """Describe one facet.

    name    -- facet name.
    runtime -- runtime flag of the facet.
    props   -- mapping of facet properties. When omitted (or None) every
               instance receives its own new empty dict; a literal ``{}``
               default would be one dict object shared by all instances, so
               a change made through one instance would show up in the
               others.
    """
    self.name = name
    self.runtime = runtime
    self.props = {} if props is None else props
567
570
573
576
578 self.runtime = runtime
579
582
585
588
def __init__(self, id, partitions, node_link, admin_link):
    """Record a node's id, partitions, node link and admin link.

    All four arguments are stored unchanged on attributes of the same name.
    """
    self.id, self.partitions = id, partitions
    self.node_link, self.admin_link = node_link, admin_link
594
597
599 return self.partitions
600
602 return self.node_link
603
605 return self.admin_link
606
609
623
625 """Display sysinfo."""
626
627 keys = ["facet_name", "facet_type", "runtime", "column", "column_type", "depends"]
628 max_lens = None
629
630
631 def get_max_lens(columns):
632 max_lens = {}
633 for column in columns:
634 max_lens[column] = len(column)
635 for facet_info in self.facet_infos:
636 props = facet_info.get_props()
637
638 tmp_len = len(facet_info.get_name())
639 if tmp_len > max_lens["facet_name"]:
640 max_lens["facet_name"] = tmp_len
641
642 tmp_len = len(props.get("type"))
643 if tmp_len > max_lens["facet_type"]:
644 max_lens["facet_type"] = tmp_len
645
646
647
648
649 tmp_len = len(props.get("column"))
650 if tmp_len > max_lens["column"]:
651 max_lens["column"] = tmp_len
652
653 tmp_len = len(props.get("column_type"))
654 if tmp_len > max_lens["column_type"]:
655 max_lens["column_type"] = tmp_len
656
657 tmp_len = len(props.get("depends"))
658 if tmp_len > max_lens["depends"]:
659 max_lens["depends"] = tmp_len
660 return max_lens
661
662 max_lens = get_max_lens(keys)
663 print_header(keys, max_lens)
664
665 for facet_info in self.facet_infos:
666 props = facet_info.get_props()
667 sys.stdout.write('|')
668 val = facet_info.get_name()
669 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["facet_name"] - len(val))))
670
671 val = props.get("type")
672 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["facet_type"] - len(val))))
673
674 val = facet_info.get_runtime() and "true" or "false"
675 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["runtime"] - len(val))))
676
677 val = props.get("column")
678 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["column"] - len(val))))
679
680 val = props.get("column_type")
681 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["column_type"] - len(val))))
682
683 val = props.get("depends")
684 sys.stdout.write(' %s%s |' % (val, ' ' * (max_lens["depends"] - len(val))))
685
686 sys.stdout.write('\n')
687
688 print_footer(keys, max_lens)
689
692
694 self.num_docs = num_docs
695
697 return self.last_modified
698
700 self.last_modified = last_modified
701
703 return self.facet_infos
704
706 self.facet_infos = facet_infos
707
710
712 self.version = version
713
715 return self.cluster_info
716
718 self.cluster_info = cluster_info
719
722
729 self.qParam = {}
730 self.explain = False
731 self.route_param = None
732 self.query = None
733 self.offset = offset
734 self.count = count
735 self.columns = []
736 self.sorts = None
737 self.selections = []
738 self.filter = {}
739 self.query_pred = {}
740 self.facets = {}
741 self.fetch_stored = False
742 self.groupby = None
743 self.max_per_group = max_per_group
744 self.facet_init_param_map = {}
745
747 self.offset = offset
748 return self
749
753
755 self.query = query.get_query()
756 return self
757
759 self.explain = explain
760 return self
761
763 self.fetch_stored = fetch_stored
764 return self
765
767 self.route_param = route_param
768 return self
769
771 self.sorts = sorts
772 return self
773
775 if isinstance(sort, SenseiSort):
776 if self.sorts is None:
777 self.sorts = []
778 self.sorts.append(sort)
779 else:
780 self.sorts.append(sort)
781 return self
782
784 self.filter = filter.get_filter()
785 return self
786
788 if self.selections is None:
789 self.selections = []
790 if isinstance(selection, SenseiSelection):
791 self.selections.append(selection.get_selection())
792 return self
793
794
796 self.facets = facets.get_facets()
797 return self
798
800 self.groupby = groupby
801 return self
802
804 self.max_per_group = max_per_group
805 return self
806
808 self.facet_init_param_map = facet_init_param_map
809 return self
810
813
817 self.docid = None
818 self.uid = None
819 self.srcData = {}
820 self.score = None
821 self.explanation = None
822 self.stored = None
823
824 - def load(self, jsonHit):
835
846
849 """Sensei search results for a query."""
850
870
def display(self, columns=['*'], max_col_width=40):
    """Print the results in SQL SELECT result format.

    columns       -- names of the hit fields to show. The single entry '*'
                     selects every key of the first hit except the group
                     hits, the group value and the source data. The default
                     list is only read, never modified.
    max_col_width -- cell values longer than this are cut off.

    Output goes to standard output: the table of hits (group members are
    listed row by row, each group followed by a rule), a summary line, and
    one small table per facet found in the raw result.

    Prints a short message and returns when there are no hits or no
    columns.
    """

    def cell_text(value):
        # Single place that turns a raw value into its printable form, so
        # that the measured width always belongs to the text actually shown
        # (None, dicts and non-ASCII text no longer break the measurement).
        if isinstance(value, list):
            return ','.join([safe_str(item) for item in value])
        return safe_str(value)

    def rows_of(hit):
        # A grouped hit carries its members under GROUP_HITS; an ordinary
        # hit is its own single row.
        if GROUP_HITS in hit:
            return hit.get(GROUP_HITS)
        return [hit]

    if not self.hits:
        sys.stdout.write("No hit is found.\n")
        return
    if not columns:
        sys.stdout.write("No column is selected.\n")
        return

    if len(columns) == 1 and columns[0] == '*':
        hidden = (GROUP_HITS, GROUP_VALUE, PARAM_RESULT_HIT_SRC_DATA)
        keys = [key for key in self.hits[0].keys() if key not in hidden]
    else:
        keys = columns

    # Column width: at least the header, at most max_col_width for values.
    # Taking the maximum keeps a column from becoming narrower than a header
    # that is itself longer than max_col_width.
    max_lens = dict((key, len(key)) for key in keys)
    has_group_hits = False
    for hit in self.hits:
        if GROUP_HITS in hit:
            has_group_hits = True
        for row in rows_of(hit):
            for key in keys:
                shown = min(len(cell_text(row.get(key, '<Not Found>'))),
                            max_col_width)
                if shown > max_lens[key]:
                    max_lens[key] = shown

    # Grouped output is framed with '=' to set it apart from the '-' rules
    # printed after each group.
    border_char = '=' if has_group_hits else '-'
    border_sep = '=' if has_group_hits else '+'

    print_header(keys, max_lens, border_char, border_sep)

    for hit in self.hits:
        for row in rows_of(hit):
            sys.stdout.write('|')
            for key in keys:
                text = cell_text(row.get(key, '<Not Found>'))[:max_col_width]
                padding = ' ' * (max_lens[key] - len(text))
                sys.stdout.write(' %s%s |' % (text, padding))
            sys.stdout.write('\n')
        if has_group_hits:
            print_line(keys, max_lens)

    print_footer(keys, max_lens, border_char, border_sep)

    sys.stdout.write('%s %s%s in set, %s hit%s, %s total doc%s (server: %sms, total: %sms)\n' %
                     (len(self.hits),
                      'group' if has_group_hits else 'row',
                      's' if len(self.hits) > 1 else '',
                      self.numHits,
                      's' if self.numHits > 1 else '',
                      self.totalDocs,
                      's' if self.totalDocs > 1 else '',
                      self.time,
                      self.total_time))

    # A result without a facets section is treated as having no facets.
    facets = self.jsonMap.get(PARAM_RESULT_FACETS) or {}
    for facet, values in facets.items():
        entries = [(safe_str(val.get('value')), str(val.get('count')))
                   for val in values]
        max_val_len = len(facet)
        max_count_len = 1
        for label, count in entries:
            max_val_len = max(max_val_len, min(max_col_width, len(label)))
            max_count_len = max(max_count_len, len(count))
        # label + " (" + count + ")" plus one blank on either side
        total_len = max_val_len + 2 + max_count_len + 3
        rule = '+' + '-' * total_len + '+\n'

        sys.stdout.write(rule)
        sys.stdout.write('| ' + facet + ' ' * (total_len - len(facet) - 1) + '|\n')
        sys.stdout.write(rule)
        for label, count in entries:
            sys.stdout.write('| %s%s (%s)%s |\n' %
                             (label,
                              ' ' * (max_val_len - len(label)),
                              count,
                              ' ' * (max_count_len - len(count))))
        sys.stdout.write(rule)
989
993 """Sensei client class."""
994
def __init__(self, host='localhost', port=8080, path='sensei', sysinfo=None):
    """Set up a client for the service at ``http://host:port/path``.

    host, port, path -- location of the service; ``port`` is formatted with
                        ``%d`` and therefore has to be a number.
    sysinfo          -- system information to use as-is. When it is missing
                        or empty, it is downloaded from ``<url>/sysinfo``
                        and decoded from JSON.

    Afterwards ``self.facet_map`` maps every facet name reported by the
    system information to its facet-info object.
    """
    self.host = host
    self.port = port
    self.path = path
    self.url = 'http://%s:%d/%s' % (self.host, self.port, self.path)
    self.opener = urllib2.build_opener()
    self.opener.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.91 Safari/534.30')]

    if sysinfo:
        self.sysinfo = SenseiSystemInfo(sysinfo)
    else:
        response = self.opener.open(urllib2.Request(self.url + "/sysinfo"))
        try:
            payload = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        self.sysinfo = SenseiSystemInfo(json.loads(payload))

    self.facet_map = {}
    for facet_info in self.sysinfo.get_facet_infos():
        self.facet_map[facet_info.get_name()] = facet_info
1015
1016
1018 """Build a Sensei request in JSON format.
1019
1020 Once built, a Sensei request in JSON format can be sent to a Sensei
1021 broker using the following command:
1022
1023 $ curl -XPOST http://localhost:8080/sensei -d '{
1024 "fetchStored": "true",
1025 "from": 0,
1026 "size": 10
1027 }'
1028
1029 """
1030
1031 output_json = {}
1032
1033 output_json[JSON_PARAM_FROM] = req.offset
1034 output_json[JSON_PARAM_SIZE] = req.count
1035
1036 if req.query:
1037 output_json[JSON_PARAM_QUERY] = req.query
1038
1039 if req.explain:
1040 output_json[JSON_PARAM_QUERY] = req.explain
1041 if req.fetch_stored:
1042 output_json[JSON_PARAM_FETCH_STORED] = req.fetch_stored
1043 if req.route_param:
1044 output_json[JSON_PARAM_ROUTEPARAM] = req.route_param
1045 if req.sorts:
1046 output_json[JSON_PARAM_SORT] = [sort.build_sort_spec() for sort in req.sorts]
1047
1048 if req.filter:
1049 output_json[JSON_PARAM_FILTER] = req.filter
1050
1051 if req.query_pred:
1052 output_json[JSON_PARAM_QUERY] = req.query_pred[JSON_PARAM_QUERY]
1053
1054 if req.selections:
1055 output_json[JSON_PARAM_SELECTIONS] = req.selections
1056
1057 if req.facets:
1058 output_json[JSON_PARAM_FACETS]=req.facets
1059
1060 facet_init_map = {}
1061 for facet_name, initParams in req.facet_init_param_map.iteritems():
1062 inner_map = {}
1063 for name, vals in initParams.bool_map.iteritems():
1064 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_BOOL,
1065 "values" : vals}
1066 for name, vals in initParams.int_map.iteritems():
1067 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_INT,
1068 "values" : [safe_str(val) for val in vals]}
1069 for name, vals in initParams.long_map.iteritems():
1070 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_LONG,
1071 "values" : [safe_str(val) for val in vals]}
1072 for name, vals in initParams.string_map.iteritems():
1073 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_STRING,
1074 "values" : vals}
1075 for name, vals in initParams.byte_map.iteritems():
1076 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_BYTEARRAY,
1077 "values" : [safe_str(val) for val in vals]}
1078 for name, vals in initParams.double_map.iteritems():
1079 inner_map[name] = {PARAM_DYNAMIC_TYPE : PARAM_DYNAMIC_TYPE_DOUBLE,
1080 "values" : [safe_str(val) for val in vals]}
1081 facet_init_map[facet_name] = inner_map
1082 if facet_init_map:
1083 output_json[JSON_PARAM_FACET_INIT] = facet_init_map
1084
1085 if req.groupby:
1086
1087 output_json[JSON_PARAM_GROUPBY] = {
1088 JSON_PARAM_COLUMNS: [req.groupby],
1089 JSON_PARAM_TOP: req.max_per_group
1090 }
1091
1092
1093 return json.dumps(output_json, sort_keys=sort_keys, indent=indent)
1094
1095 @staticmethod
1097 paramMap = {}
1098 paramMap[PARAM_OFFSET] = req.offset
1099 paramMap[PARAM_COUNT] = req.count
1100 if req.query:
1101 paramMap[PARAM_QUERY]=req.query
1102 if req.explain:
1103 paramMap[PARAM_SHOW_EXPLAIN] = "true"
1104 if req.fetch_stored:
1105 paramMap[PARAM_FETCH_STORED] = "true"
1106 if req.route_param:
1107 paramMap[PARAM_ROUTE_PARAM] = req.route_param
1108
1109 if req.sorts:
1110 paramMap[PARAM_SORT] = ",".join(sort.build_sort_field() for sort in req.sorts)
1111
1112 if req.qParam.get("query"):
1113 paramMap[PARAM_QUERY] = req.qParam.get("query")
1114 del req.qParam["query"]
1115 if req.qParam:
1116 paramMap[PARAM_QUERY_PARAM] = ",".join(param + ":" + req.qParam.get(param)
1117 for param in req.qParam.keys() if param != "query")
1118
1119 for selection in req.selections.values():
1120 paramMap[selection.getSelectNotParam()] = selection.getSelectNotParamValues()
1121 paramMap[selection.getSelectOpParam()] = selection.operation
1122 paramMap[selection.getSelectValParam()] = selection.getSelectValParamValues()
1123 if selection.properties:
1124 paramMap[selection.getSelectPropParam()] = selection.getSelectPropParamValues()
1125
1126
1127 for facet_name, facet_spec in req.facets.iteritems():
1128 paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_MAX)] = facet_spec.maxCounts
1129 paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_ORDER)] = facet_spec.orderBy
1130 paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_EXPAND)] = facet_spec.expand and "true" or "false"
1131 paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_MINHIT)] = facet_spec.minHits
1132
1133 for facet_name, initParams in req.facet_init_param_map.iteritems():
1134 for name, vals in initParams.bool_map.iteritems():
1135 paramMap["%s.%s.%s.%s" %
1136 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_BOOL
1137 paramMap["%s.%s.%s.%s" %
1138 (PARAM_DYNAMIC_INIT, facet_name, name,
1139 PARAM_DYNAMIC_VAL)] = ','.join([val and "true" or "false" for val in vals])
1140 for name, vals in initParams.int_map.iteritems():
1141 paramMap["%s.%s.%s.%s" %
1142 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_INT
1143 paramMap["%s.%s.%s.%s" %
1144 (PARAM_DYNAMIC_INIT, facet_name, name,
1145 PARAM_DYNAMIC_VAL)] = ','.join([safe_str(val) for val in vals])
1146 for name, vals in initParams.long_map.iteritems():
1147 paramMap["%s.%s.%s.%s" %
1148 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_LONG
1149 paramMap["%s.%s.%s.%s" %
1150 (PARAM_DYNAMIC_INIT, facet_name, name,
1151 PARAM_DYNAMIC_VAL)] = ','.join([safe_str(val) for val in vals])
1152 for name, vals in initParams.string_map.iteritems():
1153 paramMap["%s.%s.%s.%s" %
1154 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_STRING
1155 paramMap["%s.%s.%s.%s" %
1156 (PARAM_DYNAMIC_INIT, facet_name, name,
1157 PARAM_DYNAMIC_VAL)] = ','.join(vals)
1158 for name, vals in initParams.byte_map.iteritems():
1159 paramMap["%s.%s.%s.%s" %
1160 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_BYTEARRAY
1161 paramMap["%s.%s.%s.%s" %
1162 (PARAM_DYNAMIC_INIT, facet_name, name,
1163 PARAM_DYNAMIC_VAL)] = ','.join([safe_str(val) for val in vals])
1164 for name, vals in initParams.double_map.iteritems():
1165 paramMap["%s.%s.%s.%s" %
1166 (PARAM_DYNAMIC_INIT, facet_name, name, PARAM_DYNAMIC_TYPE)] = PARAM_DYNAMIC_TYPE_DOUBLE
1167 paramMap["%s.%s.%s.%s" %
1168 (PARAM_DYNAMIC_INIT, facet_name, name,
1169 PARAM_DYNAMIC_VAL)] = ','.join([safe_str(val) for val in vals])
1170
1171 if req.groupby:
1172 paramMap[PARAM_GROUP_BY] = req.groupby
1173 if req.max_per_group > 0:
1174 paramMap[PARAM_MAX_PER_GROUP] = req.max_per_group
1175
1176 return urllib.urlencode(paramMap)
1177
def doQuery(self, req, using_json=True):
    """Execute a search query.

    req        -- the request to send.
    using_json -- when true the request is encoded with
                  ``self.buildJsonString``, otherwise with
                  ``SenseiClient.buildUrlString``.

    The encoded request is posted to ``self.url`` and the JSON reply is
    wrapped in a SenseiResult. The wall-clock duration of the whole call,
    in whole milliseconds, is stored on the result as ``total_time``.
    """
    started = datetime.now()
    if using_json:
        query_string = self.buildJsonString(req)
    else:
        query_string = SenseiClient.buildUrlString(req)
    logger.debug(query_string)

    response = self.opener.open(urllib2.Request(self.url, query_string))
    try:
        payload = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    result = SenseiResult(json.loads(payload))
    elapsed = datetime.now() - started
    # Include the days component so the figure stays correct for any
    # duration; floor division keeps the value an integer.
    result.total_time = ((elapsed.days * 86400 + elapsed.seconds) * 1000
                         + elapsed.microseconds // 1000)
    return result
1196
def get(self, ids):
    """Get the source data through a list of document IDs.

    ids -- an iterable of ID numbers or ID strings.

    The IDs are sent to ``<url>/get`` as a bracketed, comma-separated list
    (for example ``[1,2]``); string IDs are written without quotes. Returns
    the body of the reply unparsed (a JSON array string).
    """
    # join() places a comma between every pair of IDs; the earlier loop
    # relied on a counter that was never advanced and so ran them together.
    ids_str = '[' + ','.join([str(doc_id) for doc_id in ids]) + ']'
    response = self.opener.open(urllib2.Request(self.url + '/get', ids_str))
    try:
        return response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
1215
1218
1220 return self.facet_map
1221
1222
1223
def main(argv):
    """Run a sample query and print its results (``argv`` is not used).

    Builds one request that exercises paging, a term query, a range
    selection, a range filter, grouping, sorting and facets, sends it,
    displays the result table and finally fetches two documents by ID.
    """
    req = SenseiRequest()

    # Paging: the first 50 hits.
    req.set_count(50).set_offset(0)

    # Query, selection and filter.
    req.set_query(SenseiQueryTerm("tags", "automatic"))
    req.append_selection(SenseiSelectionRange("year", "1995", "2000", True, False))
    req.set_filter(SenseiFilterRange("price", 7900, 11000))

    # Grouping and ordering.
    req.set_groupby("category").set_max_per_group(4)
    req.append_sort(SenseiSort("color", True))

    # Neither stored fields nor score explanations are requested.
    req.set_fetch_stored(False)
    req.set_explain(False)

    # Facets to report next to the hits.
    facets = SenseiFacets()
    facets.add_facet("color", False, 1, 10, "hits").add_facet("year")
    req.set_facets(facets)

    # NOTE(review): SenseiServiceProxy is not defined in the visible part of
    # this module, and doQuery refers to its own class as SenseiClient --
    # confirm which name is correct.
    proxy = SenseiServiceProxy()
    proxy.doQuery(req).display(["*"], max_col_width=40)

    # Fetch by ID, once with numeric and once with string IDs.
    sys.stdout.write("%s\n" % proxy.get([1, 2]))
    sys.stdout.write("%s\n" % proxy.get(['1', '2']))


if __name__ == "__main__":
    main(sys.argv)
1272