Package sensei :: Module sensei_client_lib
[hide private]
[frames | no frames]

Source Code for Module sensei.sensei_client_lib

   1  #!/usr/bin/env python 
   2  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
   3  # not use this file except in compliance with the License. You may obtain 
   4  # a copy of the License at 
   5  # 
   6  #     http://www.apache.org/licenses/LICENSE-2.0 
   7  # 
   8  # Unless required by applicable law or agreed to in writing, software 
   9  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
  10  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
  11  # License for the specific language governing permissions and limitations 
  12  # under the License. 
  13   
  14  """Python client library for Sensei 
  15  """ 
  16   
  17   
  18  import urllib 
  19  import urllib2 
  20  import json 
  21  import sys 
  22  import logging 
  23  import datetime 
  24  from datetime import datetime 
  25  import time 
  26  import re 
  27   
  28   
  29  logger = logging.getLogger("sensei_client_lib") 
  30   
  31   
  32   
  33  # 
  34  # REST API parameter constants 
  35  # 
  36  PARAM_OFFSET = "start" 
  37  PARAM_COUNT = "rows" 
  38  PARAM_QUERY = "q" 
  39  PARAM_QUERY_PARAM = "qparam" 
  40  PARAM_SORT = "sort" 
  41  PARAM_SORT_ASC = "asc" 
  42  PARAM_SORT_DESC = "desc" 
  43  PARAM_SORT_SCORE = "relevance" 
  44  PARAM_SORT_SCORE_REVERSE = "relrev" 
  45  PARAM_SORT_DOC = "doc" 
  46  PARAM_SORT_DOC_REVERSE = "docrev" 
  47  PARAM_FETCH_STORED = "fetchstored" 
  48  PARAM_SHOW_EXPLAIN = "showexplain" 
  49  PARAM_ROUTE_PARAM = "routeparam" 
  50  PARAM_GROUP_BY = "groupby" 
  51  PARAM_MAX_PER_GROUP = "maxpergroup" 
  52  PARAM_SELECT = "select" 
  53  PARAM_SELECT_VAL = "val" 
  54  PARAM_SELECT_NOT = "not" 
  55  PARAM_SELECT_OP = "op" 
  56  PARAM_SELECT_OP_AND = "and" 
  57  PARAM_SELECT_OP_OR = "or" 
  58  PARAM_SELECT_PROP = "prop" 
  59  PARAM_FACET = "facet" 
  60  PARAM_DYNAMIC_INIT = "dyn" 
  61  PARAM_PARTITIONS = "partitions" 
  62   
  63  PARAM_FACET_EXPAND = "expand" 
  64  PARAM_FACET_MAX = "max" 
  65  PARAM_FACET_MINHIT = "minhit" 
  66  PARAM_FACET_ORDER = "order" 
  67  PARAM_FACET_ORDER_HITS = "hits" 
  68  PARAM_FACET_ORDER_VAL = "val" 
  69   
  70  PARAM_DYNAMIC_TYPE = "type" 
  71  PARAM_DYNAMIC_TYPE_STRING = "string" 
  72  PARAM_DYNAMIC_TYPE_BYTEARRAY = "bytearray" 
  73  PARAM_DYNAMIC_TYPE_BOOL = "boolean" 
  74  PARAM_DYNAMIC_TYPE_INT = "int" 
  75  PARAM_DYNAMIC_TYPE_LONG = "long" 
  76  PARAM_DYNAMIC_TYPE_DOUBLE = "double" 
  77  PARAM_DYNAMIC_VAL = "vals" 
  78   
  79  PARAM_RESULT_PARSEDQUERY = "parsedquery" 
  80  PARAM_RESULT_HIT_STORED_FIELDS = "stored" 
  81  PARAM_RESULT_HIT_STORED_FIELDS_NAME = "name" 
  82  PARAM_RESULT_HIT_STORED_FIELDS_VALUE = "val" 
  83  PARAM_RESULT_HIT_EXPLANATION = "explanation" 
  84  PARAM_RESULT_FACETS = "facets" 
  85   
  86  PARAM_RESULT_TID = "tid" 
  87  PARAM_RESULT_TOTALDOCS = "totaldocs" 
  88  PARAM_RESULT_NUMHITS = "numhits" 
  89  PARAM_RESULT_HITS = "hits" 
  90  PARAM_RESULT_HIT_UID = "uid" 
  91  PARAM_RESULT_HIT_DOCID = "docid" 
  92  PARAM_RESULT_HIT_SCORE = "score" 
  93  PARAM_RESULT_HIT_SRC_DATA = "srcdata" 
  94  PARAM_RESULT_TIME = "time" 
  95   
  96  PARAM_SYSINFO_NUMDOCS = "numdocs" 
  97  PARAM_SYSINFO_LASTMODIFIED = "lastmodified" 
  98  PARAM_SYSINFO_VERSION = "version" 
  99  PARAM_SYSINFO_FACETS = "facets" 
 100  PARAM_SYSINFO_FACETS_NAME = "name" 
 101  PARAM_SYSINFO_FACETS_RUNTIME = "runtime" 
 102  PARAM_SYSINFO_FACETS_PROPS = "props" 
 103  PARAM_SYSINFO_CLUSTERINFO = "clusterinfo" 
 104  PARAM_SYSINFO_CLUSTERINFO_ID = "id" 
 105  PARAM_SYSINFO_CLUSTERINFO_PARTITIONS = "partitions" 
 106  PARAM_SYSINFO_CLUSTERINFO_NODELINK = "nodelink" 
 107  PARAM_SYSINFO_CLUSTERINFO_ADMINLINK = "adminlink" 
 108   
 109  PARAM_RESULT_HITS_EXPL_VALUE = "value" 
 110  PARAM_RESULT_HITS_EXPL_DESC = "description" 
 111  PARAM_RESULT_HITS_EXPL_DETAILS = "details" 
 112   
 113  PARAM_RESULT_FACET_INFO_VALUE = "value" 
 114  PARAM_RESULT_FACET_INFO_COUNT = "count" 
 115  PARAM_RESULT_FACET_INFO_SELECTED = "selected" 
 116   
 117  # 
 118  # JSON API parameter constants 
 119  # 
 120   
 121  JSON_PARAM_COLUMNS = "columns" 
 122  JSON_PARAM_EXPLAIN = "explain" 
 123  JSON_PARAM_FACETS = "facets" 
 124  JSON_PARAM_FACET_INIT = "facetInit" 
 125  JSON_PARAM_FETCH_STORED = "fetchStored" 
 126  JSON_PARAM_FETCH_TERM_VECTORS = "fetchTermVectors" 
 127  JSON_PARAM_FILTER = "filter" 
 128  JSON_PARAM_FROM = "from" 
 129  JSON_PARAM_GROUPBY = "groupBy" 
 130  JSON_PARAM_PARTITIONS = "partitions" 
 131  JSON_PARAM_QUERY = "query" 
 132  JSON_PARAM_QUERY_STRING = "query_string" 
 133  JSON_PARAM_ROUTEPARAM = "routeParam" 
 134  JSON_PARAM_SELECTIONS = "selections" 
 135  JSON_PARAM_SIZE = "size" 
 136  JSON_PARAM_SORT = "sort" 
 137  JSON_PARAM_TOP = "top" 
 138  JSON_PARAM_VALUES = "values" 
 139  JSON_PARAM_EXCLUDES = "excludes" 
 140  JSON_PARAM_OPERATOR = "operator" 
 141  JSON_PARAM_NO_OPTIMIZE = "_noOptimize" 
 142   
 143  # Group by related column names 
 144  GROUP_VALUE = "groupvalue" 
 145  GROUP_HITS = "grouphits" 
 146   
 147  # Default constants 
 148  DEFAULT_REQUEST_OFFSET = 0 
 149  DEFAULT_REQUEST_COUNT = 10 
 150  DEFAULT_REQUEST_MAX_PER_GROUP = 10 
 151  DEFAULT_FACET_MINHIT = 1 
 152  DEFAULT_FACET_MAXHIT = 10 
 153  DEFAULT_FACET_ORDER = PARAM_FACET_ORDER_HITS 
 164   
 172   
 175   
def safe_str(obj):
    """Return the byte string representation of obj.

    ``str(obj)`` is tried first; if it raises ``UnicodeEncodeError`` the
    object is converted with ``unicode`` and encoded as ``unicode_escape``.
    """
    try:
        text = str(obj)
    except UnicodeEncodeError:
        # obj is unicode
        text = unicode(obj).encode("unicode_escape")
    return text
183
class SenseiClientError(Exception):
    """Exception raised for all errors related to Sensei client.

    The payload is stored on ``value`` and is also forwarded to
    ``Exception.__init__`` so that ``args`` is populated (it used to stay
    empty, which hides the payload from generic handlers and pickling).
    """

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # repr() keeps the quoting of string payloads visible in messages.
        return repr(self.value)
193
class SenseiFacets:
    """Collects per-facet request specs, keyed by facet name."""

    def __init__(self):
        self.facets = {}

    def add_facet(self, facet_name, expand=False, minHits=1, maxCounts=10, orderBy=PARAM_FACET_ORDER_HITS):
        """Store (or overwrite) the spec for facet_name and return self."""
        spec = {}
        spec["max"] = maxCounts
        spec["minCount"] = minHits
        spec["expand"] = expand
        spec["order"] = orderBy
        self.facets[facet_name] = spec
        return self

    def get_facets(self):
        """Return the dict of facet specs built so far."""
        return self.facets
205
class SenseiSelection:
    """Base class for selections: a type tag plus a selection dict."""

    def __init__(self, type):
        # Subclasses replace the empty dict with their own structure.
        self.selection = {}
        self.type = type

    def get_type(self):
        """Return the type tag given at construction."""
        return self.type

    def get_selection(self):
        """Return the selection dict."""
        return self.selection
217
class SenseiSelectionTerm(SenseiSelection):
    """Selection of type "term" holding one value for one column."""

    def __init__(self, column, value):
        SenseiSelection.__init__(self, "term")
        column_spec = {"value": value}
        self.selection = {"term": {column: column_spec}}
222
class SenseiSelectionTerms(SenseiSelection):
    """Selection of type "terms": values, excludes and an operator for one column."""

    def __init__(self, column, values, excludes, operator):
        SenseiSelection.__init__(self, "terms")
        column_spec = {
            "values": values,
            "excludes": excludes,
            "operator": operator,
        }
        self.selection = {"terms": {column: column_spec}}
228
class SenseiSelectionRange(SenseiSelection):
    """Selection of type "range" on one column.

    Both bounds default to "*" (presumably meaning unbounded -- confirm
    against the server) and both ends are inclusive by default.
    """

    def __init__(self, column, from_str="*", to_str="*", include_lower=True, include_upper=True):
        SenseiSelection.__init__(self, "range")
        bounds = {
            "to": to_str,
            "from": from_str,
            "include_lower": include_lower,
            "include_upper": include_upper,
        }
        self.selection = {"range": {column: bounds}}
234
class SenseiSelectionPath(SenseiSelection):
    """Selection of type "path" carrying value, strict flag and depth for one column."""

    def __init__(self, column, value, strict=False, depth=1):
        SenseiSelection.__init__(self, "path")
        column_spec = {"value": value, "strict": strict, "depth": depth}
        self.selection = {"path": {column: column_spec}}
239
class SenseiQuery:
    """Base class for queries: a type tag plus a query dict."""

    def __init__(self, type):
        # Subclasses replace the empty dict with their own structure.
        self.query = {}
        self.type = type

    def get_type(self):
        """Return the type tag given at construction."""
        return self.type

    def get_query(self):
        """Return the query dict."""
        return self.query
251
class SenseiQueryMatchAll(SenseiQuery):
    """Query of type "match_all" with a boost that starts at 1.0."""

    def __init__(self):
        SenseiQuery.__init__(self, "match_all")
        self.query = {"match_all": {"boost": 1.0}}

    def set_boost(self, boost):
        """Replace the boost value and return self."""
        self.query["match_all"]["boost"] = boost
        return self
261
class SenseiQueryIDs(SenseiQuery):
    """Query of type "ids" with value and exclude lists plus a boost.

    The given lists are used only when BOTH ``values`` and ``excludes`` are
    lists; otherwise the query starts with two empty lists.
    """

    def __init__(self, values, excludes):
        SenseiQuery.__init__(self, "ids")
        self.query = {"ids": {"values": [], "excludes": [], "boost": 1.0}}
        if isinstance(values, list) and isinstance(excludes, list):
            self.query = {"ids": {"values": values, "excludes": excludes, "boost": 1.0}}

    def _extend_unique(self, key, new_items):
        """Append to self.query["ids"][key] each new item not already present."""
        ids_spec = self.query.get("ids")
        if ids_spec is None or key not in ids_spec:
            return
        existing = ids_spec[key]
        seen = set(existing)
        for item in new_items:
            if item not in seen:
                # Record the item so a repeat inside new_items is skipped too
                # (previously only the pre-existing entries were checked).
                seen.add(item)
                existing.append(item)

    def add_values(self, values):
        """Add ids to the value list, skipping duplicates; returns self."""
        self._extend_unique("values", values)
        return self

    def add_excludes(self, excludes):
        """Add ids to the exclude list, skipping duplicates; returns self."""
        self._extend_unique("excludes", excludes)
        return self

    def set_boost(self, boost):
        """Replace the boost value and return self."""
        self.query["ids"]["boost"] = boost
        return self
295
class SenseiQueryString(SenseiQuery):
    """Query of type "query_string" with a set of tunable options.

    Every setter overwrites one option inside ``self.query["query_string"]``
    and returns self so that calls can be chained.
    """

    def __init__(self, query):
        SenseiQuery.__init__(self, "query_string")
        options = {
            "query": query,
            "default_field": "contents",
            "default_operator": "OR",
            "allow_leading_wildcard": True,
            "lowercase_expanded_terms": True,
            "enable_position_increments": True,
            "fuzzy_prefix_length": 0,
            "fuzzy_min_sim": 0.5,
            "phrase_slop": 0,
            "boost": 1.0,
            "auto_generate_phrase_queries": False,
            "fields": [],
            "use_dis_max": True,
            "tie_breaker": 0,
        }
        self.query = {"query_string": options}

    def _assign(self, option, value):
        """Overwrite one query_string option and return self."""
        self.query["query_string"][option] = value
        return self

    def set_field(self, field):
        return self._assign("default_field", field)

    def set_operator(self, operator):
        return self._assign("default_operator", operator)

    def set_allow_leading_wildcard(self, allow_leading_wildcard):
        return self._assign("allow_leading_wildcard", allow_leading_wildcard)

    def set_lowercase_expanded_terms(self, lowercase_expanded_terms):
        return self._assign("lowercase_expanded_terms", lowercase_expanded_terms)

    def set_enable_position_increments(self, enable_position_increments):
        return self._assign("enable_position_increments", enable_position_increments)

    def set_fuzzy_prefix_length(self, fuzzy_prefix_length):
        return self._assign("fuzzy_prefix_length", fuzzy_prefix_length)

    def set_fuzzy_min_sim(self, fuzzy_min_sim):
        return self._assign("fuzzy_min_sim", fuzzy_min_sim)

    def set_phrase_slop(self, phrase_slop):
        return self._assign("phrase_slop", phrase_slop)

    def set_boost(self, boost):
        return self._assign("boost", boost)

    def set_auto_generate_phrase_queries(self, auto_generate_phrase_queries):
        return self._assign("auto_generate_phrase_queries", auto_generate_phrase_queries)

    def set_fields(self, fields):
        """Replace the field list; anything that is not a list is ignored."""
        if not isinstance(fields, list):
            return self
        return self._assign("fields", fields)

    def set_use_dis_max(self, use_dis_max):
        return self._assign("use_dis_max", use_dis_max)

    def set_tie_breaker(self, tie_breaker):
        return self._assign("tie_breaker", tie_breaker)
367
class SenseiQueryText(SenseiQuery):
    """Query of type "text" carrying a message, an operator and a type."""

    def __init__(self, message, operator, type):
        SenseiQuery.__init__(self, "text")
        body = {"message": message, "operator": operator, "type": type}
        self.query = {"text": body}
373
class SenseiQueryTerm(SenseiQuery):
    """Query of type "term": one value for one column, boost starting at 1.0."""

    def __init__(self, column, value):
        SenseiQuery.__init__(self, "term")
        self.query = {"term": {column: {"value": value, "boost": 1.0}}}

    def set_boost(self, boost):
        """Set the boost on every column entry of the term query; returns self.

        The previous implementation called the non-existent ``dict.iterms()``
        and therefore always raised ``AttributeError``.
        """
        for column_spec in self.query["term"].values():
            column_spec["boost"] = boost
        return self
384
class SenseiFilter:
    """Base class for filters: a type tag plus a filter dict."""

    def __init__(self, type):
        # Subclasses replace the empty dict with their own structure.
        self.filter = {}
        self.type = type

    def get_type(self):
        """Return the type tag given at construction."""
        return self.type

    def get_filter(self):
        """Return the filter dict."""
        return self.filter
396
class SenseiFilterIDs(SenseiFilter):
    """Filter of type "ids" with value and exclude lists.

    The given lists are used only when BOTH ``values`` and ``excludes`` are
    lists; otherwise the filter starts with two empty lists.
    """

    def __init__(self, values, excludes):
        SenseiFilter.__init__(self, "ids")
        self.filter = {"ids": {"values": [], "excludes": []}}
        if isinstance(values, list) and isinstance(excludes, list):
            self.filter = {"ids": {"values": values, "excludes": excludes}}

    def _extend_unique(self, key, new_items):
        """Append to self.filter["ids"][key] each new item not already present."""
        ids_spec = self.filter.get("ids")
        if ids_spec is None or key not in ids_spec:
            return
        existing = ids_spec[key]
        seen = set(existing)
        for item in new_items:
            if item not in seen:
                # Record the item so a repeat inside new_items is skipped too
                # (previously only the pre-existing entries were checked).
                seen.add(item)
                existing.append(item)

    def add_values(self, values):
        """Add ids to the value list, skipping duplicates; returns self."""
        self._extend_unique("values", values)
        return self

    def add_excludes(self, excludes):
        """Add ids to the exclude list, skipping duplicates; returns self."""
        self._extend_unique("excludes", excludes)
        return self
426
class SenseiFilterBool(SenseiFilter):
    """Filter of type "bool" combining must / must_not / should clauses.

    ``must_filter`` and ``must_not_filter`` are used only when they are
    SenseiFilter instances; ``should_filter`` only when it is a list, in
    which case ``get_filter()`` is called on every item.
    """

    def __init__(self, must_filter=None, must_not_filter=None, should_filter=None):
        SenseiFilter.__init__(self, "bool")
        clauses = {"must": {}, "must_not": {}, "should": {}}
        self.filter = {"bool": clauses}
        # isinstance() is already False for None, so no separate None check.
        if isinstance(must_filter, SenseiFilter):
            clauses["must"] = must_filter.get_filter()
        if isinstance(must_not_filter, SenseiFilter):
            clauses["must_not"] = must_not_filter.get_filter()
        if isinstance(should_filter, list):
            clauses["should"] = [item.get_filter() for item in should_filter]
443
class SenseiFilterAND(SenseiFilter):
    """Filter of type "and" built from the SenseiFilter items of a list.

    A non-list argument, and list items that are not SenseiFilter
    instances, are ignored.
    """

    def __init__(self, filter_list):
        SenseiFilter.__init__(self, "and")
        members = []
        if isinstance(filter_list, list):
            members = [item.get_filter()
                       for item in filter_list
                       if isinstance(item, SenseiFilter)]
        self.filter = {"and": members}
453
class SenseiFilterOR(SenseiFilter):
    """Filter of type "or" built from the SenseiFilter items of a list.

    A non-list argument, and list items that are not SenseiFilter
    instances, are ignored.
    """

    def __init__(self, filter_list):
        SenseiFilter.__init__(self, "or")
        members = []
        if isinstance(filter_list, list):
            members = [item.get_filter()
                       for item in filter_list
                       if isinstance(item, SenseiFilter)]
        self.filter = {"or": members}
463
class SenseiFilterTerm(SenseiFilter):
    """Filter of type "term": one value for one column plus a _noOptimize flag."""

    def __init__(self, column, value, noOptimize=False):
        SenseiFilter.__init__(self, "term")
        column_spec = {"value": value, "_noOptimize": noOptimize}
        self.filter = {"term": {column: column_spec}}
468
class SenseiFilterTerms(SenseiFilter):
    """Filter of type "terms" on one column.

    * values is not a list: the filter stays ``{"terms": {}}``.
    * values and excludes are both lists: the full form with values,
      excludes, operator and _noOptimize is emitted.
    * only values is a list: the short form ``{column: values}`` is emitted
      (operator and noOptimize are not used in that case).
    """

    def __init__(self, column, values=None, excludes=None, operator="or", noOptimize=False):
        SenseiFilter.__init__(self, "terms")
        self.filter = {"terms": {}}
        if not isinstance(values, list):
            return
        if isinstance(excludes, list):
            # complicated mode
            column_spec = {
                "values": values,
                "excludes": excludes,
                "operator": operator,
                "_noOptimize": noOptimize,
            }
            self.filter = {"terms": {column: column_spec}}
        else:
            self.filter = {"terms": {column: values}}
480
class SenseiFilterRange(SenseiFilter):
    """Filter of type "range" with from/to bounds for one column."""

    def __init__(self, column, from_val, to_val):
        SenseiFilter.__init__(self, "range")
        bounds = {"from": from_val, "to": to_val, "_noOptimize": False}
        self.filter = {"range": {column: bounds}}

    def set_No_optimization(self, type, date_format=None):
        """Mark every column entry as non-optimized with the given value type.

        Sets "_type" and "_noOptimize"; "_date_format" is added only when
        type is "date" and a date_format is supplied. Returns self.
        """
        add_date_format = type == "date" and date_format is not None
        for bounds in self.filter["range"].values():
            if bounds is None:
                continue
            bounds["_type"] = type
            bounds["_noOptimize"] = True
            if add_date_format:
                bounds["_date_format"] = date_format
        return self
495
class SenseiFilterQuery(SenseiFilter):
    """Filter of type "query" wrapping the dict of a SenseiQuery.

    Anything that is not a SenseiQuery yields an empty "query" entry.
    """

    def __init__(self, query):
        SenseiFilter.__init__(self, "query")
        if isinstance(query, SenseiQuery):
            wrapped = query.get_query()
        else:
            wrapped = {}
        self.filter = {"query": wrapped}
502
class SenseiFilterSelection(SenseiFilter):
    """Filter of type "selection" wrapping the dict of a SenseiSelection.

    Anything that is not a SenseiSelection yields an empty "selection" entry.
    """

    def __init__(self, selection):
        SenseiFilter.__init__(self, "selection")
        if isinstance(selection, SenseiSelection):
            wrapped = selection.get_selection()
        else:
            wrapped = {}
        self.filter = {"selection": wrapped}
509
class SenseiSort:
    """One sort criterion: a field and, for ordinary fields, a direction.

    The four special sort names (relevance / relrev / doc / docrev) carry no
    direction, so ``dir`` stays None for them and ``reverse`` is not used.
    """

    def __init__(self, field, reverse=False):
        self.field = field
        self.dir = None
        special_fields = (PARAM_SORT_SCORE,
                          PARAM_SORT_SCORE_REVERSE,
                          PARAM_SORT_DOC,
                          PARAM_SORT_DOC_REVERSE)
        if field not in special_fields:
            self.dir = PARAM_SORT_DESC if reverse else PARAM_SORT_ASC

    def __str__(self):
        return self.build_sort_field()

    def build_sort_field(self):
        """Return "field:dir", or just the field when there is no direction."""
        if not self.dir:
            return self.field
        return self.field + ":" + self.dir

    def build_sort_spec(self):
        """Return the sort entry used when building the JSON request.

        ``{field: dir}`` when a direction is set, "_score" for the relevance
        sort, and the bare field name otherwise.
        """
        if self.dir:
            return {self.field: self.dir}
        if self.field == PARAM_SORT_SCORE:
            return "_score"
        return self.field
540
class SenseiFacetInits:
    """Collects facet init parameters as {facet: {param: {"type", "values"}}}."""

    def __init__(self):
        self.facet_init = {}

    def add_facet_init(self, facet_name, param_name, param_values, param_type="string"):
        """Record one init parameter for a facet and return self.

        The call is a no-op unless param_values is a list.
        """
        if isinstance(param_values, list):
            # parameter type, valid values are: "int","string","boolean","long","bytes","double", default: "string"
            facet_params = self.facet_init.setdefault(facet_name, {})
            facet_params[param_name] = {"type": param_type, "values": param_values}
        return self

    def get_facet_inits(self):
        """Return the nested dict of init parameters built so far."""
        return self.facet_init
559
class SenseiFacetInfo:
    """Description of one facet: its name, runtime flag and properties."""

    def __init__(self, name, runtime=False, props=None):
        """Store the facet description.

        ``props`` defaults to a fresh empty dict per instance. The previous
        ``props={}`` default was one dict shared by every instance created
        without props, so mutating it through one object leaked into all
        the others. An explicit ``None`` is likewise stored as ``{}``.
        """
        self.name = name
        self.runtime = runtime
        if props is None:
            props = {}
        self.props = props

    def get_name(self):
        return self.name

    def set_name(self, name):
        self.name = name

    def get_runtime(self):
        return self.runtime

    def set_runtime(self, runtime):
        self.runtime = runtime

    def get_props(self):
        return self.props

    def set_props(self, props):
        self.props = props
585
586 587 -class SenseiNodeInfo:
588
589 - def __init__(self, id, partitions, node_link, admin_link):
590 self.id = id 591 self.partitions = partitions 592 self.node_link = node_link 593 self.admin_link = admin_link
594
595 - def get_id(self):
596 return self.id
597
598 - def get_partitions(self):
599 return self.partitions
600 603
606
class SenseiSystemInfo:
    """System information parsed from the JSON of a sysinfo response."""

    def __init__(self, json_data):
        """Read doc count, last-modified stamp, version and facet list.

        json_data is a dict-like object; the "numdocs" and "lastmodified"
        entries must be convertible with int() / long().
        """
        logger.debug("json_data = %s" % json_data)
        self.num_docs = int(json_data.get(PARAM_SYSINFO_NUMDOCS))
        self.last_modified = long(json_data.get(PARAM_SYSINFO_LASTMODIFIED))
        self.version = json_data.get(PARAM_SYSINFO_VERSION)
        self.facet_infos = []
        for facet in json_data.get(PARAM_SYSINFO_FACETS):
            facet_info = SenseiFacetInfo(facet.get(PARAM_SYSINFO_FACETS_NAME),
                                         facet.get(PARAM_SYSINFO_FACETS_RUNTIME),
                                         facet.get(PARAM_SYSINFO_FACETS_PROPS))
            self.facet_infos.append(facet_info)
        # TODO: get cluster_info
        self.cluster_info = None

    def _display_cells(self, facet_info):
        """Return the six table cells for one facet, in column order.

        A missing name or property becomes an empty string; the previous
        code called len() on None and raised TypeError in that case.
        """
        props = facet_info.get_props() or {}
        if facet_info.get_runtime():
            runtime = "true"
        else:
            runtime = "false"
        cells = [facet_info.get_name(),
                 props.get("type"),
                 runtime,
                 props.get("column"),
                 props.get("column_type"),
                 props.get("depends")]
        return [cell if cell is not None else "" for cell in cells]

    def display(self):
        """Display sysinfo."""
        keys = ["facet_name", "facet_type", "runtime", "column", "column_type", "depends"]
        # XXX add existing flags

        rows = [self._display_cells(facet_info) for facet_info in self.facet_infos]

        # Each column is as wide as its header or its longest cell. The
        # runtime cells ("true"/"false") never exceed len("runtime").
        max_lens = dict((key, len(key)) for key in keys)
        for row in rows:
            for key, cell in zip(keys, row):
                if len(cell) > max_lens[key]:
                    max_lens[key] = len(cell)

        print_header(keys, max_lens)
        for row in rows:
            sys.stdout.write('|')
            for key, cell in zip(keys, row):
                sys.stdout.write(' %s%s |' % (cell, ' ' * (max_lens[key] - len(cell))))
            sys.stdout.write('\n')
        print_footer(keys, max_lens)

    def get_num_docs(self):
        return self.num_docs

    def set_num_docs(self, num_docs):
        self.num_docs = num_docs

    def get_last_modified(self):
        return self.last_modified

    def set_last_modified(self, last_modified):
        self.last_modified = last_modified

    def get_facet_infos(self):
        return self.facet_infos

    def set_facet_infos(self, facet_infos):
        self.facet_infos = facet_infos

    def get_version(self):
        return self.version

    def set_version(self, version):
        self.version = version

    def get_cluster_info(self):
        return self.cluster_info

    def set_cluster_info(self, cluster_info):
        self.cluster_info = cluster_info
719
class SenseiRequest:
    """Mutable description of one search request.

    Every set_*/append_* method updates one attribute and returns self so
    that calls can be chained.
    """

    def __init__(self,
                 bql_req=None,
                 offset=DEFAULT_REQUEST_OFFSET,
                 count=DEFAULT_REQUEST_COUNT,
                 max_per_group=DEFAULT_REQUEST_MAX_PER_GROUP,
                 facet_map=None):
        # bql_req and facet_map are accepted but not used by this constructor.
        # Paging and grouping
        self.offset = offset
        self.count = count
        self.groupby = None
        self.max_per_group = max_per_group
        # Query, filter and selections
        self.query = None
        self.query_pred = {}
        self.qParam = {}
        self.filter = {}
        self.selections = []
        # Output shaping
        self.columns = []
        self.sorts = None
        self.facets = {}
        self.facet_init_param_map = {}
        # Flags and routing
        self.explain = False
        self.fetch_stored = False
        self.route_param = None

    def set_offset(self, offset):
        self.offset = offset
        return self

    def set_count(self, count):
        self.count = count
        return self

    def set_query(self, query):
        """Store the dict returned by query.get_query()."""
        self.query = query.get_query()
        return self

    def set_explain(self, explain):
        self.explain = explain
        return self

    def set_fetch_stored(self, fetch_stored):
        self.fetch_stored = fetch_stored
        return self

    def set_route_param(self, route_param):
        self.route_param = route_param
        return self

    def set_sorts(self, sorts):
        self.sorts = sorts
        return self

    def append_sort(self, sort):
        """Append a SenseiSort; any other object is ignored."""
        if not isinstance(sort, SenseiSort):
            return self
        if self.sorts is None:
            self.sorts = []
        self.sorts.append(sort)
        return self

    def set_filter(self, filter):
        """Store the dict returned by filter.get_filter()."""
        self.filter = filter.get_filter()
        return self

    def append_selection(self, selection):
        """Append the dict of a SenseiSelection; any other object is ignored."""
        if self.selections is None:
            self.selections = []
        if isinstance(selection, SenseiSelection):
            self.selections.append(selection.get_selection())
        return self

    def set_facets(self, facets):
        """Store the dict returned by facets.get_facets()."""
        self.facets = facets.get_facets()
        return self

    def set_groupby(self, groupby):
        self.groupby = groupby
        return self

    def set_max_per_group(self, max_per_group):
        self.max_per_group = max_per_group
        return self

    def set_facet_init_param_map(self, facet_init_param_map):
        self.facet_init_param_map = facet_init_param_map
        return self

    def get_columns(self):
        return self.columns
813
class SenseiHit:
    """One search hit loaded from its JSON representation."""

    def __init__(self):
        self.docid = None
        self.uid = None
        self.score = None
        self.explanation = None
        self.stored = None
        self.srcData = {}

    def load(self, jsonHit):
        """Populate the hit from a dict-like JSON object.

        The source-data entry is a JSON string that is decoded here;
        srcData becomes None when that entry is missing or empty.
        """
        self.docid = jsonHit.get(PARAM_RESULT_HIT_DOCID)
        self.uid = jsonHit.get(PARAM_RESULT_HIT_UID)
        self.score = jsonHit.get(PARAM_RESULT_HIT_SCORE)
        self.explanation = jsonHit.get(PARAM_RESULT_HIT_EXPLANATION)
        self.stored = jsonHit.get(PARAM_RESULT_HIT_STORED_FIELDS)
        raw_src = jsonHit.get(PARAM_RESULT_HIT_SRC_DATA)
        self.srcData = json.loads(raw_src) if raw_src else None
835
836 837 -class SenseiResultFacet:
838 value = None 839 count = None 840 selected = None 841
842 - def load(self,json):
846
class SenseiResult:
    """Sensei search results for a query."""

    def __init__(self, json_data):
        """Parse the JSON dict of a search response.

        Facet entries are wrapped in SenseiResultFacet objects and stored
        in facetMap; the raw JSON stays available as jsonMap.
        """
        logger.debug("json_data = %s" % json_data)
        self.jsonMap = json_data
        self.parsedQuery = json_data.get(PARAM_RESULT_PARSEDQUERY)
        self.totalDocs = json_data.get(PARAM_RESULT_TOTALDOCS, 0)
        self.time = json_data.get(PARAM_RESULT_TIME, 0)
        self.total_time = 0
        self.numHits = json_data.get(PARAM_RESULT_NUMHITS, 0)
        self.hits = json_data.get(PARAM_RESULT_HITS)
        facets_json = json_data.get(PARAM_RESULT_FACETS)
        self.facetMap = {}
        if facets_json:
            for facet_name, facet_values in facets_json.items():
                facet_list = []
                for facet in facet_values:
                    facet_obj = SenseiResultFacet()
                    facet_obj.load(facet)
                    facet_list.append(facet_obj)
                self.facetMap[facet_name] = facet_list

    @staticmethod
    def _cell_text(hit, column):
        """Return the printable text of one column of one hit."""
        if column in hit:
            v = hit.get(column)
        else:
            v = '<Not Found>'
        if isinstance(v, list):
            return ','.join([safe_str(item) for item in v])
        if isinstance(v, (int, long, float)):
            return str(v)
        # The value may contain unicode characters
        return safe_str(v)

    @staticmethod
    def _group_hits(hit):
        """Return the hits grouped under this hit, or the hit itself in a list."""
        if GROUP_HITS in hit:
            return hit.get(GROUP_HITS)
        return [hit]

    def display(self, columns=['*'], max_col_width=40):
        """Print the results in SQL SELECT result format.

        columns -- column names to print; ['*'] prints every key of the
                   first hit except the group and source-data keys.
        max_col_width -- cell text longer than this is truncated.
        """
        if not self.hits:
            sys.stdout.write("No hit is found.\n")
            return
        elif not columns:
            sys.stdout.write("No column is selected.\n")
            return

        if len(columns) == 1 and columns[0] == '*':
            keys = list(self.hits[0].keys())
            for hidden in (GROUP_HITS, GROUP_VALUE, PARAM_RESULT_HIT_SRC_DATA):
                if hidden in keys:
                    keys.remove(hidden)
        else:
            keys = columns

        # Measure the same text that is printed below. Measuring the raw
        # value failed on None and gave wrong widths for escaped unicode.
        max_lens = dict((col, len(col)) for col in keys)
        has_group_hits = False
        for hit in self.hits:
            if GROUP_HITS in hit:
                has_group_hits = True
            for group_hit in self._group_hits(hit):
                for col in keys:
                    value_len = min(len(self._cell_text(group_hit, col)), max_col_width)
                    # A column only grows, so it is never narrower than its header.
                    if value_len > max_lens[col]:
                        max_lens[col] = value_len

        line_char = '=' if has_group_hits else '-'
        corner_char = '=' if has_group_hits else '+'
        print_header(keys, max_lens, line_char, corner_char)

        # Print the results
        for hit in self.hits:
            for group_hit in self._group_hits(hit):
                sys.stdout.write('|')
                for key in keys:
                    v = self._cell_text(group_hit, key)
                    if len(v) > max_col_width:
                        v = v[:max_col_width]
                    sys.stdout.write(' %s%s |' % (v, ' ' * (max_lens[key] - len(v))))
                sys.stdout.write('\n')
            if has_group_hits:
                print_line(keys, max_lens)

        print_footer(keys, max_lens, line_char, corner_char)

        sys.stdout.write('%s %s%s in set, %s hit%s, %s total doc%s (server: %sms, total: %sms)\n' %
                         (len(self.hits),
                          'group' if has_group_hits else 'row',
                          's' if len(self.hits) > 1 else '',
                          self.numHits,
                          's' if self.numHits > 1 else '',
                          self.totalDocs,
                          's' if self.totalDocs > 1 else '',
                          self.time,
                          self.total_time
                          ))

        # Print facet information. A response without facets has no such
        # entry; fall back to an empty dict instead of failing on None.
        facets_json = self.jsonMap.get(PARAM_RESULT_FACETS) or {}
        for facet, values in facets_json.items():
            max_val_len = len(facet)
            max_count_len = 1
            for val in values:
                max_val_len = max(max_val_len, min(max_col_width, len(val.get('value'))))
                max_count_len = max(max_count_len, len(str(val.get('count'))))
            total_len = max_val_len + 2 + max_count_len + 3

            sys.stdout.write('+' + '-' * total_len + '+\n')
            sys.stdout.write('| ' + facet + ' ' * (total_len - len(facet) - 1) + '|\n')
            sys.stdout.write('+' + '-' * total_len + '+\n')

            for val in values:
                sys.stdout.write('| %s%s (%s)%s |\n' %
                                 (val.get('value'),
                                  ' ' * (max_val_len - len(val.get('value'))),
                                  val.get('count'),
                                  ' ' * (max_count_len - len(str(val.get('count'))))))
            sys.stdout.write('+' + '-' * total_len + '+\n')
989
990 991 992 -class SenseiServiceProxy:
993 """Sensei client class.""" 994
995 - def __init__(self, host='localhost', port=8080, path='sensei', sysinfo=None):
996 self.host = host 997 self.port = port 998 self.path = path 999 self.url = 'http://%s:%d/%s' % (self.host, self.port, self.path) 1000 self.opener = urllib2.build_opener() 1001 self.opener.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.91 Safari/534.30')] 1002 1003 if sysinfo: 1004 self.sysinfo = SenseiSystemInfo(sysinfo) 1005 else: 1006 urlReq = urllib2.Request(self.url + "/sysinfo") 1007 res = self.opener.open(urlReq) 1008 line = res.read() 1009 jsonObj = json.loads(line) 1010 # print json.dumps(jsonObj, indent=4) 1011 self.sysinfo = SenseiSystemInfo(jsonObj) 1012 self.facet_map = {} 1013 for facet_info in self.sysinfo.get_facet_infos(): 1014 self.facet_map[facet_info.get_name()] = facet_info
1015 1016
def buildJsonString(self, req, sort_keys=True, indent=None):
    """Build a Sensei request in JSON format.

    Once built, a Sensei request in JSON format can be sent to a Sensei
    broker using the following command:

    $ curl -XPOST http://localhost:8080/sensei -d '{
      "fetchStored": "true",
      "from": 0,
      "size": 10
    }'

    Args:
      req: the request to serialize.  The attributes read are offset,
        count, query, explain, fetch_stored, route_param, sorts, filter,
        query_pred, selections, facets, facet_init_param_map, groupby and
        max_per_group.
      sort_keys: passed through to json.dumps.
      indent: passed through to json.dumps.

    Returns:
      The request serialized as a JSON string.
    """
    output_json = {
        JSON_PARAM_FROM: req.offset,
        JSON_PARAM_SIZE: req.count,
    }

    if req.query:
        output_json[JSON_PARAM_QUERY] = req.query

    if req.explain:
        # The explain flag must live under its own key.  It used to be
        # stored under JSON_PARAM_QUERY, which silently replaced the query.
        # NOTE(review): switch to a module-level constant for this key if
        # one is defined alongside the other JSON_PARAM_* names.
        output_json["explain"] = req.explain
    if req.fetch_stored:
        output_json[JSON_PARAM_FETCH_STORED] = req.fetch_stored
    if req.route_param:
        output_json[JSON_PARAM_ROUTEPARAM] = req.route_param
    if req.sorts:
        output_json[JSON_PARAM_SORT] = [sort.build_sort_spec() for sort in req.sorts]

    if req.filter:
        output_json[JSON_PARAM_FILTER] = req.filter

    # A query predicate, when present, takes precedence over req.query.
    if req.query_pred:
        output_json[JSON_PARAM_QUERY] = req.query_pred[JSON_PARAM_QUERY]

    if req.selections:
        output_json[JSON_PARAM_SELECTIONS] = req.selections

    if req.facets:
        output_json[JSON_PARAM_FACETS] = req.facets

    facet_init_map = {}
    for facet_name, initParams in req.facet_init_param_map.iteritems():
        # (value map, declared type, convert values with safe_str?)
        # The order is significant: a name present in several maps ends up
        # with the entry from the last map listed here.
        typed_maps = (
            (initParams.bool_map, PARAM_DYNAMIC_TYPE_BOOL, False),
            (initParams.int_map, PARAM_DYNAMIC_TYPE_INT, True),
            (initParams.long_map, PARAM_DYNAMIC_TYPE_LONG, True),
            (initParams.string_map, PARAM_DYNAMIC_TYPE_STRING, False),
            (initParams.byte_map, PARAM_DYNAMIC_TYPE_BYTEARRAY, True),
            (initParams.double_map, PARAM_DYNAMIC_TYPE_DOUBLE, True),
        )
        inner_map = {}
        for value_map, type_name, stringify in typed_maps:
            for name, vals in value_map.iteritems():
                if stringify:
                    values = [safe_str(val) for val in vals]
                else:
                    values = vals
                inner_map[name] = {PARAM_DYNAMIC_TYPE: type_name,
                                   "values": values}
        facet_init_map[facet_name] = inner_map
    if facet_init_map:
        output_json[JSON_PARAM_FACET_INIT] = facet_init_map

    if req.groupby:
        # For now we only support group-by on single column
        output_json[JSON_PARAM_GROUPBY] = {
            JSON_PARAM_COLUMNS: [req.groupby],
            JSON_PARAM_TOP: req.max_per_group,
        }

    return json.dumps(output_json, sort_keys=sort_keys, indent=indent)
@staticmethod
def buildUrlString(req):
    """Build a Sensei request as a URL-encoded parameter string.

    The request object is only read; in particular req.qParam is left
    untouched so the same request can be serialized more than once.

    Args:
      req: the request to serialize.  The attributes read are offset,
        count, query, explain, fetch_stored, route_param, sorts, qParam,
        selections, facets, facet_init_param_map, groupby and
        max_per_group.

    Returns:
      The result of urllib.urlencode over the collected parameters.
    """
    def _bool_text(val):
        return "true" if val else "false"

    paramMap = {
        PARAM_OFFSET: req.offset,
        PARAM_COUNT: req.count,
    }
    if req.query:
        paramMap[PARAM_QUERY] = req.query
    if req.explain:
        paramMap[PARAM_SHOW_EXPLAIN] = "true"
    if req.fetch_stored:
        paramMap[PARAM_FETCH_STORED] = "true"
    if req.route_param:
        paramMap[PARAM_ROUTE_PARAM] = req.route_param

    if req.sorts:
        paramMap[PARAM_SORT] = ",".join(sort.build_sort_field() for sort in req.sorts)

    # A "query" entry in qParam overrides req.query; every other entry is
    # sent as a "name:value" item of the query-parameter list.  The entry
    # is skipped here rather than deleted from the caller's dict.
    query_override = req.qParam.get("query")
    if query_override:
        paramMap[PARAM_QUERY] = query_override
    extra_params = ["%s:%s" % (param, value)
                    for param, value in req.qParam.items()
                    if param != "query"]
    if extra_params:
        paramMap[PARAM_QUERY_PARAM] = ",".join(extra_params)

    for selection in req.selections.values():
        paramMap[selection.getSelectNotParam()] = selection.getSelectNotParamValues()
        paramMap[selection.getSelectOpParam()] = selection.operation
        paramMap[selection.getSelectValParam()] = selection.getSelectValParamValues()
        if selection.properties:
            paramMap[selection.getSelectPropParam()] = selection.getSelectPropParamValues()

    for facet_name, facet_spec in req.facets.iteritems():
        paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_MAX)] = facet_spec.maxCounts
        paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_ORDER)] = facet_spec.orderBy
        paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_EXPAND)] = _bool_text(facet_spec.expand)
        paramMap["%s.%s.%s" % (PARAM_FACET, facet_name, PARAM_FACET_MINHIT)] = facet_spec.minHits

    for facet_name, initParams in req.facet_init_param_map.iteritems():
        # (value map, declared type, per-value text converter or None to
        # join the values as they are)
        typed_maps = (
            (initParams.bool_map, PARAM_DYNAMIC_TYPE_BOOL, _bool_text),
            (initParams.int_map, PARAM_DYNAMIC_TYPE_INT, safe_str),
            (initParams.long_map, PARAM_DYNAMIC_TYPE_LONG, safe_str),
            (initParams.string_map, PARAM_DYNAMIC_TYPE_STRING, None),
            (initParams.byte_map, PARAM_DYNAMIC_TYPE_BYTEARRAY, safe_str),
            (initParams.double_map, PARAM_DYNAMIC_TYPE_DOUBLE, safe_str),
        )
        for value_map, type_name, to_text in typed_maps:
            for name, vals in value_map.iteritems():
                if to_text is None:
                    texts = vals
                else:
                    texts = [to_text(val) for val in vals]
                paramMap["%s.%s.%s.%s" %
                         (PARAM_DYNAMIC_INIT, facet_name, name,
                          PARAM_DYNAMIC_TYPE)] = type_name
                paramMap["%s.%s.%s.%s" %
                         (PARAM_DYNAMIC_INIT, facet_name, name,
                          PARAM_DYNAMIC_VAL)] = ",".join(texts)

    if req.groupby:
        paramMap[PARAM_GROUP_BY] = req.groupby
    if req.max_per_group > 0:
        paramMap[PARAM_MAX_PER_GROUP] = req.max_per_group

    return urllib.urlencode(paramMap)
1177
def doQuery(self, req, using_json=True):
    """Execute a search query.

    Args:
      req: the request to send.
      using_json: when true the request body is produced by
        buildJsonString, otherwise by SenseiClient.buildUrlString.

    Returns:
      A SenseiResult built from the JSON-decoded reply, with total_time
      set to the elapsed wall-clock time of the call in milliseconds.
    """
    time1 = datetime.now()
    if using_json:  # Use JSON format
        query_string = self.buildJsonString(req)
    else:
        query_string = SenseiClient.buildUrlString(req)
    logger.debug(query_string)

    urlReq = urllib2.Request(self.url, query_string)
    response = self.opener.open(urlReq)
    try:
        line = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    res = SenseiResult(json.loads(line))
    delta = datetime.now() - time1
    # Whole milliseconds; days are included so that long calls are not
    # under-reported, and // keeps the value an integer.
    res.total_time = ((delta.days * 86400 + delta.seconds) * 1000
                      + delta.microseconds // 1000)
    return res
1196
def get(self, ids):
    """Get the source data through a list of document IDs.

    The input is either a list of ID numbers, or ID strings;
    The output is a jsonarray string;

    Each ID is rendered with str() and the IDs are sent comma-separated
    inside square brackets, e.g. [1, 2] and ['1', '2'] both become
    "[1,2]".  The body is posted to self.url + '/get' and the raw reply
    is returned.
    """
    ids_str = '[' + ','.join(str(doc_id) for doc_id in ids) + ']'
    urlReq = urllib2.Request(self.url + '/get', ids_str)
    response = self.opener.open(urlReq)
    try:
        return response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
1215
def get_sysinfo(self):
    """Return the object stored in this instance's ``sysinfo`` attribute."""
    info = self.sysinfo
    return info
1218
def get_facet_map(self):
    """Return the object stored in this instance's ``facet_map`` attribute."""
    facets_by_name = self.facet_map
    return facets_by_name
1221
def main(argv):
    """Build a sample Sensei request, run it, and print two lookups by ID.

    The argv argument is accepted but not used.
    """
    # Sample request with paging info.
    req = SenseiRequest()
    req.set_count(50).set_offset(0)

    # Query info.
    term_query = SenseiQueryTerm("tags", "automatic")
    req.set_query(term_query)

    # Selection info: [1995 TO 2000)
    req.append_selection(
        SenseiSelectionRange("year", "1995", "2000", True, False))

    # Filter info.
    price_filter = SenseiFilterRange("price", 7900, 11000)
    req.set_filter(price_filter)

    # Group by.
    req.set_groupby("category").set_max_per_group(4)

    # Sort.
    req.append_sort(SenseiSort("color", True))

    # Stored-field fetching and explain flags.
    req.set_fetch_stored(False)
    req.set_explain(False)

    # Facets information.
    facets = (SenseiFacets()
              .add_facet("color", False, 1, 10, "hits")
              .add_facet("year"))
    req.set_facets(facets)

    # Execute and display results.
    proxy = SenseiServiceProxy()
    sensei_results = proxy.doQuery(req)
    sensei_results.display(["*"], max_col_width=40)

    # Look the same two documents up by numeric and by string IDs.
    for id_batch in ([1, 2], ['1', '2']):
        print(proxy.get(id_batch))
# Run the sample query when this module is executed as a script.
if __name__ == "__main__":
    main(sys.argv)