Package kenozooid :: Module uddf
Source Code for Module kenozooid.uddf

   1  # 
   2  # Kenozooid - dive planning and analysis toolbox. 
   3  # 
   4  # Copyright (C) 2009-2019 by Artur Wroblewski <wrobell@riseup.net> 
   5  # 
   6  # This program is free software: you can redistribute it and/or modify 
   7  # it under the terms of the GNU General Public License as published by 
   8  # the Free Software Foundation, either version 3 of the License, or 
   9  # (at your option) any later version. 
  10  # 
  11  # This program is distributed in the hope that it will be useful, 
  12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14  # GNU General Public License for more details. 
  15  # 
  16  # You should have received a copy of the GNU General Public License 
  17  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
  18  # 
  19   
  20  """ 
  21  The `kenozooid.uddf` module provides support for parsing, searching and 
  22  manipulation of data stored in UDDF files. 
  23   
  24  The functions implemented in this module can be divided into the following 
  25  categories 
  26   
  27  - XML nodes functions 
  28  - generic XML data searching and manipulation functions 
  29  - functions for searching and manipulation of diving specific data 
  30   
   31  Searching functions use XPath expressions (queries) to find data. Each tag 
   32  name in a query should be prefixed with the 'uddf:' string to indicate the 
   33  UDDF namespace, e.g. 'uddf:diver', 'uddf:waypoint' - the appropriate 
   34  namespace mapping for this prefix is defined for each XPath call. 
  35   
  36  The result of parsing or search of data is usually iterator of XML nodes or 
  37  data records (named tuples in Python terms). 
  38   
   39  Module `lxml` is used for XML parsing and querying with XPath. The full 
   40  capabilities of the underlying `libxml2` library are used by design. The 
   41  ElementTree XML data model is used for XML nodes. 
  42  """ 
  43   
  44  from collections import namedtuple, OrderedDict, Counter 
  45  from lxml import etree as et 
  46  from functools import partial 
  47  from datetime import datetime 
  48  from dateutil.parser import parse as dparse 
  49  from io import FileIO 
  50  from operator import itemgetter 
  51  from uuid import uuid4 as uuid 
  52  from copy import deepcopy 
  53  from dirty.xml import xml 
  54  from dirty import RawString 
  55  import base64 
  56  import bz2 
  57  import itertools 
  58  import hashlib 
  59  import logging 
  60  import os 
  61  import os.path 
  62  import pkg_resources 
  63   
  64  import kenozooid 
  65  import kenozooid.util as kt 
  66   
  67  log = logging.getLogger('kenozooid.uddf') 
  68   
#
# Default UDDF namespace mapping. The 'uddf' prefix is bound to the UDDF 3.2
# namespace and is passed to the XPath helpers defined in this module.
#
_NSMAP = {'uddf': 'http://www.streit.cc/uddf/3.2/'}

# Node id formatter
FORMAT_ID = 'id-{}'

# Value formatters converting data to text:
# - FMT_F: number with one decimal place
# - FMT_F2: number with two decimal places
# - FMT_I: number rounded with `round` and rendered as an integer
# - FMT_DT: datetime as '%Y-%m-%dT%H:%M:%S%z' (the '%z' part is empty for
#   naive datetime objects)
FMT_F = partial(str.format, '{0:.1f}')
FMT_F2 = partial(str.format, '{0:.2f}')
FMT_I = lambda v: '{}'.format(int(round(v)))
FMT_DT = lambda dt: format(dt, '%Y-%m-%dT%H:%M:%S%z')
  81   
  82  # 
  83  # Parsing and searching. 
  84  # 
  85   
# `et.XPath` with the UDDF namespace mapping pre-bound, so queries can use
# the 'uddf:' prefix without passing `namespaces` on every call.
XPath = partial(et.XPath, namespaces=_NSMAP)
# a partial object has no useful docstring of its own, so one is attached
XPath.__doc__ = """
    XPath query constructor for UDDF data.

    Use `uddf` prefix to create a query, i.e.::

        XPath('uddf:informationbeforedive/uddf:datetime/text()')

    .. seealso: lxml.etree.XPath
"""
  96   
# XPath queries for default dive data, relative to a dive node. The order
# matches the default fields of `dive_data`: number, datetime, depth,
# duration, temp, avg_depth, mode, profile. The trailing `None` means "no
# query" - `_field` then passes the node itself to the field parser.
XP_DEFAULT_DIVE_DATA = (
    XPath('uddf:informationbeforedive/uddf:divenumber/text()'),
    XPath('uddf:informationbeforedive/uddf:datetime/text()'),
    XPath('uddf:informationafterdive/uddf:greatestdepth/text()'),
    XPath('uddf:informationafterdive/uddf:diveduration/text()'),
    XPath('uddf:informationafterdive/uddf:lowesttemperature/text()'),
    XPath('uddf:informationafterdive/uddf:averagedepth/text()'),
    XPath('uddf:samples/uddf:waypoint/uddf:divemode[1]/@type'),
    None,
)

# XPath queries for default dive profile sample data, relative to a waypoint
# node. The order matches the default fields of `dive_profile`: depth, time,
# temp, setpoint, setpointby, deco_time, deco_depth, alarm, gas.
XP_DEFAULT_PROFILE_DATA =  (
    XPath('uddf:depth/text()'),
    XPath('uddf:divetime/text()'),
    XPath('uddf:temperature/text()'),
    XPath('uddf:setpo2/text()'),
    XPath('uddf:setpo2/@setby'),
    XPath('uddf:decostop/@duration'),
    XPath('uddf:decostop/@decodepth'),
    XPath('uddf:alarm/text()'),
    XPath('uddf:switchmix/@ref'),
)

# XPath queries for default gas mix data, relative to a mix node. The order
# matches the default fields of `gas_data`: id, name, o2, he.
XP_DEFAULT_GAS_DATA =  (
    XPath('@id'),
    XPath('uddf:name/text()'),
    XPath('uddf:o2/text()'),
    XPath('uddf:he/text()'),
)
 128   
# XPath query to locate dive profile samples (waypoints), relative to a dive
# node
XP_WAYPOINT = XPath('./uddf:samples/uddf:waypoint')
# XPath query to locate gas mixes; the path is absolute, so it is resolved
# from the document root
XP_MIX = XPath('/uddf:uddf/uddf:gasdefinitions/uddf:mix')
 133   
# XPath queries for default dive computer dump data. The order matches the
# default fields of `dump_data`: dc_id, dc_model, datetime, data.
XP_DEFAULT_DUMP_DATA = (
    XPath('uddf:link/@ref'),
    # //uddf:divecomputerdump[position()] gives current()
    XPath('../../uddf:diver/uddf:owner//uddf:divecomputer[' \
            '@id = //uddf:divecomputerdump[position()]/uddf:link/@ref' \
        ']/uddf:model/text()'),
    XPath('uddf:datetime/text()'),
    XPath('uddf:dcdump/text()'),
)

# XPath queries for default buddy data, relative to a buddy node. The order
# matches the default fields of `buddy_data`: id, fname, mname, lname, org,
# number.
XP_DEFAULT_BUDDY_DATA = (
    XPath('@id'),
    XPath('uddf:personal/uddf:firstname/text()'),
    XPath('uddf:personal/uddf:middlename/text()'),
    XPath('uddf:personal/uddf:lastname/text()'),
    XPath('uddf:personal/uddf:membership/@organisation'),
    XPath('uddf:personal/uddf:membership/@memberid'),
)

# XPath queries for default dive site data, relative to a site node. The
# order matches the default fields of `site_data`: id, name, location, x
# (longitude), y (latitude).
XP_DEFAULT_SITE_DATA = (
    XPath('@id'),
    XPath('uddf:name/text()'),
    XPath('uddf:geography/uddf:location/text()'),
    XPath('uddf:geography/uddf:longitude/text()'),
    XPath('uddf:geography/uddf:latitude/text()'),
)
 163   
# XPath query to find a buddy; the `$buddy` variable is matched exactly
# against buddy id, member id and organisation, and as a substring of first
# and last name
XP_FIND_BUDDY = XPath('/uddf:uddf/uddf:diver/uddf:buddy[' \
    '@id = $buddy' \
    ' or uddf:personal/uddf:membership/@memberid = $buddy' \
    ' or uddf:personal/uddf:membership/@organisation = $buddy' \
    ' or contains(uddf:personal/uddf:firstname/text(), $buddy)' \
    ' or contains(uddf:personal/uddf:lastname/text(), $buddy)' \
    ']')

# XPath query to find a dive site; the `$site` variable is matched exactly
# against site id, and as a substring of site name and location
XP_FIND_SITE = XPath('/uddf:uddf/uddf:divesite/uddf:site[' \
    '@id = $site' \
    ' or contains(uddf:name/text(), $site)' \
    ' or contains(uddf:geography/uddf:location/text(), $site)' \
    ']')

# XPath query to find dives; `$nodes` restricts dive position and `$dives`
# restricts dive number, both through the custom `in-range` XPath function
# (see `in_range`)
XP_FIND_DIVES = XPath('/uddf:uddf/uddf:profiledata' \
    '/uddf:repetitiongroup/uddf:dive[in-range(position(), $nodes)' \
    ' and in-range(uddf:informationbeforedive/uddf:divenumber/text(), $dives)]')

# XPath query to find dive gases: gas mixes referenced by `switchmix` of
# waypoints of the dives selected with `$nodes`
XP_FIND_DIVE_GASES = XPath('/uddf:uddf/uddf:gasdefinitions' \
    '/uddf:mix[@id=/uddf:uddf/uddf:profiledata/uddf:repetitiongroup' \
    '/uddf:dive[in-range(position(), $nodes)]' \
    '/uddf:samples/uddf:waypoint/uddf:switchmix/@ref]')
 190   
 191   
 192 -class RangeError(ValueError): 
 193      """ 
 194      Error raised when a range cannot be parsed. 
 195   
 196      .. seealso:: 
 197          parse_range 
 198      """ 
 199      pass 
 200   
 201   
 202 -def parse(f, ver_check=True): 
 203      """ 
 204      Parse XML file and return document object. 
 205   
 206      File to parse can be anything supported by ``lxml`` library. 
 207   
 208      If file to parse is file name and ends with '.bz2', then it is treated 
 209      as file compressed with bzip2. 
 210   
 211      :Parameters: 
 212       f 
 213          File to parse. 
 214       ver_check 
 215          Check version of UDDF file. 
 216      """ 
 217      if isinstance(f, str) and (f.endswith('.bz2') or f.endswith('.bz2.bak')): 
 218          log.debug('detected compressed file') 
 219          f = bz2.BZ2File(f) 
 220      doc = et.parse(f) 
 221      if ver_check: 
 222          v1, v2, *_ = doc.getroot().get('version').split('.') 
 223          if (v1, v2) != ('3', '2'): 
 224              raise ValueError('UDDF file version {}.{} is not supported.' \ 
 225                      ' Please upgrade file with "kz upgrade" command.' \ 
 226                      .format(v1, v2)) 
 227      return doc 
 228   
 229   
 230 -def find(f, query, **params): 
 231      """ 
 232      Find XML nodes in UDDF file using XPath query. 
 233   
 234      UDDF file can be a file name, file object, URL and basically everything 
 235      what is supported by `lxml` library. 
 236   
 237      File to parse can be a file name ending with '.bz2'. It is treated as 
 238      file compressed with bzip2. 
 239   
 240      :Parameters: 
 241       f 
 242          UDDF file to parse. 
 243       query 
 244          XPath expression or XPath object. 
 245       params 
 246          XPath query parameters. 
 247   
 248      .. seealso:: :py:func:`XPath`, :py:func:`parse` 
 249      """ 
 250      log.debug('parsing and searching with query: {}; parameters {}' \ 
 251              .format(query, params)) 
 252      doc = parse(f) 
 253      if isinstance(query, str): 
 254          return xp(doc, query) 
 255      else: 
 256          return (n for n in query(doc, **params)) 
 257   
 258   
 259 -def xp(node, query): 
 260      """ 
 261      Find items with XPath query. 
 262   
 263      The query is performed using UDDF namespace. 
 264   
 265      Iterator of items (strings, nodes) found by query is returned. 
 266       
 267      :Parameters: 
 268       node 
 269          Document node or query starting node. 
 270       query 
 271          XPath query. 
 272   
 273      .. seealso:: 
 274          lxml.etree.Element.xpath 
 275      """ 
 276      for n in node.xpath(query, namespaces=_NSMAP): 
 277          yield n  
 278   
 279   
 280 -def xp_first(node, query): 
 281      """ 
 282      Get first element found with XPath query. 
 283   
 284      The query is performed using UDDF namespace. 
 285   
 286      First element is returned or None if it is not found. 
 287       
 288      :Parameters: 
 289       node 
 290          Document node or query starting node. 
 291       query 
 292          XPath query. 
 293   
 294      .. seealso:: 
 295          lxml.etree.Element.xpath 
 296      """ 
 297      data = xp(node, query) 
 298      return next(data, None) 
 299   
 300   
 301 -def xp_last(node, query): 
 302      """ 
 303      Get last element found with XPath query. 
 304   
 305      The query is performed using UDDF namespace. 
 306   
 307      Last element is returned or None if it is not found. 
 308       
 309      :Parameters: 
 310       node 
 311          Document node or query starting node. 
 312       query 
 313          XPath query. 
 314   
 315      .. seealso:: 
 316          lxml.etree.Element.xpath 
 317      """ 
 318      nodes = node.xpath(query, namespaces=_NSMAP) 
 319      return nodes[-1] if nodes else None 
 320   
 321   
 322 -def find_data(name, node, fields, queries, parsers, nquery=None): 
 323      """ 
 324      Find data records starting from specified XML node. 
 325   
 326      A record type (namedtuple) is created with specified fields. The data 
 327      of a record is retrieved with XPath expression objects, which is 
 328      converted from string to appropriate type using parsers. 
 329   
 330      A parser can be any type or function, i.e. `float`, `int` or 
 331      `dateutil.parser.parse`. 
 332   
 333      If XML node is too high to execture XPath expression objects, then the 
 334      basis for field queries can be relocated with `nquery` parameter. If 
 335      `nquery` parameter is not specified, then only one record is returned. 
 336      Otherwise it is generator of records. 
 337   
 338      The length of fields, field queries and field parsers should be the same. 
 339   
 340      :Parameters: 
 341       name 
 342          Name of the record to be created. 
 343       node 
 344          XML node. 
 345       fields 
 346          Names of fields to be created in a record. 
 347       queries 
 348          XPath expression objects for each field to retrieve its value. 
 349       parsers 
 350          Parsers of field values to be created in a record. 
 351       nquery 
 352          XPath expression object to relocate from node to more appropriate 
 353          position in XML document for record data retrieval. 
 354   
 355      .. seealso:: :py:func:`dive_data`, :py:func:`dive_profile` 
 356      """ 
 357      T = namedtuple(name, ' '.join(fields))._make 
 358      if nquery: 
 359          data = nquery(node) 
 360          return (_record(T, n, queries, parsers) for n in data) 
 361      else: 
 362          return _record(T, node, queries, parsers) 
 363   
 364   
 365 -def dive_data(node, fields=None, queries=None, parsers=None): 
 366      """ 
 367      Specialized function to return record of a dive data. 
 368   
 369      At the moment record of dive data contains dive start time only, by 
 370      default. It should be enhanced in the future to return more rich data 
 371      record. 
 372   
 373      Dive record data can be reconfigured with optional fields, field 
 374      queries and field parsers parameters. 
 375   
 376      :Parameters: 
 377       node 
 378          XML node. 
 379       fields 
 380          Names of fields to be created in a record. 
 381       queries 
 382          XPath expression object for each field to retrieve its value. 
 383       parsers 
 384          Parsers field values to be created in a record. 
 385   
 386      .. seealso:: :py:func:`find_data` 
 387      """ 
 388      if fields is None: 
 389          fields = ('number', 'datetime', 'depth', 'duration', 'temp', 
 390              'avg_depth', 'mode', 'profile') 
 391          queries = XP_DEFAULT_DIVE_DATA 
 392          parsers = (int, dparse, float, float, float, float, str, dive_profile) 
 393   
 394      return find_data('Dive', node, fields, queries, parsers) 
 395   
 396   
 397 -def dive_profile(node, fields=None, queries=None, parsers=None): 
 398      """ 
 399      Specialized function to return generator of dive profiles records. 
 400   
 401      By default, dive profile record contains following fields 
 402   
 403      time 
 404          dive time in seconds 
 405      depth 
 406          dive depth in meters 
 407      temp 
 408          temperature in Kelvins 
 409   
 410      Dive profile record data can be reconfigured with optional fields, 
 411      field queries and field parsers parameters. 
 412   
 413      :Parameters: 
 414       node 
 415          XML node. 
 416       fields 
 417          Names of fields to be created in a record. 
 418       queries 
 419          XPath expression objects for each field to retrieve its value. 
 420       parsers 
 421          Parsers of field values to be created in a record. 
 422   
 423      .. seealso:: :py:func:`find_data` 
 424      """ 
 425      if fields is None: 
 426          fields = ('depth', 'time', 'temp', 'setpoint', 'setpointby', 
 427                  'deco_time', 'deco_depth', 'alarm', 'gas') 
 428          queries = XP_DEFAULT_PROFILE_DATA 
 429          gases = dict(((gas.id, gas) for gas in gas_data(node))) 
 430          parsers = (float, ) * 4 + (str, float, float, str, gases.get) 
 431   
 432      return find_data('Sample', node, fields, queries, parsers, 
 433              nquery=XP_WAYPOINT) 
 434   
 435   
 436 -def gas_data(node, fields=None, queries=None, parsers=None): 
 437      if fields is None: 
 438          fields = ('id', 'name', 'o2', 'he') 
 439          queries = XP_DEFAULT_GAS_DATA 
 440          parsers = (str, str, int, int) 
 441   
 442      return find_data('Gas', node, fields, queries, parsers, 
 443              nquery=XP_MIX) 
 444   
 445   
 446 -def dump_data(node, fields=None, queries=None, parsers=None): 
 447      """ 
 448      Get dive computer dump data. 
 449   
 450      The following data is returned 
 451   
 452      dc_id 
 453          Dive computer id. 
 454      dc_model 
 455          Dive computer model information. 
 456      datetime 
 457          Date and time when dive computer dump was obtained. 
 458      data 
 459          Dive computer dump data. 
 460   
 461      :Parameters: 
 462       node 
 463          XML node. 
 464       fields 
 465          Names of fields to be created in a record. 
 466       queries 
 467          XPath expression objects for each field to retrieve its value. 
 468       parsers 
 469          Parsers of field values to be created in a record. 
 470   
 471      .. seealso:: :py:func:`find_data` 
 472      """ 
 473      if fields is None: 
 474          fields = ('dc_id', 'dc_model', 'datetime', 'data') 
 475          queries = XP_DEFAULT_DUMP_DATA 
 476          parsers = (str, str, dparse, _dump_decode) 
 477      return find_data('DiveComputerDump', node, fields, queries, parsers) 
 478   
 479   
 480 -def buddy_data(node, fields=None, queries=None, parsers=None): 
 481      """ 
 482      Get dive buddy data. 
 483   
 484      The following data is returned by default 
 485   
 486      id 
 487          Buddy id. 
 488      fname 
 489          Buddy first name. 
 490      mname 
 491          Buddy middle name. 
 492      lname 
 493          Buddy last name. 
 494      org 
 495          Organization, which a buddy is member of. 
 496      number 
 497          Member number id in the organisation. 
 498   
 499      :Parameters: 
 500       node 
 501          XML node. 
 502       fields 
 503          Names of fields to be created in a record. 
 504       queries 
 505          XPath expression objects for each field to retrieve its value. 
 506       parsers 
 507          Parsers of field values to be created in a record. 
 508   
 509      .. seealso:: :py:func:`find_data` 
 510      """ 
 511      if fields is None: 
 512          fields = ('id', 'fname', 'mname', 'lname', 'org', 'number') 
 513          queries = XP_DEFAULT_BUDDY_DATA 
 514          parsers = (str, ) * 7 
 515      return find_data('Buddy', node, fields, queries, parsers) 
 516   
 517   
 518 -def site_data(node, fields=None, queries=None, parsers=None): 
 519      """ 
 520      Get dive site data. 
 521   
 522      The following data is returned by default 
 523   
 524      id 
 525          Dive site id. 
 526      name 
 527          Dive site name. 
 528      location 
 529          Dive site location. 
 530      x 
 531          Dive site longitude. 
 532      y 
 533          Dive site latitude. 
 534   
 535      :Parameters: 
 536       node 
 537          XML node. 
 538       fields 
 539          Names of fields to be created in a record. 
 540       queries 
 541          XPath expression objects for each field to retrieve its value. 
 542       parsers 
 543          Parsers of field values to be created in a record. 
 544   
 545      .. seealso:: :py:func:`find_data` 
 546      """ 
 547      if fields is None: 
 548          fields = ('id', 'name', 'location', 'x', 'y') 
 549          queries = XP_DEFAULT_SITE_DATA 
 550          parsers = (str, str, str, float, float) 
 551      return find_data('DiveSite', node, fields, queries, parsers) 
 552   
 553   
 554 -def parse_range(s): 
 555      """ 
 556      Parse textual representation of number range into Python expression. 
 557   
 558      Examples of a ranges 
 559   
 560      >>> parse_range('1-3,5') 
 561      '1 <= n and n <= 3 or n == 5' 
 562   
 563      >>> parse_range('-3,10') 
 564      'n <= 3 or n == 10' 
 565   
 566      Example of infinite range 
 567   
 568      >>> parse_range('20-') 
 569      '20 <= n' 
 570   
 571      :Parameters: 
 572       s 
 573          Textual representation of number range. 
 574      """ 
 575      data = [] 
 576      try: 
 577          for r in s.split(','): 
 578              d = r.split('-') 
 579              if len(d) == 1: 
 580                  data.append('n == %d' % int(d[0])) 
 581              elif len(d) == 2: 
 582                  p1 = d[0].strip() 
 583                  p2 = d[1].strip() 
 584                  if p1 and p2: 
 585                      data.append('{} <= n and n <= {}'.format(int(p1), int(p2))) 
 586                  elif p1 and not p2: 
 587                      data.append('{} <= n'.format(int(p1))) 
 588                  elif not p1 and p2: 
 589                      data.append('n <= {}'.format(int(p2))) 
 590              else: 
 591                  raise RangeError('Invalid range %s' % s) 
 592      except ValueError as ex: 
 593          raise RangeError('Invalid range %s' % s) 
 594      return ' or '.join(data) 
 595   
 596   
 597 -def in_range(ctx, pos, nodes): 
 598      """ 
 599      XPath expression function to restrict position of a node to be within 
 600      numeric range. 
 601   
 602      :Parameters: 
 603       ctx 
 604          XPath context object. 
 605       pos 
 606          Node position. 
 607       nodes 
 608          Number range, i.e. "2-3". 
 609   
 610      .. seealso:: :py:func:`parse_range` 
 611      """ 
 612      if not nodes: 
 613          return True 
 614   
 615      if isinstance(pos, list): 
 616          if len(pos) == 0: 
 617              return False 
 618          if len(pos) != 1: 
 619              raise ValueError('Too many parameters') 
 620          pos = int(pos[0]) 
 621   
 622      kf = 'in-range({})'.format(nodes) 
 623      if kf not in ctx.eval_context: 
 624          nr = parse_range(nodes) 
 625          fstr = 'ctx.eval_context["{}"] = lambda n: {}'.format(kf, nr) 
 626          exec(fstr) 
 627      return ctx.eval_context[kf](pos) 
 628   
# register `in_range` as `in-range` XPath function; it is put into the
# function namespace created for `None`, and the queries in this module
# (i.e. XP_FIND_DIVES) call it without a prefix
ns = et.FunctionNamespace(None)
ns['in-range'] = in_range
 632   
 633   
 634 -def _field(node, query, parser): 
 635      """ 
 636      Find text value of a node starting from specified XML node. 
 637   
 638      The text value is converted with function `t` and then returned. 
 639   
 640      If node is not found, then `None` is returned. 
 641   
 642      :Parameters: 
 643       node 
 644          XML node. 
 645       query 
 646          XPath expression object to find node with text value. 
 647       parser 
 648          Parser to convert text value to requested type. 
 649      """ 
 650      data = [node] if query is None else query(node) 
 651      if data: 
 652          return parser(data[0]) 
 653   
 654   
 655 -def _record(rt, node, queries, parsers): 
 656      """ 
 657      Create record with data. 
 658   
 659      The record data is found with queries (XPath expressions objects) 
 660      starting from XML node. 
 661       
 662      The data is converted to their appropriate type using parsers. 
 663   
 664      If query is `None`, then record data is node itself. 
 665   
 666      :Parameters: 
 667       rt 
 668          Record type (named tuple) of record data. 
 669       node 
 670          XML node. 
 671       queries 
 672          XPath expression objects for each field to retrieve its value. 
 673       parsers 
 674          Parsers of field values to be created in a record. 
 675      """ 
 676      return rt(_field(node, f, p) for f, p in zip(queries, parsers)) 
 677   
 678   
 679 -def _dump_decode(data): 
 680      """ 
 681      Decode dive computer data, which is stored in UDDF dive computer dump 
 682      file. 
 683      """ 
 684      s = base64.b64decode(data.encode()) 
 685      return bz2.decompress(s) 
 686   
 687   
 688  # 
 689  # Creating UDDF data. 
 690  # 
 691   
# Default formatters of dive profile sample data: depth is clamped to be
# non-negative and both depth and temperature are rendered with one decimal
# place.
DEFAULT_FMT_DIVE_PROFILE = {
    'depth': lambda d: str.format('{0:.1f}', max(d, 0)),
    'temp': partial(str.format, '{0:.1f}'),
}

# basic data for an UDDF file; the `{kzver}` placeholder is replaced with
# Kenozooid version at import time and the generator datetime node is left
# empty to be filled in by `create` function
# fixme: obsolete
UDDF_BASIC = """\
<uddf xmlns="http://www.streit.cc/uddf/3.2/" version="3.2.0">
<generator>
    <name>kenozooid</name>
    <manufacturer id='kenozooid'>
      <name>Kenozooid Team</name>
      <contact>
        <homepage>http://wrobell.dcmod.org/kenozooid/</homepage>
      </contact>
    </manufacturer>
    <version>{kzver}</version>
    <datetime></datetime>
</generator>
<diver>
    <owner id='owner'>
        <personal>
            <firstname>Anonymous</firstname>
            <lastname>Guest</lastname>
        </personal>
    </owner>
</diver>
</uddf>
""".format(kzver=kenozooid.__version__)
 722   
 723   
 724 -def create(datetime=datetime.now()): 
 725      """ 
 726      fixme: obsolete 
 727   
 728      Create basic UDDF structure. 
 729   
 730      :Parameters: 
 731       datetime 
 732          Timestamp of file creation, current time by default. 
 733      """ 
 734      root = et.XML(UDDF_BASIC) 
 735   
 736      now = datetime.now() 
 737      n = root.xpath('//uddf:generator/uddf:datetime', namespaces=_NSMAP)[0] 
 738      n.text = FMT_DT(datetime) 
 739      return root 
 740   
 741   
def set_data(node, queries, formatters=None, **data):
    """
    Set data of nodes or attributes using XPath queries relative to
    specified XML node.

    The data values are converted to string with formatters functions. If
    there is no formatter for a data key, then `str` is used.

    A query is a path of node names, which can end with attribute name
    prefixed with '@', i.e. 'a/b' or 'a/b/@c'. A query can also be a list of
    paths - the data value shall be a sequence then and its items are
    paired with the paths by position.

    Data keys, which have no value or value `None`, are skipped.

    :Parameters:
     node
        XML node.
     queries
        Path-like expressions of XML structure to be created.
     formatters
        Data formatters.
     data
        Data values to be set within XML document.
    """
    if formatters is None:
        formatters = {}

    nodes = {} # nodes created in this call, by path
    attrs = set() # (path, attribute) pairs recorded on node creation

    for key, path in queries.items():
        value = data.get(key)
        if value is None:
            continue

        # single path and single value are handled as one item lists
        if isinstance(path, str):
            path = [path]
            value = [value]

        for p, v in zip(path, value):
            f = formatters.get(key, str)
            v = f(v)

            # split off the attribute name, if the path ends with one; the
            # path is `None` when the attribute belongs to the node itself
            attr = None
            tags = p.rsplit('/', 1)
            if tags[-1].startswith('@'):
                attr = tags[-1][1:]  # skip '@'
                p = tags[0] if len(tags) > 1 else None

            n = node
            if p:
                n = nodes.get(p)
                # reuse node created in this call to make t/@a t/@b work,
                # but create new node to not overwrite attribute value
                if n is None or (p, attr) in attrs:
                    # `create_node` yields each node of the path, the last
                    # one is the node to be set
                    *_, n = create_node(p, parent=node)
                    nodes[p] = n
                    attrs.add((p, attr))

            assert n is not None

            if attr:
                n.set(attr, v)
            else:
                n.text = v
 800   
 801   
 802 -def create_node(path, parent=None, append=True): 
 803      """ 
 804      TODO: get rid of parent, does not make sense 
 805   
 806      Create a hierarchy of nodes using XML nodes path specification. 
 807   
 808      Path is a string of node names separated by slash character, i.e. a/b/c 
 809      creates:: 
 810   
 811          <a><b><c/></b><a> 
 812   
 813      If parent node is specified and some part of node hierarchy already 
 814      exists then only non-existant nodes are created, i.e. if parent is 
 815      'x' node in 
 816   
 817          <x><y/></x> 
 818   
 819      then path 'x/z' modifies XML document as follows 
 820   
 821          <x><y/><z/></x> 
 822   
 823      :Parameters: 
 824       path 
 825          Hierarchy of nodes. 
 826       parent 
 827           Optional parent node. 
 828      """ 
 829      # preserve namespace prefix option... please?!? :/ 
 830      T = lambda tag: tag.replace('uddf:', '{' + _NSMAP['uddf'] + '}') 
 831      tags = path.split('/') 
 832      n = parent 
 833      for t in tags: 
 834          is_last = tags[-1] == t 
 835          k_exists = False 
 836   
 837          k = None 
 838          if n is not None: 
 839              k = xp_first(n, t) 
 840          if is_last or k is None: 
 841              k = et.Element(T(t)) 
 842          elif k is not None: 
 843              k_exists = True 
 844          if n is not None and not k_exists: 
 845              if append: 
 846                  n.append(k) 
 847              else: 
 848                  n.insert(0, k) 
 849          n = k 
 850          yield n 
 851   
 852   
 853 -def create_dive_data(node=None, queries=None, formatters=None, **data): 
 854      """ 
 855      Create dive data. 
 856   
 857      :Parameters: 
 858       node 
 859          Base node (UDDF root node). 
 860       queries 
 861          Path-like expressions of XML structure to be created. 
 862       formatters 
 863          Dive data formatters. 
 864       data 
 865          Dive data. 
 866      """ 
 867      if queries == None: 
 868          bd = data.get('buddies') 
 869          bno = len(bd) if bd else 0 
 870          f = ('site', 'buddies', 'datetime', 'depth', 'duration', 'temp') 
 871          q = ('uddf:informationbeforedive/uddf:link/@ref', 
 872              ['uddf:informationbeforedive/uddf:link/@ref'] * bno, 
 873              'uddf:informationbeforedive/uddf:datetime', 
 874              'uddf:informationafterdive/uddf:greatestdepth', 
 875              'uddf:informationafterdive/uddf:diveduration', 
 876              'uddf:informationafterdive/uddf:lowesttemperature') 
 877          queries = OrderedDict(zip(f, q)) 
 878      if formatters == None: 
 879          formatters = { 
 880              'datetime': FMT_DT, 
 881              'depth': partial(str.format, '{0:.1f}'), 
 882              'duration': partial(str.format, '{0:.0f}'), 
 883              'temp': partial(str.format, '{0:.1f}'), 
 884          } 
 885      _, rg, dn = create_node('uddf:profiledata/uddf:repetitiongroup/uddf:dive', 
 886              parent=node) 
 887      _set_id(rg) 
 888      _set_id(dn) 
 889      set_data(dn, queries, formatters, **data) 
 890      return dn 
 891   
 892   
 893 -def create_buddy_data(node, queries=None, formatters=None, **data): 
 894      """ 
 895      Create buddy data. 
 896   
 897      :Parameters: 
 898       node 
 899          Base node (UDDF root node). 
 900       queries 
 901          Path-like expressions of XML structure to be created. 
 902       formatters 
 903          Buddy data formatters. 
 904       data 
 905          Buddy data. 
 906        
 907      """ 
 908      if queries == None: 
 909          f = ('id', 'fname', 'mname', 'lname', 'org', 'number') 
 910          q = ('@id', 
 911              'uddf:personal/uddf:firstname', 
 912              'uddf:personal/uddf:middlename', 
 913              'uddf:personal/uddf:lastname', 
 914              'uddf:personal/uddf:membership/@organisation', 
 915              'uddf:personal/uddf:membership/@memberid') 
 916          queries = OrderedDict(zip(f, q)) 
 917   
 918      if formatters == None: 
 919          formatters = {} 
 920   
 921      if 'id' not in data or data['id'] is None: 
 922          data['id'] = uuid().hex 
 923           
 924      _, buddy = create_node('uddf:diver/uddf:buddy', parent=node) 
 925      set_data(buddy, queries, formatters, **data) 
 926      return buddy 
 927   
 928   
 929 -def create_site_data(node, queries=None, formatters=None, **data): 
 930      """ 
 931      Create dive site data. 
 932   
 933      :Parameters: 
 934       node 
 935          Base node (UDDF root node). 
 936       queries 
 937          Path-like expressions of XML structure to be created. 
 938       formatters 
 939          Dive site data formatters. 
 940       data 
 941          Dive site data. 
 942        
 943      """ 
 944      if queries == None: 
 945          f = ('id', 'name', 'location', 'x', 'y') 
 946          q = ('@id', 
 947              'uddf:name', 
 948              'uddf:geography/uddf:location', 
 949              'uddf:geography/uddf:longitude', 
 950              'uddf:geography/uddf:latitude') 
 951          queries = OrderedDict(zip(f, q)) 
 952   
 953      if formatters == None: 
 954          formatters = {} 
 955   
 956      if 'id' not in data or data['id'] is None: 
 957          data['id'] = uuid().hex 
 958           
 959      _, site = create_node('uddf:divesite/uddf:site', parent=node) 
 960      set_data(site, queries, formatters, **data) 
 961      return site 
 962           
 963   
 964 -def _dump_encode(data): 
 965      """ 
 966      Encode dive computer data, so it can be stored in UDDF file. 
 967   
 968      The encoded string is returned. 
 969      """ 
 970      s = bz2.compress(data) 
 971      return base64.b64encode(s) 
 972   
 973   
 974 -def create_uddf(datetime=datetime.now(), equipment=None, gases=None, dives=None, 
 975          dump=None): 
 976      """ 
 977      Create UDDF XML data. 
 978   
 979      :Parameters: 
 980       datetime 
 981          Timestamp of UDDF creation. 
 982       equipment 
 983          Diver's (owner) equipment XML data (see create_dc_data). 
 984       gases 
 985          List of gases used by the dives. 
 986       dives 
 987          Dives XML data (see create_dives). 
 988       dump 
 989          Dive computer dump XML data (see create_dump_data). 
 990      """ 
 991      doc = xml.uddf( 
 992          xml.generator( 
 993              xml.name('kenozooid'), 
 994              xml.manufacturer( 
 995                  xml.name('Kenozooid Team'), 
 996                  xml.contact(xml.homepage('https://wrobell.dcmod.org/kenozooid/')), 
 997                  id='kenozooid'), 
 998              xml.version(kenozooid.__version__), 
 999              xml.datetime(FMT_DT(datetime)), 
1000          ), 
1001   
1002          xml.diver( 
1003              xml.owner( 
1004                  xml.personal(xml.firstname('Anonymous'), xml.lastname('Guest')), 
1005                  xml.equipment(equipment) if equipment else None, 
1006                  id='owner')), 
1007   
1008          xml.divecomputercontrol(dump) if dump else None, 
1009          xml.gasdefinitions(gases) if gases else None, 
1010          xml.profiledata(xml.repetitiongroup(dives, id=gen_id())) 
1011              if dives else None, 
1012   
1013          xmlns=_NSMAP['uddf'], 
1014          version='3.2.0', 
1015      ) 
1016      return doc 
1017   
1018   
def create_dives(dives, equipment=None):
    """
    Create dives UDDF XML data.

    This is a generator, which renders each dive with `create_dive` using
    the same equipment references for every dive.

    :Parameters:
     dives
        Iterable of dive tuples.
     equipment
        List of used equipment references.
    """
    render = partial(create_dive, equipment=equipment)
    for item in dives:
        yield render(item)
1031   
def create_dive(dive, equipment=None):
    """
    Create dive UDDF XML data.

    The dive node consists of information before the dive (dive start
    time), optional dive samples and information after the dive (maximum
    depth, duration, optional lowest temperature, equipment used and
    optional average depth). The dive node gets newly generated id.

    :Parameters:
     dive
        Dive to render as XML.
     equipment
        List of used equipment references.
    """
    # equipment of the dive itself first, then the additional references
    used = itertools.chain(kt.nit(dive.equipment), kt.nit(equipment))
    log.debug('convert dive {0.datetime}/{0.depth:.1f}/{0.duration} into XML'
            .format(dive))

    before = xml.informationbeforedive(xml.datetime(FMT_DT(dive.datetime)))

    samples = None
    if dive.profile:
        samples = xml.samples(create_dive_samples(dive.profile, dive.mode))

    depth = xml.greatestdepth(FMT_F(dive.depth))
    duration = xml.diveduration(FMT_I(dive.duration))
    temp = None
    if dive.temp is not None:
        temp = xml.lowesttemperature(FMT_F(dive.temp))
    links = xml.equipmentused((xml.link(ref=v) for v in used))
    avg_depth = None
    if dive.avg_depth is not None:
        avg_depth = xml.averagedepth(FMT_F(dive.avg_depth))
    after = xml.informationafterdive(depth, duration, temp, links, avg_depth)

    return xml.dive(before, samples, after, id=gen_id())
1057   
1058   
def create_dive_samples(samples, mode=None):
    """
    Create dive samples UDDF XML data.

    This is a generator of waypoint nodes, one per dive sample. Optional
    sample data (alarms, deco stop, setpoint, gas switch, temperature) is
    skipped when it is `None`. Dive mode, if specified, is stored in the
    first waypoint only.

    :Parameters:
     samples
        Iterable of tuples of dive samples.
     mode
        Dive mode, i.e. opencircuit, closedcircuit.
    """
    for idx, sample in enumerate(samples):
        alarms = None
        if sample.alarm is not None:
            alarms = (xml.alarm(a) for a in sample.alarm)

        deco = None
        if sample.deco_time is not None:
            deco = xml.decostop(
                duration=FMT_I(sample.deco_time),
                decodepth=FMT_I(sample.deco_depth),
                kind='mandatory'
            )

        depth = xml.depth(FMT_F(sample.depth))
        time = xml.divetime(FMT_I(sample.time))

        setpoint = None
        if sample.setpoint is not None:
            setpoint = xml.setpo2(FMT_F2(sample.setpoint),
                    setby=sample.setpointby)

        gas = None
        if sample.gas is not None:
            gas = xml.switchmix(ref=str(sample.gas.id))

        temp = None
        if sample.temp is not None:
            temp = xml.temperature(FMT_F(sample.temp))

        # dive mode goes into first waypoint only
        divemode = None
        if mode is not None and idx == 0:
            divemode = xml.divemode(type=mode)

        yield xml.waypoint(
            alarms, deco, depth, time, setpoint, gas, temp, divemode
        )
1086   
1087   
def create_gas(gas):
    """
    Create gas UDDF XML data.

    The gas mix node contains name of the gas, its oxygen and helium
    values (converted to strings) and has id of the gas.

    :Parameters:
     gas
        Gas information to render as XML.
    """
    name = xml.name(gas.name)
    o2 = xml.o2(str(gas.o2))
    he = xml.he(str(gas.he))
    return xml.mix(name, o2, he, id=gas.id)
1102   
1103   
def create_dc_data(dc_id, model):
    """
    Create dive computer UDDF XML data.

    This is a generator yielding single dive computer node. The model is
    used for both name and model of the dive computer.

    :Parameters:
     dc_id
        Dive computer id.
     model
        Dive computer model.
    """
    computer = xml.divecomputer(xml.name(model), xml.model(model), id=dc_id)
    yield computer
1115   
1116   
def create_dump_data(dc_id, datetime, data):
    """
    Create dive computer dump UDDF XML data.

    This is a generator yielding single dive computer dump node, which
    references the dive computer and contains time of the dump and the
    encoded binary data (see `_dump_encode`).

    :Parameters:
     dc_id
        Dive computer id.
     datetime
        Date and time when the dump was created.
     data
        Dive computer binary data.
    """
    link = xml.link(ref=dc_id)
    created = xml.datetime(FMT_DT(datetime))
    # encoded data is bytes, the XML node requires a string
    dump = xml.dcdump(_dump_encode(data).decode())
    yield xml.divecomputerdump(link, created, dump)
1134   
1135   
def save(doc, fout, validate=True):
    """
    Save UDDF XML data into a file.

    If output file is file name ending with '.bz2', then it is compressed
    with bzip2.

    The UDDF XML data can be ElementTree XML object or iterable of strings.

    If output file exists then backup file with ``.bak`` extension is
    created. The backup file is restored if saving or validation fails;
    the error is raised to the caller.

    If output file is a file name, then the file is closed after writing.
    A file object passed by the caller is never closed here.

    :Parameters:
     doc
        UDDF XML data.
     fout
        Output file (file name or file object).
     validate
        Validate UDDF file after saving if True.
    """
    log.debug('saving uddf file')
    is_fn = isinstance(fout, str)
    openf = open
    if is_fn and fout.endswith('.bz2'):
        openf = bz2.BZ2File
        log.debug('uddf file will be compressed')

    fbk = '{}.bak'.format(fout)
    # restore only a backup created by this call, never a stale one
    has_backup = False
    if is_fn and os.path.exists(fout):
        os.rename(fout, fbk)
        has_backup = True
        log.debug('backup file created')
    try:
        f = openf(fout, 'wb') if is_fn else fout
        try:
            if et.iselement(doc):
                et.ElementTree(doc).write(f,
                        encoding='utf-8',
                        xml_declaration=True,
                        pretty_print=True)
            else:
                f.writelines(l.encode('utf-8') for l in doc)
        finally:
            # data has to be flushed to disk before the file is read again
            # for validation; close only the file opened by this function
            if is_fn:
                f.close()

        if validate:
            log.debug('validating uddf file')
            fs = pkg_resources.resource_stream('kenozooid', 'uddf/uddf_3.2.0.xsd')
            try:
                if hasattr(fs, 'name'):
                    log.debug('uddf xsd found: {}'.format(fs.name))
                schema = et.XMLSchema(et.parse(fs))
            finally:
                fs.close()

            if is_fn:
                # binary mode - let XML parser deal with document encoding
                with openf(fout, 'rb') as fin:
                    saved = et.parse(fin)
            else:
                f.seek(0)
                saved = et.parse(f)
            schema.assertValid(saved)
            log.debug('uddf file is valid')
    except Exception:
        if has_backup:
            os.rename(fbk, fout)
            log.debug('backup file restored')
        raise
1195   
1196   
1197  # 
1198  # Removing UDDF data. 
1199  # 
1200   
def remove_nodes(node, query, **params):
    """
    Remove nodes from XML document using XPath query.

    The query is called with the starting node and the parameters. Each
    node returned by the query is removed from its parent.

    :Parameters:
     node
        Starting XML node for XPath query.
     query
        XPath query (callable) to find nodes to remove.
     params
        XPath query parameters.
    """
    log.debug('node removal with query: {}, params: {}'.format(query, params))
    for found in query(node, **params):
        found.getparent().remove(found)
1217   
1218  # 
1219  # Processing UDDF data. 
1220  # 
1221   
1222   
def reorder(doc):
    """
    fixme: obsolete

    Reorder and cleanup dives in UDDF document.

    Following operations are being performed

    - dives are sorted by dive start time
    - duplicate dives (dives with the same start time) and repetition
      groups are removed - all dives are put into one, new repetition group

    `ValueError` exception is raised if there is no profile data or no
    repetition group in the document.

    :Parameters:
     doc
        UDDF document.
    """
    profiles = doc.xpath('//uddf:profiledata', namespaces=_NSMAP)
    rgroups = doc.xpath('//uddf:profiledata/uddf:repetitiongroup',
            namespaces=_NSMAP)
    if not (profiles and rgroups):
        raise ValueError('No profile data to reorder')
    pd = profiles[0]

    q_dive = '/uddf:uddf/uddf:profiledata/uddf:repetitiongroup/uddf:dive'
    q_time = q_dive + '/uddf:informationbeforedive/uddf:datetime/text()'
    dive_nodes = doc.xpath(q_dive, namespaces=_NSMAP)
    dive_times = doc.xpath(q_time, namespaces=_NSMAP)

    # first dive with given start time wins; parse time, don't rely on
    # string representation for sorting
    by_time = {}
    for dive_node, text in zip(dive_nodes, dive_times):
        by_time.setdefault(dparse(text), dive_node)

    log.debug('removing old repetition groups')
    for old in rgroups: # cleanup old repetition groups
        pd.remove(old)
    (group,) = create_node('uddf:repetitiongroup', parent=pd)
    _set_id(group)

    # sort dive nodes by dive time
    log.debug('sorting dives')
    for when in sorted(by_time):
        group.append(by_time[when])
1267   
1268   
class NodeCopier(object):
    """
    UDDF document node copier.

    See :py:func:`NodeCopier.copy` for details.

    :Attributes:
     doc
        The target document.
     doc_ids
        The cache of target document ids.
    """
    def __init__(self, doc):
        """
        Initialize node copier.

        The cache of ids is filled with ids of all UDDF nodes of the target
        document.

        :Parameters:
         doc
            The target document.
        """
        self.doc = doc
        self.doc_ids = set(xp(doc, '//uddf:*/@id'))


    def __enter__(self):
        """
        Create UDDF node copier context manager.
        """
        return self


    def __exit__(self, *args):
        """
        Close UDDF node copier context manager.
        """
        pass


    def copy(self, node, target):
        """
        Copy node from UDDF document to target node in destination UDDF
        document. Target node becomes parent of node to be copied.

        The copying works under following assumptions

        - whole node is being copied including its descendants
        - node is not copied if it has id and id already exists in the target
        - if copied nodes reference non-descendant nodes and they do _not_
          exist in destination document, then referencing nodes are _removed_
        - if, due to node removal, its parent node becomes empty, then parent
          is removed, too

        Copy of the node is returned or `None` if the node is not copied.
        `ValueError` exception is raised if the node itself references
        non-existing node.

        :Parameters:
         node
            Node to copy.
         target
            The future parent of the copied node.
        """
        # check the id first to avoid deep copy of node, which is not going
        # to be copied anyway
        node_id = node.get('id')
        if node_id in self.doc_ids:
            log.debug('id {} already exists, not copying'.format(node_id))
            return None

        cn = deepcopy(node)

        # ids known after the copy: target document ids and ids of the
        # copied nodes; the cache is updated only when copy succeeds
        s_ids = set(xp(cn, 'descendant-or-self::uddf:*/@id'))
        known = self.doc_ids | s_ids

        # get referencing nodes
        nodes = list(xp(cn, 'descendant-or-self::uddf:*[@ref]'))
        refs = set(k.get('ref') for k in nodes)

        left = refs - known
        if __debug__:
            log.debug('references to remove: {} = {} - {}'.format(left,
                refs, known))

        if cn.get('ref') in left:
            raise ValueError('Node to copy references non-existing node')

        # remove nodes referencing missing data
        to_remove = (n for n in nodes if n.get('ref') in left)
        assert cn.getparent() is None
        for n in to_remove:
            # go up while the parent would become empty after the removal
            p = n.getparent()
            while p is not None and len(p) == 1:
                n = p
                p = n.getparent()
            if p is not None:
                p.remove(n)

        self.doc_ids.update(s_ids)
        target.append(cn)
        return cn
1360   
1361   
def _set_id(node):
    """
    Generate id for a node if there is no id yet.

    The id is created from UUID hex string formatted with `FORMAT_ID`.
    Node with an id is left untouched.

    :Parameters:
     node
        Node for which id should be generated.
    """
    if node.get('id') is not None:
        return
    new_id = FORMAT_ID.format(uuid().hex)
    node.set('id', new_id)
1372   
1373   
def gen_id(value=None):
    """
    Generate id for a value.

    If value is specified, then id is MD5 hash of string representation of
    the value. If not specified, then id is generated with `uuid` call.

    The returned id is a string prefixed with ``id-`` to make it XML
    compliant.

    :Parameters:
     value
        Value to generate the id for (optional).
    """
    if value is not None:
        digest = hashlib.md5(str(value).encode()).hexdigest()
        return 'id-{}'.format(digest)
    return 'id-{}'.format(uuid().hex)
1389   
1390   
def xml_file_copy(f):
    """
    Iterate over raw XML data read from a file.

    The file is read in chunks of 4096 and each chunk is yielded as
    `RawString` object until there is no more data.

    :Parameters:
     f
        File containing XML data.
    """
    chunk = f.read(4096)
    while chunk:
        yield RawString(chunk)
        chunk = f.read(4096)
1404   
1405   
def get_version(f):
    """
    Get version of UDDF file.

    Tuple (major, minor) is returned, i.e. (3, 0), (3, 1), etc.

    The version is read from ``version`` attribute of the root node; the
    file is parsed without version check. If the file is `FileIO` instance,
    then it is rewound to the beginning after parsing.

    :Parameters:
     f
        File to check.
    """
    root = parse(f, ver_check=False).getroot()
    major, minor, *_ = root.get('version').split('.')
    if isinstance(f, FileIO):
        f.seek(0, 0)
    log.debug('detected version {}.{}'.format(major, minor))
    return int(major), int(minor)
1422   
1423   
1424  # vim: sw=4:et:ai 
1425