Package kenozooid :: Module uddf

Source Code for Module kenozooid.uddf

   1  # 
   2  # Kenozooid - dive planning and analysis toolbox. 
   3  # 
   4  # Copyright (C) 2009-2019 by Artur Wroblewski <wrobell@riseup.net> 
   5  # 
   6  # This program is free software: you can redistribute it and/or modify 
   7  # it under the terms of the GNU General Public License as published by 
   8  # the Free Software Foundation, either version 3 of the License, or 
   9  # (at your option) any later version. 
  10  # 
  11  # This program is distributed in the hope that it will be useful, 
  12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14  # GNU General Public License for more details. 
  15  # 
  16  # You should have received a copy of the GNU General Public License 
  17  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
  18  # 
  19   
  20  """ 
  21  The `kenozooid.uddf` module provides support for parsing, searching and 
  22  manipulation of data stored in UDDF files. 
  23   
  24  The functions implemented in this module can be divided into the following 
  25  categories 
  26   
  27  - XML nodes functions 
  28  - generic XML data searching and manipulation functions 
  29  - functions for searching and manipulation of diving specific data 
  30   
  31  Searching functions use XPath expressions (queries) to find data. Each tag 
  32  name in a query should be prefixed with 'uddf:' string to indicate UDDF 
  33  namespace, i.e. 'uddf:diver', 'uddf:waypoint' - appropriate namespace 
  34  mapping for this prefix is defined for each XPath call. 
  35   
  36  The result of parsing or search of data is usually iterator of XML nodes or 
  37  data records (named tuples in Python terms). 
  38   
  39  Module `lxml` is used for XML parsing and querying with XPath. Full 
  40  capabilities of underlying `libxml2` library are used by design. The 
  41  ElementTree XML data model is used for XML nodes. 
  42  """ 
  43   
  44  from collections import namedtuple, OrderedDict, Counter 
  45  from lxml import etree as et 
  46  from functools import partial 
  47  from datetime import datetime 
  48  from dateutil.parser import parse as dparse 
  49  from io import FileIO 
  50  from operator import itemgetter 
  51  from uuid import uuid4 as uuid 
  52  from copy import deepcopy 
  53  from dirty.xml import xml 
  54  from dirty import RawString 
  55  import base64 
  56  import bz2 
  57  import itertools 
  58  import hashlib 
  59  import logging 
  60  import os 
  61  import os.path 
  62  import pkg_resources 
  63   
  64  import kenozooid 
  65  import kenozooid.util as kt 
  66   
  67  log = logging.getLogger('kenozooid.uddf') 
  68   
  69  # 
  70  # Default UDDF namespace mapping. 
  71  # 
  72  _NSMAP = {'uddf': 'http://www.streit.cc/uddf/3.2/'} 
  73   
  74  # Node id formatter 
  75  FORMAT_ID = 'id-{}' 
  76   
  77  FMT_F = partial(str.format, '{0:.1f}') 
  78  FMT_F2 = partial(str.format, '{0:.2f}') 
  79  FMT_I = lambda v: '{}'.format(int(round(v))) 
  80  FMT_DT = lambda dt: format(dt, '%Y-%m-%dT%H:%M:%S%z') 
  81   
  82  # 
  83  # Parsing and searching. 
  84  # 
  85   
  86  XPath = partial(et.XPath, namespaces=_NSMAP) 
  87  XPath.__doc__ = """ 
  88      XPath query constructor for UDDF data. 
  89   
  90      Use `uddf` prefix to create a query, i.e.:: 
  91   
  92          XPath('uddf:informationbeforedive/uddf:datetime/text()') 
  93   
  94      .. seealso: lxml.etree.XPath 
  95  """ 
  96   
  97  # XPath queries for default dive data 
  98  XP_DEFAULT_DIVE_DATA = ( 
  99      XPath('uddf:informationbeforedive/uddf:divenumber/text()'), 
 100      XPath('uddf:informationbeforedive/uddf:datetime/text()'), 
 101      XPath('uddf:informationafterdive/uddf:greatestdepth/text()'), 
 102      XPath('uddf:informationafterdive/uddf:diveduration/text()'), 
 103      XPath('uddf:informationafterdive/uddf:lowesttemperature/text()'), 
 104      XPath('uddf:informationafterdive/uddf:averagedepth/text()'), 
 105      XPath('uddf:samples/uddf:waypoint/uddf:divemode[1]/@type'), 
 106      None, 
 107  ) 
 108   
 109  # XPath queries for default dive profile sample data 
 110  XP_DEFAULT_PROFILE_DATA =  ( 
 111      XPath('uddf:depth/text()'), 
 112      XPath('uddf:divetime/text()'), 
 113      XPath('uddf:temperature/text()'), 
 114      XPath('uddf:setpo2/text()'), 
 115      XPath('uddf:setpo2/@setby'), 
 116      XPath('uddf:decostop/@duration'), 
 117      XPath('uddf:decostop/@decodepth'), 
 118      XPath('uddf:alarm/text()'), 
 119      XPath('uddf:switchmix/@ref'), 
 120  ) 
 121   
 122  XP_DEFAULT_GAS_DATA =  ( 
 123      XPath('@id'), 
 124      XPath('uddf:name/text()'), 
 125      XPath('uddf:o2/text()'), 
 126      XPath('uddf:he/text()'), 
 127  ) 
 128   
 129  # XPath query to locate dive profile sample 
 130  XP_WAYPOINT = XPath('./uddf:samples/uddf:waypoint') 
 131  # XPath query to locate gas mix 
 132  XP_MIX = XPath('/uddf:uddf/uddf:gasdefinitions/uddf:mix') 
 133   
 134  # XPath queries for default dive computer dump data 
 135  XP_DEFAULT_DUMP_DATA = ( 
 136      XPath('uddf:link/@ref'), 
 137      # //uddf:divecomputerdump[position()] gives current() 
 138      XPath('../../uddf:diver/uddf:owner//uddf:divecomputer[' \ 
 139              '@id = //uddf:divecomputerdump[position()]/uddf:link/@ref' \ 
 140          ']/uddf:model/text()'), 
 141      XPath('uddf:datetime/text()'), 
 142      XPath('uddf:dcdump/text()'), 
 143  ) 
 144   
 145  # XPath queries for default buddy data 
 146  XP_DEFAULT_BUDDY_DATA = ( 
 147      XPath('@id'), 
 148      XPath('uddf:personal/uddf:firstname/text()'), 
 149      XPath('uddf:personal/uddf:middlename/text()'), 
 150      XPath('uddf:personal/uddf:lastname/text()'), 
 151      XPath('uddf:personal/uddf:membership/@organisation'), 
 152      XPath('uddf:personal/uddf:membership/@memberid'), 
 153  ) 
 154   
 155  # XPath queries for default dive site data 
 156  XP_DEFAULT_SITE_DATA = ( 
 157      XPath('@id'), 
 158      XPath('uddf:name/text()'), 
 159      XPath('uddf:geography/uddf:location/text()'), 
 160      XPath('uddf:geography/uddf:longitude/text()'), 
 161      XPath('uddf:geography/uddf:latitude/text()'), 
 162  ) 
 163   
 164  # XPath query to find a buddy 
 165  XP_FIND_BUDDY = XPath('/uddf:uddf/uddf:diver/uddf:buddy[' \ 
 166      '@id = $buddy' \ 
 167      ' or uddf:personal/uddf:membership/@memberid = $buddy' \ 
 168      ' or uddf:personal/uddf:membership/@organisation = $buddy' \ 
 169      ' or contains(uddf:personal/uddf:firstname/text(), $buddy)' \ 
 170      ' or contains(uddf:personal/uddf:lastname/text(), $buddy)' \ 
 171      ']') 
 172   
 173  # XPath query to find a dive site 
 174  XP_FIND_SITE = XPath('/uddf:uddf/uddf:divesite/uddf:site[' \ 
 175      '@id = $site' \ 
 176      ' or contains(uddf:name/text(), $site)' \ 
 177      ' or contains(uddf:geography/uddf:location/text(), $site)' \ 
 178      ']') 
 179   
 180  # XPath query to find dives 
 181  XP_FIND_DIVES = XPath('/uddf:uddf/uddf:profiledata' \ 
 182      '/uddf:repetitiongroup/uddf:dive[in-range(position(), $nodes)' \ 
 183      ' and in-range(uddf:informationbeforedive/uddf:divenumber/text(), $dives)]') 
 184   
 185  # XPath query to find dive gases 
 186  XP_FIND_DIVE_GASES = XPath('/uddf:uddf/uddf:gasdefinitions' \ 
 187      '/uddf:mix[@id=/uddf:uddf/uddf:profiledata/uddf:repetitiongroup' \ 
 188      '/uddf:dive[in-range(position(), $nodes)]' \ 
 189      '/uddf:samples/uddf:waypoint/uddf:switchmix/@ref]') 
 190   
 191   
class RangeError(ValueError):
    """
    Signal that a textual number range is malformed.

    The class derives from `ValueError`, so handlers written for
    `ValueError` catch it as well.

    .. seealso:: :py:func:`parse_range`
    """
200 201
def parse(f, ver_check=True):
    """
    Parse XML file and return document object.

    File to parse can be anything supported by ``lxml`` library.

    If file to parse is file name and ends with '.bz2' (or '.bz2.bak'),
    then it is treated as file compressed with bzip2. The compressed file
    is closed as soon as the document is parsed.

    :Parameters:
     f
        File to parse.
     ver_check
        Check version of UDDF file.

    :Raises:
     ValueError
        If version check is requested and the version of the document is
        missing, malformed or other than 3.2.
    """
    if isinstance(f, str) and f.endswith(('.bz2', '.bz2.bak')):
        log.debug('detected compressed file')
        # context manager releases the file handle deterministically
        with bz2.BZ2File(f) as fz:
            doc = et.parse(fz)
    else:
        doc = et.parse(f)

    if ver_check:
        version = doc.getroot().get('version')
        parts = version.split('.') if version else []
        if len(parts) < 2:
            # previously this failed with AttributeError or an unpacking
            # error, which said nothing about the real problem
            raise ValueError('UDDF file version is missing or malformed: {!r}'
                .format(version))
        v1, v2 = parts[:2]
        if (v1, v2) != ('3', '2'):
            raise ValueError('UDDF file version {}.{} is not supported.' \
                ' Please upgrade file with "kz upgrade" command.' \
                .format(v1, v2))
    return doc
228 229
def find(f, query, **params):
    """
    Find XML nodes in UDDF file using XPath query.

    UDDF file can be a file name, file object, URL and basically everything
    what is supported by `lxml` library.

    File to parse can be a file name ending with '.bz2'. It is treated as
    file compressed with bzip2.

    The XPath query parameters are passed to the query both when it is an
    XPath object and when it is a string expression.

    An iterator of found items is returned.

    :Parameters:
     f
        UDDF file to parse.
     query
        XPath expression or XPath object.
     params
        XPath query parameters.

    .. seealso:: :py:func:`XPath`, :py:func:`parse`
    """
    log.debug('parsing and searching with query: {}; parameters {}' \
        .format(query, params))
    doc = parse(f)
    if isinstance(query, str):
        # string queries used to be evaluated without the parameters, so
        # any $variable in the expression could never be resolved
        found = doc.xpath(query, namespaces=_NSMAP, **params)
    else:
        found = query(doc, **params)
    return (n for n in found)
257 258
def xp(node, query):
    """
    Run XPath query against a node and iterate over the result.

    The `uddf` prefix is mapped to UDDF namespace for the query. The
    function is a generator, so the query is evaluated when the first item
    is requested.

    :Parameters:
     node
        Document node or query starting node.
     query
        XPath query.

    .. seealso::
        lxml.etree.Element.xpath
    """
    yield from node.xpath(query, namespaces=_NSMAP)
278 279
def xp_first(node, query):
    """
    Return first item found with XPath query.

    The query is performed using UDDF namespace. If the query finds
    nothing, then `None` is returned.

    :Parameters:
     node
        Document node or query starting node.
     query
        XPath query.

    .. seealso::
        lxml.etree.Element.xpath
    """
    for item in xp(node, query):
        return item
    return None
299 300
def xp_last(node, query):
    """
    Return last item found with XPath query.

    The query is performed using UDDF namespace. If the query finds
    nothing, then `None` is returned.

    :Parameters:
     node
        Document node or query starting node.
     query
        XPath query.

    .. seealso::
        lxml.etree.Element.xpath
    """
    found = node.xpath(query, namespaces=_NSMAP)
    if not found:
        return None
    return found[-1]
320 321
def find_data(name, node, fields, queries, parsers, nquery=None):
    """
    Find data records starting from specified XML node.

    A named tuple type is created from record name and field names. Value
    of each field is located with its XPath expression object and converted
    with its parser (any callable, i.e. `float`, `int` or
    `dateutil.parser.parse`).

    Without `nquery` a single record is built directly from `node`. With
    `nquery` the node is first relocated: the query is run against `node`
    and a generator of records, one per found node, is returned.

    The length of fields, field queries and field parsers should be the same.

    :Parameters:
     name
        Name of the record to be created.
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.
     nquery
        XPath expression object to relocate from node to more appropriate
        position in XML document for record data retrieval.

    .. seealso:: :py:func:`dive_data`, :py:func:`dive_profile`
    """
    make = namedtuple(name, ' '.join(fields))._make
    if not nquery:
        return _record(make, node, queries, parsers)

    # the relocation query runs now, records are built lazily
    start_nodes = nquery(node)
    return (_record(make, n, queries, parsers) for n in start_nodes)
363 364
def dive_data(node, fields=None, queries=None, parsers=None):
    """
    Specialized function to return record of a dive data.

    By default the record consists of dive number, start time, greatest
    depth, duration, lowest temperature, average depth, dive mode and dive
    profile. The profile field has no query of its own, it is built from
    the dive node with :py:func:`dive_profile`.

    Dive record data can be reconfigured with optional fields, field
    queries and field parsers parameters, which have to be provided
    together.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression object for each field to retrieve its value.
     parsers
        Parsers field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is not None:
        return find_data('Dive', node, fields, queries, parsers)

    names = ('number', 'datetime', 'depth', 'duration', 'temp',
        'avg_depth', 'mode', 'profile')
    converters = (int, dparse, float, float, float, float, str, dive_profile)
    return find_data('Dive', node, names, XP_DEFAULT_DIVE_DATA, converters)
395 396
def dive_profile(node, fields=None, queries=None, parsers=None):
    """
    Specialized function to return generator of dive profiles records.

    By default, dive profile record contains following fields

    depth
        dive depth in meters
    time
        dive time in seconds
    temp
        temperature in Kelvins
    setpoint, setpointby
        value and `setby` attribute of `setpo2` element
    deco_time, deco_depth
        `duration` and `decodepth` attributes of `decostop` element
    alarm
        text of `alarm` element
    gas
        gas record referenced by `switchmix` element, looked up by id among
        records returned by :py:func:`gas_data`

    Dive profile record data can be reconfigured with optional fields,
    field queries and field parsers parameters.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is None:
        # gas mix reference is resolved into gas record
        mixes = {mix.id: mix for mix in gas_data(node)}
        fields = ('depth', 'time', 'temp', 'setpoint', 'setpointby',
            'deco_time', 'deco_depth', 'alarm', 'gas')
        queries = XP_DEFAULT_PROFILE_DATA
        parsers = (float, float, float, float, str, float, float, str,
            mixes.get)

    return find_data('Sample', node, fields, queries, parsers,
        nquery=XP_WAYPOINT)
434 435
def gas_data(node, fields=None, queries=None, parsers=None):
    """
    Get generator of gas mix records.

    The gas mixes are located with `XP_MIX` query, which is an absolute
    path to gas definitions of the document containing the node.

    The following data is returned by default

    id
        Gas mix id.
    name
        Gas mix name.
    o2
        Value of `o2` element parsed as integer.
    he
        Value of `he` element parsed as integer.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is not None:
        return find_data('Gas', node, fields, queries, parsers, nquery=XP_MIX)

    return find_data('Gas', node, ('id', 'name', 'o2', 'he'),
        XP_DEFAULT_GAS_DATA, (str, str, int, int), nquery=XP_MIX)
444 445
def dump_data(node, fields=None, queries=None, parsers=None):
    """
    Get dive computer dump data.

    The following data is returned

    dc_id
        Dive computer id.
    dc_model
        Dive computer model information.
    datetime
        Date and time when dive computer dump was obtained.
    data
        Dive computer dump data, decoded with `_dump_decode`.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is not None:
        return find_data('DiveComputerDump', node, fields, queries, parsers)

    names = ('dc_id', 'dc_model', 'datetime', 'data')
    converters = (str, str, dparse, _dump_decode)
    return find_data('DiveComputerDump', node, names, XP_DEFAULT_DUMP_DATA,
        converters)
478 479
def buddy_data(node, fields=None, queries=None, parsers=None):
    """
    Get dive buddy data.

    The following data is returned by default

    id
        Buddy id.
    fname
        Buddy first name.
    mname
        Buddy middle name.
    lname
        Buddy last name.
    org
        Organization, which a buddy is member of.
    number
        Member number id in the organisation.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is None:
        fields = ('id', 'fname', 'mname', 'lname', 'org', 'number')
        queries = XP_DEFAULT_BUDDY_DATA
        # one parser per field; it used to be hard-coded to 7 parsers for
        # 6 fields
        parsers = (str, ) * len(fields)
    return find_data('Buddy', node, fields, queries, parsers)
516 517
def site_data(node, fields=None, queries=None, parsers=None):
    """
    Get dive site data.

    The following data is returned by default

    id
        Dive site id.
    name
        Dive site name.
    location
        Dive site location.
    x
        Dive site longitude.
    y
        Dive site latitude.

    :Parameters:
     node
        XML node.
     fields
        Names of fields to be created in a record.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.

    .. seealso:: :py:func:`find_data`
    """
    if fields is not None:
        return find_data('DiveSite', node, fields, queries, parsers)

    names = ('id', 'name', 'location', 'x', 'y')
    converters = (str, str, str, float, float)
    return find_data('DiveSite', node, names, XP_DEFAULT_SITE_DATA, converters)
552 553
def parse_range(s):
    """
    Parse textual representation of number range into Python expression.

    The range is comma separated list of segments. Each segment is a
    single number, closed range `a-b` or open range `-b` or `a-`.

    Examples of a ranges

    >>> parse_range('1-3,5')
    '1 <= n and n <= 3 or n == 5'

    >>> parse_range('-3,10')
    'n <= 3 or n == 10'

    Example of infinite range

    >>> parse_range('20-')
    '20 <= n'

    :Parameters:
     s
        Textual representation of number range.

    :Raises:
     RangeError
        If any segment of the range is not valid, i.e. 'a', '-' or '1-2-3'.
    """
    data = []
    try:
        for r in s.split(','):
            d = r.split('-')
            if len(d) == 1:
                data.append('n == %d' % int(d[0]))
            elif len(d) == 2:
                p1 = d[0].strip()
                p2 = d[1].strip()
                if p1 and p2:
                    data.append('{} <= n and n <= {}'.format(int(p1), int(p2)))
                elif p1:
                    data.append('{} <= n'.format(int(p1)))
                elif p2:
                    data.append('n <= {}'.format(int(p2)))
                else:
                    raise ValueError('empty range segment')
            else:
                # segments like '1-2-3' used to be skipped silently, which
                # could even produce an empty expression
                raise ValueError('too many range separators')
    except ValueError as ex:
        raise RangeError('Invalid range %s' % s) from ex
    return ' or '.join(data)
595 596
def in_range(ctx, pos, nodes):
    """
    XPath expression function to restrict position of a node to be within
    numeric range.

    Empty range matches everything. If position is a list (result of XPath
    node query), then empty list matches nothing and single item list is
    converted to integer.

    The range predicate is compiled once per range and cached in XPath
    evaluation context.

    :Parameters:
     ctx
        XPath context object.
     pos
        Node position.
     nodes
        Number range, i.e. "2-3".

    :Raises:
     ValueError
        If position is a list with more than one item.
     RangeError
        If the range cannot be parsed.

    .. seealso:: :py:func:`parse_range`
    """
    if not nodes:
        return True

    if isinstance(pos, list):
        if len(pos) == 0:
            return False
        if len(pos) != 1:
            raise ValueError('Too many parameters')
        pos = int(pos[0])

    kf = 'in-range({})'.format(nodes)
    if kf not in ctx.eval_context:
        nr = parse_range(nodes)
        if not nr:
            raise RangeError('Invalid range %s' % nodes)
        # Only the expression produced by parse_range (integers and fixed
        # operators) is evaluated. The raw range string is used as a cache
        # key only - it used to be pasted into source code passed to exec(),
        # which allowed a crafted range to execute arbitrary code.
        ctx.eval_context[kf] = eval('lambda n: ' + nr, {'__builtins__': {}})
    return ctx.eval_context[kf](pos)
# register in-range XPath function; it is registered in the namespace
# obtained for `None`, and queries like XP_FIND_DIVES call it without any
# prefix, i.e. `in-range(position(), $nodes)`
ns = et.FunctionNamespace(None)
ns['in-range'] = in_range
def _field(node, query, parser):
    """
    Find value of a field starting from specified XML node.

    The first item found by the query is converted with the parser and
    returned. If the query is `None`, then the node itself is passed to
    the parser.

    If nothing is found, then `None` is returned.

    :Parameters:
     node
        XML node.
     query
        XPath expression object to find node with text value.
     parser
        Parser to convert text value to requested type.
    """
    if query is None:
        found = [node]
    else:
        found = query(node)

    if not found:
        return None
    return parser(found[0])
653 654
def _record(rt, node, queries, parsers):
    """
    Create record with data.

    Value of each field is found with its query (XPath expression object)
    starting from XML node and converted to appropriate type with its
    parser. Queries and parsers are paired in order.

    If query is `None`, then record data is node itself.

    :Parameters:
     rt
        Record type (named tuple) of record data.
     node
        XML node.
     queries
        XPath expression objects for each field to retrieve its value.
     parsers
        Parsers of field values to be created in a record.
    """
    values = [_field(node, q, p) for q, p in zip(queries, parsers)]
    return rt(values)
677 678
def _dump_decode(data):
    """
    Decode dive computer data stored in UDDF dive computer dump file.

    The data is a string containing Base64 encoded, bzip2 compressed
    binary data. The decompressed bytes are returned.
    """
    compressed = base64.b64decode(data.encode())
    return bz2.decompress(compressed)
#
# Creating UDDF data.
#

# Formatters of dive profile sample values: both render a number with one
# decimal place, depth is additionally limited to be non-negative.
DEFAULT_FMT_DIVE_PROFILE = {
    'depth': lambda d: str.format('{0:.1f}', max(d, 0)),
    'temp': partial(str.format, '{0:.1f}'),
}

# basic data for an UDDF file
# fixme: obsolete
# NOTE(review): whitespace between the elements of the template below was
# not preserved by the listing this text was recovered from - confirm the
# layout against the original file.
UDDF_BASIC = """\
<uddf xmlns="http://www.streit.cc/uddf/3.2/" version="3.2.0">
<generator>
    <name>kenozooid</name>
    <manufacturer id='kenozooid'>
      <name>Kenozooid Team</name>
      <contact>
        <homepage>http://wrobell.dcmod.org/kenozooid/</homepage>
      </contact>
    </manufacturer>
    <version>{kzver}</version>
    <datetime></datetime>
</generator>
<diver>
    <owner id='owner'>
        <personal>
            <firstname>Anonymous</firstname>
            <lastname>Guest</lastname>
        </personal>
    </owner>
</diver>
</uddf>
""".format(kzver=kenozooid.__version__)
def create(datetime=None):
    """
    fixme: obsolete

    Create basic UDDF structure.

    The root node of the created structure is returned.

    :Parameters:
     datetime
        Timestamp of file creation, current time by default.
    """
    if datetime is None:
        # Resolve the default at call time. A `datetime.now()` default in
        # the signature is evaluated only once, when the module is imported.
        # The class is imported locally as the parameter shadows its name.
        from datetime import datetime as _datetime
        datetime = _datetime.now()

    root = et.XML(UDDF_BASIC)
    n = root.xpath('//uddf:generator/uddf:datetime', namespaces=_NSMAP)[0]
    n.text = FMT_DT(datetime)
    return root
740 741
def set_data(node, queries, formatters=None, **data):
    """
    Set data of nodes or attributes using XPath queries relative to
    specified XML node.

    The data values are converted to string with formatters functions,
    `str` is used when there is no formatter for given data key.

    A query maps data key to a path or to a list of paths. In the latter
    case the value is expected to be a sequence and its items are paired
    with the paths. Data keys with no value (`None`) are skipped.

    Path ending with `@name` sets an attribute, any other path sets text of
    a node. Path consisting of an attribute only refers to the node itself.

    :Parameters:
     node
        XML node.
     queries
        Path-like expressions of XML structure to be created.
     formatters
        Data formatters.
     data
        Data values to be set within XML document.
    """
    if formatters is None:
        formatters = {}

    nodes = {}  # created nodes
    attrs = set() # created attributes

    for key, path in queries.items():
        value = data.get(key)
        if value is None:
            continue

        # single path and value are processed as one item lists
        if isinstance(path, str):
            path = [path]
            value = [value]

        for p, v in zip(path, value):
            f = formatters.get(key, str)
            v = f(v)

            # split off trailing attribute, if any; attribute only path
            # leaves no node path
            attr = None
            tags = p.rsplit('/', 1)
            if tags[-1].startswith('@'):
                attr = tags[-1][1:] # skip '@'
                p = tags[0] if len(tags) > 1 else None

            n = node
            if p:
                n = nodes.get(p)
                # reuse node created in this call to make t/@a t/@b work,
                # but create new node to not overwrite attribute value
                if n is None or (p, attr) in attrs:
                    # create_node yields every node of the path, the last
                    # one is the target
                    *_, n = create_node(p, parent=node)
                    nodes[p] = n
                attrs.add((p, attr))

            assert n is not None

            if attr:
                n.set(attr, v)
            else:
                n.text = v
800 801
def create_node(path, parent=None, append=True):
    """
    TODO: get rid of parent, does not make sense

    Create a hierarchy of nodes using XML nodes path specification.

    Path is a string of node names separated by slash character, i.e. a/b/c
    creates::

        <a><b><c/></b></a>

    If parent node is specified and some part of node hierarchy already
    exists then only non-existant nodes are created, i.e. if parent is
    'x' node in

        <x><y/></x>

    then path 'x/z' modifies XML document as follows

        <x><y/><z/></x>

    The node for the last item of the path is always created.

    The function is a generator, which yields node for each item of the
    path; nodes are created while the generator is consumed.

    :Parameters:
     path
        Hierarchy of nodes.
     parent
        Optional parent node.
     append
        Append new node to its parent if true, otherwise insert it as the
        first child.
    """
    # preserve namespace prefix option... please?!? :/
    T = lambda tag: tag.replace('uddf:', '{' + _NSMAP['uddf'] + '}')
    tags = path.split('/')
    last = len(tags) - 1
    n = parent
    for i, t in enumerate(tags):
        # The last item is recognized by its position. Comparing tag names
        # treated every earlier tag equal to the last one (i.e. 'a/b/a') as
        # the last item, so an existing node was never reused for it.
        k = None
        if n is not None and i != last:
            k = xp_first(n, t)

        if k is None:
            k = et.Element(T(t))
            if n is not None:
                if append:
                    n.append(k)
                else:
                    n.insert(0, k)
        n = k
        yield n
851 852
def create_dive_data(node=None, queries=None, formatters=None, **data):
    """
    Create dive data.

    New dive node is created within repetition group of profile data of
    the base node. Both repetition group and dive nodes get an id. The dive
    node is returned.

    By default, the data can contain dive site reference (`site`), list of
    buddy references (`buddies`), dive start time (`datetime`), greatest
    depth (`depth`), dive duration (`duration`) and lowest temperature
    (`temp`).

    :Parameters:
     node
        Base node (UDDF root node).
     queries
        Path-like expressions of XML structure to be created.
     formatters
        Dive data formatters.
     data
        Dive data.
    """
    if queries is None:
        bd = data.get('buddies')
        bno = len(bd) if bd else 0
        f = ('site', 'buddies', 'datetime', 'depth', 'duration', 'temp')
        # one link node per buddy
        q = ('uddf:informationbeforedive/uddf:link/@ref',
            ['uddf:informationbeforedive/uddf:link/@ref'] * bno,
            'uddf:informationbeforedive/uddf:datetime',
            'uddf:informationafterdive/uddf:greatestdepth',
            'uddf:informationafterdive/uddf:diveduration',
            'uddf:informationafterdive/uddf:lowesttemperature')
        queries = OrderedDict(zip(f, q))
    if formatters is None:
        formatters = {
            'datetime': FMT_DT,
            'depth': partial(str.format, '{0:.1f}'),
            'duration': partial(str.format, '{0:.0f}'),
            'temp': partial(str.format, '{0:.1f}'),
        }
    _, rg, dn = create_node('uddf:profiledata/uddf:repetitiongroup/uddf:dive',
            parent=node)
    _set_id(rg)
    _set_id(dn)
    set_data(dn, queries, formatters, **data)
    return dn
891 892
def create_buddy_data(node, queries=None, formatters=None, **data):
    """
    Create buddy data.

    New buddy node is created within diver data of the base node and
    returned. If buddy id is not provided, then random id is generated.

    :Parameters:
     node
        Base node (UDDF root node).
     queries
        Path-like expressions of XML structure to be created.
     formatters
        Buddy data formatters.
     data
        Buddy data.

    """
    if queries is None:
        f = ('id', 'fname', 'mname', 'lname', 'org', 'number')
        q = ('@id',
            'uddf:personal/uddf:firstname',
            'uddf:personal/uddf:middlename',
            'uddf:personal/uddf:lastname',
            'uddf:personal/uddf:membership/@organisation',
            'uddf:personal/uddf:membership/@memberid')
        queries = OrderedDict(zip(f, q))

    if formatters is None:
        formatters = {}

    if data.get('id') is None:
        data['id'] = uuid().hex

    _, buddy = create_node('uddf:diver/uddf:buddy', parent=node)
    set_data(buddy, queries, formatters, **data)
    return buddy
927 928
def create_site_data(node, queries=None, formatters=None, **data):
    """
    Create dive site data.

    New dive site node is created within dive sites of the base node and
    returned. If dive site id is not provided, then random id is generated.

    :Parameters:
     node
        Base node (UDDF root node).
     queries
        Path-like expressions of XML structure to be created.
     formatters
        Dive site data formatters.
     data
        Dive site data.

    """
    if queries is None:
        f = ('id', 'name', 'location', 'x', 'y')
        q = ('@id',
            'uddf:name',
            'uddf:geography/uddf:location',
            'uddf:geography/uddf:longitude',
            'uddf:geography/uddf:latitude')
        queries = OrderedDict(zip(f, q))

    if formatters is None:
        formatters = {}

    if data.get('id') is None:
        data['id'] = uuid().hex

    _, site = create_node('uddf:divesite/uddf:site', parent=node)
    set_data(site, queries, formatters, **data)
    return site
962 963
def _dump_encode(data):
    """
    Encode dive computer binary data for storage in UDDF file.

    The data is compressed with bzip2 and then encoded with Base64. The
    result is returned as bytes.
    """
    return base64.b64encode(bz2.compress(data))
972 973
def create_uddf(datetime=None, equipment=None, gases=None, dives=None,
        dump=None):
    """
    Create UDDF XML data.

    The sections for equipment, gases, dives and dive computer dump are
    created only when appropriate data is provided.

    :Parameters:
     datetime
        Timestamp of UDDF creation, current time by default.
     equipment
        Diver's (owner) equipment XML data (see create_dc_data).
     gases
        List of gases used by the dives.
     dives
        Dives XML data (see create_dives).
     dump
        Dive computer dump XML data (see create_dump_data).
    """
    if datetime is None:
        # Resolve the default at call time. A `datetime.now()` default in
        # the signature is evaluated only once, when the module is imported,
        # so every document would get the import time as its timestamp.
        # The class is imported locally as the parameter shadows its name.
        from datetime import datetime as _datetime
        datetime = _datetime.now()

    doc = xml.uddf(
        xml.generator(
            xml.name('kenozooid'),
            xml.manufacturer(
                xml.name('Kenozooid Team'),
                xml.contact(xml.homepage('https://wrobell.dcmod.org/kenozooid/')),
                id='kenozooid'),
            xml.version(kenozooid.__version__),
            xml.datetime(FMT_DT(datetime)),
        ),

        xml.diver(
            xml.owner(
                xml.personal(xml.firstname('Anonymous'), xml.lastname('Guest')),
                xml.equipment(equipment) if equipment else None,
                id='owner')),

        xml.divecomputercontrol(dump) if dump else None,
        xml.gasdefinitions(gases) if gases else None,
        xml.profiledata(xml.repetitiongroup(dives, id=gen_id()))
            if dives else None,

        xmlns=_NSMAP['uddf'],
        version='3.2.0',
    )
    return doc
1017 1018
def create_dives(dives, equipment=None):
    """
    Create dives UDDF XML data.

    The function is a generator, which renders each dive with
    :py:func:`create_dive` when the dive is requested.

    :Parameters:
     dives
        Iterable of dive tuples.
     equipment
        List of used equipment references.
    """
    render = partial(create_dive, equipment=equipment)
    for dive in dives:
        yield render(dive)
1031
def create_dive(dive, equipment=None):
    """
    Create dive UDDF XML data.

    The dive node consists of information before the dive (start time),
    optional samples (when the dive has a profile) and information after
    the dive (greatest depth, duration, optional lowest temperature, used
    equipment and optional average depth). The node gets generated id.

    Equipment references of the dive itself are followed by the references
    passed with `equipment` parameter.

    :Parameters:
     dive
        Dive to render as XML.
     equipment
        List of used equipment references.
    """
    refs = itertools.chain(kt.nit(dive.equipment), kt.nit(equipment))
    log.debug('convert dive {0.datetime}/{0.depth:.1f}/{0.duration} into XML'
        .format(dive))

    before = xml.informationbeforedive(xml.datetime(FMT_DT(dive.datetime)))

    samples = None
    if dive.profile:
        samples = xml.samples(create_dive_samples(dive.profile, dive.mode))

    after = xml.informationafterdive(
        xml.greatestdepth(FMT_F(dive.depth)),
        xml.diveduration(FMT_I(dive.duration)),
        xml.lowesttemperature(FMT_F(dive.temp))
            if dive.temp is not None else None,
        xml.equipmentused((xml.link(ref=v) for v in refs)),
        xml.averagedepth(FMT_F(dive.avg_depth))
            if dive.avg_depth is not None else None,
    )
    return xml.dive(before, samples, after, id=gen_id())
1057 1058
def create_dive_samples(samples, mode=None):
    """
    Create dive samples UDDF XML data.

    The function is a generator of waypoint nodes, one per sample. Depth
    and dive time are always rendered; alarms, deco stop, setpoint, gas
    switch and temperature only when a sample provides them. The dive mode
    is rendered for the first sample only.

    :Parameters:
     samples
        Iterable of tuples of dive samples.
     mode
        Dive mode, i.e. opencircuit, closedcircuit.
    """
    for idx, smp in enumerate(samples):
        alarms = None
        if smp.alarm is not None:
            alarms = (xml.alarm(a) for a in smp.alarm)

        deco = None
        if smp.deco_time is not None:
            deco = xml.decostop(
                duration=FMT_I(smp.deco_time),
                decodepth=FMT_I(smp.deco_depth),
                kind='mandatory'
            )

        depth = xml.depth(FMT_F(smp.depth))
        divetime = xml.divetime(FMT_I(smp.time))

        setpoint = None
        if smp.setpoint is not None:
            setpoint = xml.setpo2(FMT_F2(smp.setpoint), setby=smp.setpointby)

        switch = None
        if smp.gas is not None:
            switch = xml.switchmix(ref=str(smp.gas.id))

        temp = None
        if smp.temp is not None:
            temp = xml.temperature(FMT_F(smp.temp))

        divemode = None
        if idx == 0 and mode is not None:
            divemode = xml.divemode(type=mode)

        yield xml.waypoint(alarms, deco, depth, divetime, setpoint, switch,
            temp, divemode)
1086 1087
def create_gas(gas):
    """
    Create gas UDDF XML data.

    The gas mix node contains name of the gas and its `o2` and `he`
    values converted to strings. The id of the gas is the id of the node.

    :Parameters:
     gas
        Gas information to render as XML.
    """
    name = xml.name(gas.name)
    o2 = xml.o2(str(gas.o2))
    he = xml.he(str(gas.he))
    return xml.mix(name, o2, he, id=gas.id)
1102 1103
def create_dc_data(dc_id, model):
    """
    Create dive computer UDDF XML data.

    The function is a generator, which yields single dive computer node.
    The model is used for both name and model of the dive computer.

    :Parameters:
     dc_id
        Dive computer id.
     model
        Dive computer model.
    """
    name = xml.name(model)
    dc_model = xml.model(model)
    yield xml.divecomputer(name, dc_model, id=dc_id)
1115 1116
def create_dump_data(dc_id, datetime, data):
    """
    Create dive computer dump UDDF XML data.

    The function is a generator, which yields single dive computer dump
    node. The node links to the dive computer and contains time of the dump
    and the binary data encoded with `_dump_encode`.

    :Parameters:
     dc_id
        Dive computer id.
     datetime
        Date and time when the dump was created.
     data
        Dive computer binary data.
    """
    link = xml.link(ref=dc_id)
    stamp = xml.datetime(FMT_DT(datetime))
    payload = xml.dcdump(_dump_encode(data).decode())
    yield xml.divecomputerdump(link, stamp, payload)
1134 1135
def save(doc, fout, validate=True):
    """
    Save UDDF XML data into a file.

    If output file is file name ending with '.bz2', then it is compressed
    with bzip2.

    The UDDF XML data can be ElementTree XML object or iterable of strings.

    If output file exists then backup file with ``.bak`` extension is
    created. The backup is restored when saving or validation fails.

    Files opened by this function are closed before it returns. A file
    object passed as output is left open; it has to be seekable when
    validation is requested.

    :Parameters:
     doc
        UDDF XML data.
     fout
        Output file.
     validate
        Validate UDDF file after saving if True.
    """
    log.debug('saving uddf file')
    is_fn = isinstance(fout, str)
    openf = open
    if is_fn and fout.endswith('.bz2'):
        openf = bz2.BZ2File
        log.debug('uddf file will be compressed')

    fbk = '{}.bak'.format(fout)
    if is_fn and os.path.exists(fout):
        os.rename(fout, fbk)
        log.debug('backup file created')
    try:
        f = openf(fout, 'wb') if is_fn else fout
        try:
            if et.iselement(doc):
                et.ElementTree(doc).write(f,
                        encoding='utf-8',
                        xml_declaration=True,
                        pretty_print=True)
            else:
                f.writelines(l.encode('utf-8') for l in doc)
        finally:
            # close (and flush) the file we opened before it is read back
            # for validation; file object of a caller is left alone
            if is_fn:
                f.close()

        if validate:
            log.debug('validating uddf file')
            fs = pkg_resources.resource_stream('kenozooid', 'uddf/uddf_3.2.0.xsd')
            try:
                if hasattr(fs, 'name'):
                    log.debug('uddf xsd found: {}'.format(fs.name))
                schema = et.XMLSchema(et.parse(fs))
            finally:
                fs.close()

            if is_fn:
                # binary mode: let the XML parser deal with the encoding
                with openf(fout, 'rb') as f:
                    schema.assertValid(et.parse(f))
            else:
                fout.seek(0)
                schema.assertValid(et.parse(fout))
            log.debug('uddf file is valid')
    except Exception:
        if os.path.exists(fbk):
            os.rename(fbk, fout)
            log.debug('backup file restored')
        raise
1195 1196 1197 # 1198 # Removing UDDF data. 1199 # 1200
def remove_nodes(node, query, **params):
    """
    Detach from XML document every node found by a query.

    The query is called with the starting node and the query parameters;
    each node it returns is removed from its own parent.

    :Parameters:
     node
        Starting XML node for XPath query.
     query
        XPath query (a callable) to find nodes to remove.
     params
        XPath query parameters.
    """
    msg = 'node removal with query: {}, params: {}'.format(query, params)
    log.debug(msg)
    for found in query(node, **params):
        found.getparent().remove(found)
1217 1218 # 1219 # Processing UDDF data. 1220 # 1221 1222
def reorder(doc):
    """
    fixme: obsolete

    Reorder and cleanup dives in UDDF document.

    All existing repetition groups are removed from the first profile data
    node and replaced with a single, new repetition group, which contains

    - dives sorted by dive start time
    - only the first dive found for given dive start time (duplicates are
      dropped)

    Dive nodes and dive start times are fetched with two separate queries
    and paired by position.

    NOTE(review): the pairing presumably relies on every dive having
    exactly one ``informationbeforedive/datetime`` value - to be confirmed.

    ValueError is raised if there is no profile data or no repetition
    group in the document.

    :Parameters:
     doc
        UDDF document.
    """
    find = partial(doc.xpath, namespaces=_NSMAP)

    profiles = find('//uddf:profiledata')
    rgroups = find('//uddf:profiledata/uddf:repetitiongroup')
    if not (profiles and rgroups):
        raise ValueError('No profile data to reorder')
    pd = profiles[0]

    q = '/uddf:uddf/uddf:profiledata/uddf:repetitiongroup/uddf:dive'
    qt = q + '/uddf:informationbeforedive/uddf:datetime/text()'
    dive_nodes = find(q)
    dive_times = find(qt)

    # map parsed start time to the first dive having that time; parsed
    # values are used as string representation is not reliable for sorting
    by_time = {}
    for dive, text in zip(dive_nodes, dive_times):
        by_time.setdefault(dparse(text), dive)

    log.debug('removing old repetition groups')
    for old in rgroups:
        pd.remove(old)
    rg, = create_node('uddf:repetitiongroup', parent=pd)
    _set_id(rg)

    # put dives into the new repetition group in chronological order
    log.debug('sorting dives')
    for start in sorted(by_time):
        rg.append(by_time[start])
1267 1268
class NodeCopier(object):
    """
    UDDF document node copier.

    See :py:func:`NodeCopier.copy` for details.

    :Attributes:
     doc
        The target document.
     doc_ids
        The cache of target document ids.
    """
    def __init__(self, doc):
        """
        Initialize node copier.

        The id cache is filled with ids of all UDDF nodes of the target
        document.

        :Parameters:
         doc
            The target document.
        """
        self.doc = doc
        self.doc_ids = set(xp(doc, '//uddf:*/@id'))

    def __enter__(self):
        """
        Create UDDF node copier context manager.
        """
        return self

    def __exit__(self, *args):
        """
        Close UDDF node copier context manager.
        """
        pass

    def copy(self, node, target):
        """
        Copy node from UDDF document to target node in destination UDDF
        document. Target node becomes parent of node to be copied.

        The copying works under following assumptions

        - whole node is being copied including its descendants
        - node is not copied if it has id and id already exists in the target
        - if copied nodes reference non-descendant nodes and they do _not_
          exist in destination document, then referencing nodes are _removed_
        - if, due to node removal, its parent node becomes empty, then parent
          is removed, too

        Copy of the node is returned. If node is not copied due to its id
        existing in the target, then None is returned.

        ValueError is raised if the node to copy itself references
        non-existing node; the cache of target document ids is left
        unchanged in such case.

        :Parameters:
         node
            Node to copy.
         target
            The future parent of the copied node.
        """
        # check the id before copying - deep copy of a whole subtree is
        # wasted work if the node is not going to be copied anyway
        node_id = node.get('id')
        if node_id in self.doc_ids:
            log.debug('id {} already exists, not copying'.format(node_id))
            return None

        cn = deepcopy(node)

        # register only ids, which are new to the target, so they can be
        # taken back if the copy is rejected below
        new_ids = set(xp(cn, 'descendant-or-self::uddf:*/@id')) - self.doc_ids
        self.doc_ids.update(new_ids)

        # get referencing nodes
        nodes = list(xp(cn, 'descendant-or-self::uddf:*[@ref]'))
        refs = set(k.get('ref') for k in nodes)

        left = refs - self.doc_ids
        if __debug__:
            log.debug('references to remove: {} = {} - {}'.format(left,
                refs, self.doc_ids))

        if cn.get('ref') in left:
            # nothing is copied, therefore the cache must not claim that
            # ids of the rejected subtree exist in the target document
            self.doc_ids.difference_update(new_ids)
            raise ValueError('Node to copy references non-existing node')

        # remove nodes referencing missing data
        to_remove = [n for n in nodes if n.get('ref') in left]
        assert cn.getparent() is None
        for n in to_remove:
            # climb up while the node is the only child of its parent, so
            # the parent, which would become empty, is removed instead
            p = n.getparent()
            while p is not None and len(p) == 1:
                n = p
                p = n.getparent()
            # p is None when the climb reached a node without parent;
            # nothing is removed then
            if p is not None:
                p.remove(n)

        target.append(cn)
        return cn
1360 1361
def _set_id(node):
    """
    Assign generated id to a node unless the node has an id already.

    The id is ``FORMAT_ID`` filled with hexadecimal form of a new value
    returned by ``uuid()``.

    :Parameters:
     node
        Node for which id should be generated.
    """
    if node.get('id') is not None:
        return
    node.set('id', FORMAT_ID.format(uuid().hex))
1372 1373
def gen_id(value=None):
    """
    Generate id for a value.

    If value is specified, then id is hexadecimal MD5 digest of string
    representation of the value, so the same value always gives the same
    id. If value is not specified, then id is generated with UUID 4.

    The returned id is a string prefixed with ``id-`` to make it XML
    compliant.

    :Parameters:
     value
        Value to generate the id for (optional).
    """
    if value is not None:
        digest = hashlib.md5(str(value).encode()).hexdigest()
        return 'id-{}'.format(digest)
    return 'id-{}'.format(uuid().hex)
1389 1390
def xml_file_copy(f):
    """
    Iterate over raw XML data of a file.

    The file is read in chunks (4096 is passed to each ``read`` call) until
    an empty chunk is returned. Each chunk is yielded wrapped in
    ``RawString``, so the data can be put into ``dirty.xml`` nodes.

    :Parameters:
     f
        File containing XML data.
    """
    chunk = f.read(4096)
    while chunk:
        yield RawString(chunk)
        chunk = f.read(4096)
1404 1405
def get_version(f):
    """
    Get major and minor version of UDDF file.

    The version is read from ``version`` attribute of root node of the
    file, which is parsed with version check disabled. Only first two
    components of the version are used.

    Tuple (major, minor) is returned, i.e. (3, 0), (3, 1), etc.

    If the file is ``FileIO`` instance, then it is rewound to its start
    after parsing; other kinds of file objects are not repositioned.

    :Parameters:
     f
        File to check.
    """
    root = parse(f, ver_check=False).getroot()
    major, minor, *_ = root.get('version').split('.')
    if isinstance(f, FileIO):
        f.seek(0, 0)
    log.debug('detected version {}.{}'.format(major, minor))
    return int(major), int(minor)
1422 1423 1424 # vim: sw=4:et:ai 1425