| 266 | | def array_parser_factory(parser, format, fixed, shape=None): |
|---|
| 267 | | """ |
|---|
| 268 | | Returns a parser function to convert from XML to Numpy arrays. |
|---|
| 269 | | Each element will be converted using the given parser function, |
|---|
| 270 | | and the array will have the given format. |
|---|
| 271 | | """ |
|---|
| 272 | | items = 1 |
|---|
| 273 | | for dim in shape: |
|---|
| 274 | | items *= dim |
|---|
| 275 | | |
|---|
| 276 | | def parse_array(value): |
|---|
| 277 | | """ |
|---|
| 278 | | Parses a string containing an array in VOTABLE format and |
|---|
| 279 | | returns a Numpy array in the given *format*. |
|---|
| 280 | | """ |
|---|
| 281 | | if ',' in value: |
|---|
| 282 | | warn("File uses commas as array separators, even though " + |
|---|
| 283 | | "the spec does not allow this.", |
|---|
| 284 | | VOTableSpecWarning) |
|---|
| 285 | | |
|---|
| 286 | | if value.strip() == '': |
|---|
| 287 | | parts = [] |
|---|
class Converter(object):
    """
    Base class for the datatype converters.

    The four conversion entry points (`parse`, `output`, `binparse`
    and `binoutput`) only raise `NotImplementedError` here; concrete
    converters are expected to override them.  Two helpers for the
    4-byte big-endian length prefix are provided.
    """

    def __init__(self, field):
        # Nothing is derived from *field* at this level.
        pass

    def _parse_length(self, read):
        """
        Call ``read(4)`` and decode the result as a big-endian
        unsigned 32-bit integer.
        """
        (length,) = struct.unpack(">I", read(4))
        return length

    def _write_length(self, length):
        """
        Encode *length* as a big-endian unsigned 32-bit integer.
        """
        packed = struct.pack(">I", length)
        return packed

    def parse(self, value):
        """
        Placeholder; always raises `NotImplementedError`.
        """
        raise NotImplementedError("This datatype must implement a 'parse' method.")

    def output(self, value):
        """
        Placeholder; always raises `NotImplementedError`.
        """
        raise NotImplementedError("This datatype must implement a 'output' method.")

    def binparse(self, read):
        """
        Placeholder; always raises `NotImplementedError`.
        """
        raise NotImplementedError("This datatype must implement a 'binparse' method.")

    def binoutput(self, value):
        """
        Placeholder; always raises `NotImplementedError`.
        """
        raise NotImplementedError("This datatype must implement a 'binoutput' method.")
|---|
| | 287 | |
|---|
| | 288 | class Char(Converter): |
|---|
| | 289 | default = '' |
|---|
| | 290 | |
|---|
| | 291 | def __init__(self, field): |
|---|
| | 292 | Converter.__init__(self, field) |
|---|
| | 293 | |
|---|
| | 294 | if field.arraysize is None or field.arraysize == '*': |
|---|
| | 295 | self.format = 'O' |
|---|
| | 296 | self.binparse = self._binparse_var |
|---|
| | 297 | self.binoutput = self._binoutput_var |
|---|
| 289 | | parts = array_splitter.split(value) |
|---|
| 290 | | |
|---|
| 291 | | if len(parts) % items != 0: |
|---|
| 292 | | raise ValueError("Incorrect number of elements in array") |
|---|
| 293 | | |
|---|
| 294 | | converted = [parser(x) for x in parts] |
|---|
| 295 | | if fixed: |
|---|
| 296 | | result = np.array(converted, dtype=format) |
|---|
| 297 | | if shape is not None: |
|---|
| 298 | | result = result.reshape(shape) |
|---|
| 299 | | else: |
|---|
| 300 | | result = [] |
|---|
| 301 | | for i in xrange(0, len(parts), items): |
|---|
| 302 | | element = np.array(converted[i:i+items], dtype=format) |
|---|
| 303 | | if shape is not None: |
|---|
| 304 | | element = element.reshape(shape) |
|---|
| 305 | | result.append(element) |
|---|
| 306 | | return np.asarray(result) |
|---|
| 307 | | |
|---|
| 308 | | def parse_complex_array(value): |
|---|
| 309 | | """ |
|---|
| 310 | | Parse a string containing an array of complex numbers in |
|---|
| 311 | | VOTABLE format and returns a Numpy array of complex numbers in |
|---|
| 312 | | the given *format*. |
|---|
| 313 | | """ |
|---|
| 314 | | if ',' in value: |
|---|
| 315 | | warn("File uses commas as array separators, even though " + |
|---|
| 316 | | "the spec does not allow this.", |
|---|
| 317 | | VOTableSpecWarning) |
|---|
| 318 | | |
|---|
| 319 | | if value.strip() == '': |
|---|
| 320 | | parts = [] |
|---|
| 321 | | else: |
|---|
| 322 | | parts = array_splitter.split(value) |
|---|
| 323 | | |
|---|
| 324 | | if (len(parts) / 2) % items != 0: |
|---|
| 325 | | raise ValueError("Incorrect number of elements in array") |
|---|
| 326 | | |
|---|
| 327 | | converted = [parser(x) for x in parts] |
|---|
| 328 | | if fixed: |
|---|
| 329 | | elements = [] |
|---|
| 330 | | for i in xrange(0, len(parts), 2): |
|---|
| 331 | | elements.append(complex(float(parts[i]), float(parts[i+1]))) |
|---|
| 332 | | result = np.array(elements, dtype=format) |
|---|
| 333 | | if shape is not None: |
|---|
| 334 | | result = result.reshape(shape) |
|---|
| 335 | | else: |
|---|
| 336 | | result = [] |
|---|
| 337 | | for i in xrange(0, len(parts), items*2): |
|---|
| 338 | | elements = [] |
|---|
| 339 | | for j in xrange(i, i+items*2, 2): |
|---|
| 340 | | elements.append(complex(float(parts[j]), float(parts[j+1]))) |
|---|
| 341 | | subresult = np.array(elements, dtype=format) |
|---|
| 342 | | if shape is not None: |
|---|
| 343 | | subresult = subresult.reshape(shape) |
|---|
| 344 | | result.append(subresult) |
|---|
| 345 | | return np.asarray(result) |
|---|
| 346 | | |
|---|
| 347 | | if format.startswith('c'): |
|---|
| 348 | | return parse_complex_array |
|---|
| 349 | | else: |
|---|
| 350 | | return parse_array |
|---|
| 351 | | |
|---|
def array_outputter_factory(element_outputter):
    """
    Build a function that writes a Numpy array as VOTABLE XML text.

    *element_outputter* is called on every element and must return a
    string; the returned function joins those strings with single
    spaces.
    """
    def output_array(value):
        # VOTABLE has no delimiter for nested dimensions, so the
        # elements are emitted in flat iteration order.
        pieces = [element_outputter(item) for item in value.flat]
        return ' '.join(pieces)

    return output_array
|---|
| 363 | | |
|---|
def parse_char(value):
    """
    Encode *value* as 7-bit ASCII.

    Characters outside ASCII make ``encode`` raise.
    """
    encoded = value.encode('ascii')
    return encoded
|---|
| 369 | | |
|---|
| 370 | | output_char = _xml_escape |
|---|
| 371 | | |
|---|
| 372 | | def binparse_char_factory(arraysize): |
|---|
| 373 | | """ |
|---|
| 374 | | Read the binary representation of a string. |
|---|
| 375 | | """ |
|---|
| 376 | | def binparse_char_star(read): |
|---|
| 377 | | length = struct.unpack(">I", read(4))[0] |
|---|
| | 299 | try: |
|---|
| | 300 | self.arraysize = int(field.arraysize) |
|---|
| | 301 | except ValueError: |
|---|
| | 302 | raise ValueError( |
|---|
| | 303 | "char fields can not be multidimensional") |
|---|
| | 304 | self.format = 'S%d' % self.arraysize |
|---|
| | 305 | self.binparse = self._binparse_fixed |
|---|
| | 306 | self.binoutput = self._binoutput_fixed |
|---|
| | 307 | self._struct_format = ">%ds" % self.arraysize |
|---|
| | 308 | |
|---|
| | 309 | def parse(self, value): |
|---|
| | 310 | return value.encode('ascii') |
|---|
| | 311 | |
|---|
| | 312 | def output(self, value): |
|---|
| | 313 | return _xml_escape(value) |
|---|
| | 314 | |
|---|
| | 315 | def _binparse_var(self, read): |
|---|
| | 316 | length = self._parse_length(read) |
|---|
| 398 | | return struct.pack(">I", 0) |
|---|
| 399 | | return struct.pack(">I", len(value)) + value.encode('ascii') |
|---|
| 400 | | |
|---|
| 401 | | def binoutput_char(value): |
|---|
| 402 | | return struct.pack(format, value.encode('ascii')) |
|---|
| 403 | | |
|---|
| 404 | | if arraysize in (None, '*'): |
|---|
| 405 | | return binoutput_char_star |
|---|
| 406 | | format = ">%ds" % arraysize |
|---|
| 407 | | return binoutput_char |
|---|
| 408 | | |
|---|
| 409 | | parse_unicode = unicode |
|---|
| 410 | | |
|---|
def output_unicode(value):
    """
    Turn a Unicode object into UTF-8 encoded XML text.

    The value is passed through `_xml_escape` first and the escaped
    text is then encoded as UTF-8.
    """
    escaped = _xml_escape(value)
    return escaped.encode('utf-8')
|---|
| 417 | | |
|---|
| 418 | | def binparse_unicode_factory(arraysize): |
|---|
| 419 | | """ |
|---|
| 420 | | Output the binary representation of a unicode string. |
|---|
| 421 | | """ |
|---|
| 422 | | def binparse_unicode_star(read): |
|---|
| 423 | | length = struct.unpack(">I", read(4))[0] |
|---|
| | 328 | return self._write_length(0) |
|---|
| | 329 | return self._write_length(len(value)) + value.encode('ascii') |
|---|
| | 330 | |
|---|
| | 331 | def _binoutput_fixed(self, value): |
|---|
| | 332 | return struct.pack(self._struct_format, value.encode('ascii')) |
|---|
| | 333 | |
|---|
| | 334 | class UnicodeChar(Converter): |
|---|
| | 335 | default = u'' |
|---|
| | 336 | |
|---|
| | 337 | def __init__(self, field): |
|---|
| | 338 | Converter.__init__(self, field) |
|---|
| | 339 | |
|---|
| | 340 | if field.arraysize is None or field.arraysize == '*': |
|---|
| | 341 | self.format = 'O' |
|---|
| | 342 | self.binparse = self._binparse_var |
|---|
| | 343 | self.binoutput = self._binoutput_var |
|---|
| | 344 | else: |
|---|
| | 345 | try: |
|---|
| | 346 | self.arraysize = int(field.arraysize) |
|---|
| | 347 | except ValueError: |
|---|
| | 348 | raise ValueError( |
|---|
| | 349 | "char fields can not be multidimensional") |
|---|
| | 350 | self.format = 'U%d' % self.arraysize |
|---|
| | 351 | self.binparse = self._binparse_fixed |
|---|
| | 352 | self.binoutput = self._binoutput_fixed |
|---|
| | 353 | self._struct_format = ">%ds" % (self.arraysize * 2) |
|---|
| | 354 | |
|---|
| | 355 | def parse(self, value): |
|---|
| | 356 | return unicode(value) |
|---|
| | 357 | |
|---|
| | 358 | def output(self, value): |
|---|
| | 359 | return _xml_escape(value).encode('utf-8') |
|---|
| | 360 | |
|---|
| | 361 | def _binparse_var(self, read): |
|---|
| | 362 | length = self._parse_length(read) |
|---|
| 445 | | return struct.pack(">I", 0) |
|---|
| 446 | | return struct.pack(">I", len(value)) + value.encode('utf_16_be') |
|---|
| 447 | | |
|---|
| 448 | | def binoutput_unicode(value): |
|---|
| 449 | | return struct.pack(format, value.encode('utf_16_be')) |
|---|
| 450 | | |
|---|
| 451 | | if arraysize in (None, '*'): |
|---|
| 452 | | return binoutput_unicode_star |
|---|
| 453 | | format = ">%ds" % (arraysize * 2) |
|---|
| 454 | | return binoutput_unicode |
|---|
| 455 | | |
|---|
def parse_int(value):
    """
    Convert a VOTABLE integer type to a Python int.

    Plain decimal text is tried first.  If that fails and the text
    starts with ``0x`` it is parsed as hexadecimal.

    Raises `ValueError` if the text is neither a valid decimal nor a
    valid ``0x``-prefixed hexadecimal number.
    """
    try:
        return int(value)
    except ValueError:
        if not value.startswith('0x'):
            # Previously this case fell through and silently
            # returned None; propagate the original error instead.
            raise
        # May still raise if the hex digits are invalid, which is
        # what we want.
        return int(value, 16)
|---|
| 467 | | |
|---|
def parse_boolean(value):
    """
    Convert VOTABLE boolean text to a Python boolean.

    The comparison is case-insensitive; only ``TRUE`` and ``FALSE``
    are accepted, anything else raises `ValueError`.
    """
    known = {'TRUE' : True,
             'FALSE': False}
    key = value.upper()
    if key not in known:
        raise ValueError("Invalid boolean value")
    return known[key]
|---|
| 478 | | |
|---|
def binparse_boolean_factory(format, arraysize):
    """
    Build a reader for boolean arrays in the binary format.

    The returned function reads one character per element (the
    product of the dimensions in *arraysize*), translates each
    through a fixed lookup table and returns a ``'b1'`` Numpy array
    reshaped to *arraysize*.  *format* is not used.
    """
    count = 1
    for dim in arraysize:
        count *= dim

    # '\0', ' ' and '?' map to None, which Numpy stores as False in
    # a 'b1' array.
    lookup = {'T': True, 't': True, '1': True,
              'F': False, 'f': False, '0': False,
              '\0': None, ' ': None, '?': None}

    def binparse_boolean(read):
        raw = read(count)
        flags = [lookup[char] for char in raw]
        return np.array(flags, dtype='b1').reshape(arraysize)

    return binparse_boolean
|---|
| 491 | | |
|---|
| 492 | | def output_boolean_factory(precision): |
|---|
| 493 | | """ |
|---|
| 494 | | Convert a Python boolean back into an VOTABLE string. |
|---|
| 495 | | """ |
|---|
| 496 | | def output_boolean(value): |
|---|
| 497 | | if value: |
|---|
| 498 | | return 'True' |
|---|
| | 375 | return self._write_length(0) |
|---|
| | 376 | return self._write_length(len(value)) + value.encode('utf_16_be') |
|---|
| | 377 | |
|---|
| | 378 | def _binoutput_fixed(self, value): |
|---|
| | 379 | return struct.pack(self._struct_format, value.encode('utf_16_be')) |
|---|
| | 380 | |
|---|
| | 381 | class VarArray(Converter): |
|---|
| | 382 | format = 'O' |
|---|
| | 383 | default = [] |
|---|
| | 384 | |
|---|
| | 385 | def __init__(self, field, base): |
|---|
| | 386 | Converter.__init__(self, field) |
|---|
| | 387 | |
|---|
| | 388 | self._base = base |
|---|
| | 389 | |
|---|
| | 390 | def parse(self, value): |
|---|
| | 391 | if value.strip() == '': |
|---|
| | 392 | parts = [] |
|---|
| | 393 | parts = array_splitter.split(value) |
|---|
| | 394 | |
|---|
| | 395 | if isinstance(self._base, NumericArray): |
|---|
| | 396 | items = self._base._items |
|---|
| | 397 | parse_parts = self._base.parse_parts |
|---|
| | 398 | if len(parts) % items != 0: |
|---|
| | 399 | raise ValueError("Incorrect number of elements in array") |
|---|
| | 400 | result = [] |
|---|
| | 401 | for i in xrange(0, len(parts), items): |
|---|
| | 402 | result.append(parse_parts(parts[i:i+items])) |
|---|
| | 403 | return np.array(result) |
|---|
| 506 | | except TypeError: |
|---|
| 507 | | length = 1 |
|---|
| 508 | | value = [value] |
|---|
| 509 | | result = [] |
|---|
| 510 | | for x in value: |
|---|
| 511 | | if x: |
|---|
| 512 | | result.append('T') |
|---|
| | 427 | result = [self._write_length(length)] |
|---|
| | 428 | binoutput = self._base.binoutput |
|---|
| | 429 | for x in value: |
|---|
| | 430 | result.append(binoutput(x)) |
|---|
| | 431 | return ''.join(result) |
|---|
| | 432 | |
|---|
class NumericArray(Converter):
    """
    Converter for fixed-size arrays of a numeric base converter.

    *base* supplies the per-element ``parse``/``output`` functions,
    the Numpy ``format`` and the ``default`` value.  The array shape
    is taken from ``field.parsed_arraysize`` (presumably a sequence
    of ints -- confirm against the field class).
    """
    vararray_type = VarArray

    def __init__(self, field, base):
        Converter.__init__(self, field)

        self._base = base
        self._arraysize = field.parsed_arraysize
        # Sub-array dtype string: shape tuple followed by base format.
        self.format = "%s%s" % (tuple(self._arraysize), self._base.format)

        # Total number of elements in one array value.
        self._items = 1
        for dim in self._arraysize:
            self._items *= dim

        self._memsize = np.dtype(self.format).itemsize
        self._bigendian_format = '>' + self.format

        self.default = np.ones(self._arraysize, dtype=self._base.format) * self._base.default

    def parse(self, value):
        """
        Parse delimited text into an array of shape ``_arraysize``.

        Raises `ValueError` when the number of tokens does not match
        the number of elements; blank text counts as zero tokens.
        """
        if value.strip() == '':
            parts = []
        else:
            # The original was missing this ``else``, so the empty
            # list was always overwritten by the split result.
            parts = array_splitter.split(value)
        if len(parts) != self._items:
            raise ValueError("Incorrect number of elements in array")
        return self.parse_parts(parts)

    def parse_parts(self, parts):
        """
        Convert already-split tokens with the base parser and reshape.
        """
        base_parse = self._base.parse
        result = np.array([base_parse(x) for x in parts], dtype=self._base.format)
        return result.reshape(self._arraysize)

    def output(self, value):
        """
        Write the elements in flat order, separated by spaces.
        """
        base_output = self._base.output
        return ' '.join(base_output(x) for x in value.flat)

    def binparse(self, read):
        """
        Read ``_memsize`` bytes and decode them as one big-endian array.
        """
        result = np.fromstring(read(self._memsize), dtype=self._bigendian_format)[0]
        return result

    def binoutput(self, value):
        """
        Return the bytes of *value* converted to big-endian order.
        """
        # The data must be copied into a big-endian array, otherwise
        # tostring() would emit the native byte order.
        big_endian = np.array(value, dtype=value.dtype.newbyteorder('>'))
        return big_endian.tostring()
|---|
| | 476 | |
|---|
class Numeric(Converter):
    """
    Shared behaviour for scalar numeric converters.

    Subclasses provide a class-level Numpy ``format`` string; the
    binary size and big-endian dtype string are derived from it.
    """
    array_type = NumericArray
    vararray_type = VarArray

    def __init__(self, field):
        Converter.__init__(self, field)

        fmt = self.format
        self._memsize = np.dtype(fmt).itemsize
        self._bigendian_format = '>' + fmt

    def binparse(self, read):
        """
        Read ``_memsize`` bytes and return the big-endian scalar they hold.
        """
        raw = read(self._memsize)
        decoded = np.fromstring(raw, dtype=self._bigendian_format)
        return decoded[0]

    def binoutput(self, value):
        """
        Return the big-endian bytes of the Numpy scalar *value*.
        """
        swapped = value.newbyteorder('>')
        return swapped.tostring()
|---|
| | 494 | |
|---|
| | 495 | class FloatingPoint(Numeric): |
|---|
| | 496 | default = np.nan |
|---|
| | 497 | |
|---|
| | 498 | def __init__(self, field): |
|---|
| | 499 | Numeric.__init__(self, field) |
|---|
| | 500 | |
|---|
| | 501 | precision = field.precision |
|---|
| | 502 | if precision is None: |
|---|
| | 503 | self._output_format = '%f' |
|---|
| | 504 | elif precision.startswith("E"): |
|---|
| | 505 | self._output_format = "%%.%dE" % (int(precision[1:])) |
|---|
| | 506 | elif precision.startswith("F"): |
|---|
| | 507 | self._output_format = "%%.%df" % (int(precision[1:])) |
|---|
| 514 | | result.append('F') |
|---|
| 515 | | return ''.join(result) |
|---|
| 516 | | |
|---|
def parse_bit(value):
    """
    Convert the text ``'1'`` or ``'0'`` to True or False.

    Any other value raises `ValueError`.
    """
    bits = {'1': True, '0': False}
    if value not in bits:
        raise ValueError("Invalid bit value")
    return bits[value]
|---|
| 523 | | |
|---|
| 524 | | def binparse_bit_factory(format, arraysize): |
|---|
| 525 | | """ |
|---|
| 526 | | Parses a bit field in BINARY format. |
|---|
| 527 | | """ |
|---|
| 528 | | items = 1 |
|---|
| 529 | | for dim in arraysize: |
|---|
| 530 | | items *= dim |
|---|
| 531 | | bytes = ((items-1) // 8) + 1 |
|---|
| 532 | | def binparse_bit(read): |
|---|
| 533 | | data = read(bytes) |
|---|
| | 509 | self._output_format = "%%.%df" % (int(precision)) |
|---|
| | 510 | |
|---|
| | 511 | def parse(self, value): |
|---|
| | 512 | return float(value) |
|---|
| | 513 | |
|---|
| | 514 | def output(self, value): |
|---|
| | 515 | if np.isfinite(value): |
|---|
| | 516 | return self._output_format % value |
|---|
| | 517 | elif np.isnan(value): |
|---|
| | 518 | return 'NaN' |
|---|
| | 519 | elif np.isposinf(value): |
|---|
| | 520 | return '+InF' |
|---|
| | 521 | elif np.isneginf(value): |
|---|
| | 522 | return '-InF' |
|---|
| | 523 | raise ValueError("Invalid floating point value") |
|---|
| | 524 | |
|---|
class Double(FloatingPoint):
    """
    Floating point converter using the Numpy format ``'f8'``.
    """
    format = 'f8'
|---|
| | 527 | |
|---|
class Float(FloatingPoint):
    """
    Floating point converter using the Numpy format ``'f4'``.
    """
    format = 'f4'
|---|
| | 530 | |
|---|
class Integer(Numeric):
    """
    Shared behaviour for the integer converters.

    Subclasses only set the Numpy ``format``.
    """
    default = 0

    def __init__(self, field):
        Numeric.__init__(self, field)

    def parse(self, value):
        """
        Convert decimal or ``0x``-prefixed hexadecimal text to an int.

        Raises `ValueError` when the text is neither.
        """
        try:
            return int(value)
        except ValueError:
            if not value.startswith('0x'):
                # Previously this case fell through and silently
                # returned None; propagate the original error instead.
                raise
            # May still raise if the hex digits are invalid, which
            # is what we want.
            return int(value, 16)

    def output(self, value):
        """
        Return ``str(value)``.
        """
        return str(value)
|---|
| | 548 | |
|---|
class UnsignedByte(Integer):
    """
    Integer converter using the Numpy format ``'u1'``.
    """
    format = 'u1'
|---|
| | 551 | |
|---|
class Short(Integer):
    """
    Integer converter using the Numpy format ``'i2'``.
    """
    format = 'i2'
|---|
| | 554 | |
|---|
class Int(Integer):
    """
    Integer converter using the Numpy format ``'i4'``.
    """
    format = 'i4'
|---|
| | 557 | |
|---|
class Long(Integer):
    """
    Integer converter using the Numpy format ``'i8'``.
    """
    format = 'i8'
|---|
| | 560 | |
|---|
class ComplexVarArray(VarArray):
    """
    Variable-length array converter for complex values.

    Each complex number occupies two consecutive tokens (real part,
    then imaginary part) in the text form.
    """

    def __init__(self, field, base):
        VarArray.__init__(self, field, base)

    def parse(self, value):
        """
        Parse delimited text into an array of complex values.

        When the base converter is a `ComplexArray`, the tokens are
        consumed in groups of ``base._items`` and each group is
        handed to ``base.parse_parts``.  Otherwise the tokens are
        consumed in (real, imaginary) pairs.

        Raises `ValueError` when the token count does not divide
        evenly into groups or pairs.
        """
        if value.strip() == '':
            parts = []
        else:
            # The original was missing this ``else``, so the empty
            # list was always overwritten by the split result.
            parts = array_splitter.split(value)

        if isinstance(self._base, ComplexArray):
            items = self._base._items
            parse_parts = self._base.parse_parts
            if len(parts) % items != 0:
                raise ValueError("Incorrect number of elements in array")
            result = []
            for i in xrange(0, len(parts), items):
                result.append(parse_parts(parts[i:i+items]))
            return np.array(result)
        else:
            if len(parts) % 2 != 0:
                raise ValueError("Incorrect number of elements in array")
            parse = self._base.parse_parts
            result = []
            for i in xrange(0, len(parts), 2):
                # Take both components.  The original sliced
                # ``parts[i:i+1]`` and lost the imaginary part.
                # They are converted to float because complex()
                # rejects a second argument when the first is a
                # string.
                pair = [float(x) for x in parts[i:i+2]]
                result.append(parse(pair))
            return np.array(result, dtype=self._base.format)
|---|
| | 585 | |
|---|
class ComplexArray(NumericArray):
    """
    Fixed-size array converter for complex values.

    ``_items`` counts text tokens, so it is twice the number of
    complex elements: each element is a (real, imaginary) pair.
    """
    vararray_type = ComplexVarArray

    def __init__(self, field, base):
        NumericArray.__init__(self, field, base)

        # Two tokens per complex element.
        self._items *= 2

    def parse(self, value):
        """
        Parse delimited text into a complex array of shape ``_arraysize``.
        """
        if value.strip() == '':
            parts = []
        else:
            # The original was missing this ``else``, so the empty
            # list was always overwritten by the split result.
            parts = array_splitter.split(value)
        return self.parse_parts(parts)

    def parse_parts(self, parts):
        """
        Convert already-split tokens to complex values and reshape.

        Raises `ValueError` when ``len(parts)`` is not ``_items``.
        """
        if len(parts) != self._items:
            raise ValueError("Incorrect number of elements in array")
        base_parse = self._base.parse_parts
        result = []
        for i in xrange(0, self._items, 2):
            # Take both components.  The original sliced
            # ``parts[i:i+1]`` and lost the imaginary part.  They
            # are converted to float because complex() rejects a
            # second argument when the first is a string.
            pair = [float(x) for x in parts[i:i+2]]
            result.append(base_parse(pair))
        result = np.array(result, dtype=self._base.format)
        return result.reshape(self._arraysize)
|---|
| | 609 | |
|---|
class Complex(FloatingPoint):
    """
    Shared behaviour for the complex converters.

    The text form is two floats: the real part followed by the
    imaginary part.
    """
    array_type = ComplexArray
    vararray_type = ComplexVarArray
    default = np.nan

    def __init__(self, field):
        FloatingPoint.__init__(self, field)

        # Reuse the single-float format once per component.
        single = self._output_format
        self._output_format = single + " " + single

    def parse(self, value):
        """
        Parse text holding exactly two floats into a complex number.

        Raises `ValueError` when the text does not split into two
        tokens.
        """
        components = [float(token) for token in array_splitter.split(value)]
        if len(components) != 2:
            raise ValueError("'%s' does not parse as a complex number" % value)
        return self.parse_parts(components)

    def parse_parts(self, parts):
        """
        Build a complex number from the sequence *parts*.
        """
        return complex(*parts)

    def output(self, value):
        """
        Format the real and imaginary parts with ``_output_format``.
        """
        components = (value.real, value.imag)
        return self._output_format % components
|---|
| | 631 | |
|---|
class FloatComplex(Complex):
    """
    Complex converter using the Numpy format ``'c8'``.
    """
    format = 'c8'
|---|
| | 634 | |
|---|
class DoubleComplex(Complex):
    """
    Complex converter using the Numpy format ``'c16'``.
    """
    format = 'c16'
|---|
| | 637 | |
|---|
| | 638 | class BitArray(NumericArray): |
|---|
| | 639 | def __init__(self, field, base): |
|---|
| | 640 | NumericArray.__init__(self, field, base) |
|---|
| | 641 | |
|---|
| | 642 | self._bytes = ((self._items - 1) // 8) + 1 |
|---|
| | 643 | |
|---|
| | 644 | def binparse(self, read): |
|---|
| | 645 | data = read(self._bytes) |
|---|
| 564 | | return output_bit |
|---|
| 565 | | |
|---|
def binoutput_bit(value):
    """
    Outputs a bit field in BINARY format.

    *value* is either a scalar or an array; an array is walked in
    flat order.  Bits are packed most-significant-bit first, eight
    per byte, and the last byte is zero-padded when the bit count is
    not a multiple of eight.

    Returns the packed bytes: ``ceil(n / 8)`` bytes for ``n`` bits.
    """
    if np.isscalar(value):
        bits = [value]
    else:
        bits = value.flat

    bit_no = 7
    byte = 0
    packed = []
    for bit in bits:
        if bit:
            byte |= 1 << bit_no
        if bit_no == 0:
            packed.append(byte)
            bit_no = 7
            byte = 0
        else:
            bit_no -= 1
    # Only flush a partially filled byte.  The original appended
    # unconditionally, emitting a spurious zero byte whenever the
    # bit count was a multiple of eight.
    if bit_no != 7:
        packed.append(byte)
    return struct.pack("%sB" % len(packed), *packed)
|---|
| 590 | | |
|---|
def parse_complex(value):
    """
    Convert VOTABLE text holding floats to a Python complex number.

    The text is split with `array_splitter` and every token is
    converted to float.  More than two tokens raises `ValueError`.
    """
    components = [float(token) for token in array_splitter.split(value)]
    if len(components) > 2:
        raise ValueError("'%s' does not parse as a complex number" % value)
    return complex(*components)
|---|
| 600 | | |
|---|
def output_complex_factory(precision):
    """
    Build a function that writes a complex number as XML text, i.e.
    the real and imaginary parts separated by a single space.

    *precision* selects the per-component format: None gives
    ``%f``, ``"E<n>"`` gives ``%.<n>E``, and ``"F<n>"`` or a bare
    ``"<n>"`` gives ``%.<n>f``.
    """
    if precision is None:
        single = '%f'
    else:
        if precision.startswith("E"):
            single = "%%.%dE" % (int(precision[1:]))
        elif precision.startswith("F"):
            single = "%%.%df" % (int(precision[1:]))
        else:
            single = "%%.%df" % (int(precision))
    pair_format = single + " " + single

    def output_complex(value):
        components = (value.real, value.imag)
        return pair_format % components

    return output_complex
|---|
| 619 | | |
|---|
def output_float_factory(precision):
    """
    Build a function that writes a float with the requested precision.

    *precision* selects the format: None gives ``%f``, ``"E<n>"``
    gives ``%.<n>E``, and ``"F<n>"`` or a bare ``"<n>"`` gives
    ``%.<n>f``.  Non-finite values are written as ``NaN``, ``+InF``
    or ``-InF``.
    """
    if precision is None:
        template = '%f'
    else:
        if precision.startswith("E"):
            template = "%%.%dE" % (int(precision[1:]))
        elif precision.startswith("F"):
            template = "%%.%df" % (int(precision[1:]))
        else:
            template = "%%.%df" % (int(precision))

    def output_float(value):
        if np.isfinite(value):
            return template % value
        if np.isnan(value):
            return 'NaN'
        if np.isposinf(value):
            return '+InF'
        if np.isneginf(value):
            return '-InF'
        raise ValueError("Invalid floating point value")

    return output_float
|---|
| 646 | | |
|---|
def output_generic_factory(precision):
    """
    Return the built-in `str` as the XML output function.

    *precision* is accepted for signature compatibility with the
    other output factories and is ignored.
    """
    outputter = str
    return outputter
|---|
| 653 | | |
|---|
def binparse_generic_factory(format, arraysize):
    """
    Build a generic binary reader for the Numpy dtype *format*.

    The returned function reads ``np.dtype(format).itemsize`` bytes
    and decodes them as big-endian.  When *arraysize* equals ``[1]``
    the single element is returned; otherwise the decoded array is
    reshaped to *arraysize*.
    """
    nbytes = np.dtype(format).itemsize
    dtype_be = ">" + format

    def binparse(read):
        raw = read(nbytes)
        decoded = np.fromstring(raw, dtype=dtype_be)
        if arraysize != [1]:
            return decoded.reshape(arraysize)
        return decoded[0]

    return binparse
|---|
| 666 | | |
|---|
def binoutput_generic(value):
    """
    Write a Numpy scalar or array to the binary format as
    big-endian bytes.
    """
    if not np.isscalar(value):
        # The data must be copied into a big-endian array, otherwise
        # tostring() would not follow the new (probably non-native)
        # byte order.
        target_dtype = value.dtype.newbyteorder('>')
        return np.array(value, dtype=target_dtype).tostring()
    return value.newbyteorder('>').tostring()
|---|
| 678 | | |
|---|
def binparse_variable_length_factory(datatype, arraysize, parse):
    """
    Build a binary reader for variable-length arrays.

    Both readers start by reading a 4-byte big-endian element count.
    For the ``'bit'`` datatype all bits are then read in one go via
    `binparse_bit_factory` and reshaped to ``arraysize + [count]``.
    For every other datatype *parse* is called ``count`` times on
    the same *read* callable.
    """
    per_element = 1
    for dim in arraysize:
        per_element *= dim

    def binparse_variable_length(read):
        (count,) = struct.unpack('>I', read(4))
        elements = []
        for i in xrange(count):
            elements.append(parse(read))
        return np.asarray(elements)

    def binparse_variable_length_bit(read):
        (count,) = struct.unpack('>I', read(4))
        total_bits = per_element * count
        reader = binparse_bit_factory('b1', [total_bits])
        bit_values = reader(read)
        return bit_values.reshape(tuple(arraysize + [count]))

    if datatype != 'bit':
        return binparse_variable_length
    return binparse_variable_length_bit
|---|
| 704 | | |
|---|
def binoutput_variable_length_factory(outputter):
    """
    Build a binary writer for variable-length arrays.

    The returned function emits a 4-byte big-endian element count
    followed by ``outputter(value)``.  None or an empty value is
    written as a count of zero with no payload.
    """
    def binoutput_variable_length(value):
        count = 0 if value is None else len(value)
        if count == 0:
            return struct.pack('>I', 0)
        header = struct.pack('>I', count)
        return header + outputter(value)

    return binoutput_variable_length
|---|
| | 703 | |
|---|
def binparse(self, read):
    """
    Read one byte and report whether its ``0x8`` bit is set.
    """
    # NOTE(review): the mask is 0x8, not the most significant bit
    # (0x80) -- confirm this matches the intended encoding.
    byte = ord(read(1))
    return (byte & 0x8) != 0
|---|
| | 707 | |
|---|
def binoutput(self, value):
    """
    Return a one-character string: ``chr(0x8)`` for a true *value*,
    ``chr(0)`` otherwise.
    """
    if not value:
        return chr(0)
    return chr(0x8)
|---|
| | 712 | |
|---|
class BooleanArray(NumericArray):
    """
    Fixed-size array converter for boolean values.

    The binary form uses one byte per element, read and written
    through the base converter's ``binparse``/``binoutput``.
    """

    def __init__(self, field, base):
        NumericArray.__init__(self, field, base)

    def binparse(self, read):
        """
        Read ``_items`` single-byte booleans and reshape to ``_arraysize``.
        """
        # Use the base converter's *binary* parser.  The original
        # passed each raw byte to the text parser, which only
        # accepts 'TRUE'/'FALSE' and therefore rejected 'T', 'F',
        # '1', '0', ...
        binparse = self._base.binparse
        result = np.array([binparse(read) for i in xrange(self._items)],
                          dtype='b1')
        return result.reshape(self._arraysize)

    def binoutput(self, value):
        """
        Write one byte per element, in flat order.
        """
        # Use the base converter's *binary* writer.  The original
        # used the text writer, emitting 'TRUE'/'FALSE' tokens
        # instead of a single byte per element.
        binoutput = self._base.binoutput
        result = [binoutput(x) for x in value.flat]
        return ''.join(result)
|---|
| | 727 | |
|---|
class Boolean(Converter):
    """
    Converter for single boolean values.

    The text form is ``TRUE``/``FALSE`` (parsed case-insensitively);
    the binary form is one character.
    """
    format = 'b1'
    array_type = BooleanArray
    vararray_type = VarArray
    default = False

    def __init__(self, field):
        Converter.__init__(self, field)

    def parse(self, value):
        """
        Convert ``TRUE``/``FALSE`` text to a Python boolean.

        Raises `ValueError` for any other text.
        """
        known = {'TRUE' : True,
                 'FALSE': False}
        key = value.upper()
        if key not in known:
            raise ValueError("Invalid boolean value")
        return known[key]

    def output(self, value):
        """
        Return ``'TRUE'`` for a true *value*, ``'FALSE'`` otherwise.
        """
        if not value:
            return 'FALSE'
        return 'TRUE'

    def binparse(self, read):
        """
        Read one character and translate it to True, False or None.

        Raises `ValueError` for a character outside the lookup table.
        """
        lookup = {'T': True, 't': True, '1': True,
                  'F': False, 'f': False, '0': False,
                  '\0': None, ' ': None, '?': None}
        char = read(1)
        if char not in lookup:
            raise ValueError("Invalid boolean value")
        return lookup[char]

    def binoutput(self, value):
        """
        Return ``'T'`` for a true *value*, ``'F'`` otherwise.
        """
        if not value:
            return 'F'
        return 'T'
|---|
| 1113 | | 'double' : |
|---|
| 1114 | | (float, output_float_factory, |
|---|
| 1115 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1116 | | 'f8'), |
|---|
| 1117 | | 'float' : |
|---|
| 1118 | | (float, output_float_factory, |
|---|
| 1119 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1120 | | 'f4'), |
|---|
| 1121 | | 'bit' : |
|---|
| 1122 | | (parse_bit, output_bit_factory, |
|---|
| 1123 | | binparse_bit_factory, binoutput_bit, |
|---|
| 1124 | | 'b1'), |
|---|
| 1125 | | 'boolean' : |
|---|
| 1126 | | (parse_boolean, output_boolean_factory, |
|---|
| 1127 | | binparse_boolean_factory, binoutput_boolean, |
|---|
| 1128 | | 'b1'), |
|---|
| 1129 | | 'unsignedByte' : |
|---|
| 1130 | | (parse_int, output_generic_factory, |
|---|
| 1131 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1132 | | 'u1'), |
|---|
| 1133 | | 'short' : |
|---|
| 1134 | | (parse_int, output_generic_factory, |
|---|
| 1135 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1136 | | 'i2'), |
|---|
| 1137 | | 'int' : |
|---|
| 1138 | | (parse_int, output_generic_factory, |
|---|
| 1139 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1140 | | 'i4'), |
|---|
| 1141 | | 'long' : |
|---|
| 1142 | | (parse_int, output_generic_factory, |
|---|
| 1143 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1144 | | 'i8'), |
|---|
| 1145 | | 'floatComplex' : |
|---|
| 1146 | | (parse_complex, output_complex_factory, |
|---|
| 1147 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1148 | | 'c8'), |
|---|
| 1149 | | 'doubleComplex' : |
|---|
| 1150 | | (parse_complex, output_complex_factory, |
|---|
| 1151 | | binparse_generic_factory, binoutput_generic, |
|---|
| 1152 | | 'c16') |
|---|
| 1153 | | } |
|---|
| 1154 | | |
|---|
| 1155 | | if self.datatype == 'char': |
|---|
| 1156 | | if self.arraysize is None or self.arraysize == '*': |
|---|
| 1157 | | format = 'O' |
|---|
| 1158 | | arraysize = None |
|---|
| | 1161 | 'double': Double, |
|---|
| | 1162 | 'float': Float, |
|---|
| | 1163 | 'bit': Bit, |
|---|
| | 1164 | 'boolean': Boolean, |
|---|
| | 1165 | 'unsignedByte': UnsignedByte, |
|---|
| | 1166 | 'short': Short, |
|---|
| | 1167 | 'int': Int, |
|---|
| | 1168 | 'long': Long, |
|---|
| | 1169 | 'floatComplex': FloatComplex, |
|---|
| | 1170 | 'doubleComplex': DoubleComplex, |
|---|
| | 1171 | 'char': Char, |
|---|
| | 1172 | 'unicodeChar': UnicodeChar } |
|---|
| | 1173 | |
|---|
| | 1174 | if self.datatype not in mapping: |
|---|
| | 1175 | raise ValueError("Unknown datatype '%s'" % datatype) |
|---|
| | 1176 | |
|---|
| | 1177 | cls = mapping[self.datatype] |
|---|
| | 1178 | self.converter = cls(self) |
|---|
| | 1179 | if (self.datatype not in ('char', 'unicodeChar') and |
|---|
| | 1180 | self.arraysize is not None): |
|---|
| | 1181 | arraysize = self.arraysize |
|---|
| | 1182 | if arraysize[-1] == '*': |
|---|
| | 1183 | arraysize = self.arraysize[:-1] |
|---|
| | 1184 | last_x = arraysize.rfind('x') |
|---|
| | 1185 | if last_x == -1: |
|---|
| | 1186 | arraysize = '' |
|---|
| | 1187 | else: |
|---|
| | 1188 | arraysize = arraysize[:last_x] |
|---|
| | 1189 | fixed = False |
|---|
| 1179 | | try: |
|---|
| 1180 | | arraysize = int(self.arraysize) |
|---|
| 1181 | | except ValueError: |
|---|
| 1182 | | raise ValueError( |
|---|
| 1183 | | "unicodeChar fields can not be multidimensional") |
|---|
| 1184 | | format = 'U%d' % int(arraysize) |
|---|
| 1185 | | |
|---|
| 1186 | | self.parser = parse_unicode |
|---|
| 1187 | | self.outputter = output_unicode |
|---|
| 1188 | | self.binparser = binparse_unicode_factory(arraysize) |
|---|
| 1189 | | self.binoutputter = binoutput_unicode_factory(arraysize) |
|---|
| 1190 | | self.numpy_format = format |
|---|
| 1191 | | self.default = '' |
|---|
| 1192 | | |
|---|
| 1193 | | elif self.datatype in mapping: |
|---|
| 1194 | | parser, output_factory, binparse_factory, binoutputter, format = \ |
|---|
| 1195 | | mapping[self.datatype] |
|---|
| 1196 | | outputter = output_factory(self.precision) |
|---|
| 1197 | | |
|---|
| 1198 | | if self.arraysize is None: |
|---|
| 1199 | | arraysize = [1] |
|---|
| 1200 | | arrayformat = format |
|---|
| 1201 | | self.parser = parser |
|---|
| 1202 | | self.outputter = outputter |
|---|
| 1203 | | self.binparser = binparse_factory(format, [1]) |
|---|
| 1204 | | self.binoutputter = binoutputter |
|---|
| 1205 | | self.numpy_format = format |
|---|
| 1206 | | if self.datatype in ('float', 'double'): |
|---|
| 1207 | | self.default = np.nan |
|---|
| 1208 | | else: |
|---|
| 1209 | | arraysize = self.arraysize |
|---|
| 1210 | | if arraysize[-1] == '*': |
|---|
| 1211 | | arraysize = self.arraysize[:-1] |
|---|
| 1212 | | last_x = arraysize.rfind('x') |
|---|
| 1213 | | if last_x == -1: |
|---|
| 1214 | | arraysize = '' |
|---|
| 1215 | | else: |
|---|
| 1216 | | arraysize = arraysize[:last_x] |
|---|
| 1217 | | fixed = False |
|---|
| 1218 | | else: |
|---|
| 1219 | | fixed = True |
|---|
| 1220 | | |
|---|
| 1221 | | if arraysize != '': |
|---|
| 1222 | | arraysize = [int(x) for x in arraysize.split("x")] |
|---|
| 1223 | | arraysize.reverse() |
|---|
| 1224 | | else: |
|---|
| 1225 | | arraysize = [] |
|---|
| 1226 | | |
|---|
| 1227 | | arrayformat = '%s%s' % (tuple(arraysize), format) |
|---|
| 1228 | | self.parser = array_parser_factory(parser, format, fixed, arraysize) |
|---|
| 1229 | | self.outputter = array_outputter_factory(outputter) |
|---|
| 1230 | | if fixed: |
|---|
| 1231 | | self.binparser = binparse_factory(arrayformat, arraysize) |
|---|
| 1232 | | self.binoutputter = binoutputter |
|---|
| 1233 | | self.numpy_format = arrayformat |
|---|
| 1234 | | self.default = np.zeros((1,), dtype=arrayformat) |
|---|
| 1235 | | else: |
|---|
| 1236 | | self.binparser = binparse_variable_length_factory( |
|---|
| 1237 | | self.datatype, |
|---|
| 1238 | | arraysize, |
|---|
| 1239 | | binparse_factory(arrayformat, arraysize)) |
|---|
| 1240 | | self.binoutputter = binoutput_variable_length_factory(binoutputter) |
|---|
| 1241 | | self.numpy_format = 'O' |
|---|
| 1242 | | self.default = np.array([], dtype=arrayformat) |
|---|
| 1243 | | else: |
|---|
| 1244 | | raise ValueError("Unknown datatype '%s'" % datatype) |
|---|
| | 1197 | arraysize = [] |
|---|
| | 1198 | |
|---|
| | 1199 | self.parsed_arraysize = arraysize |
|---|
| | 1200 | |
|---|
| | 1201 | if arraysize != []: |
|---|
| | 1202 | self.converter = self.converter.array_type( |
|---|
| | 1203 | |
|---|