Source code for pylas.point.record

""" Contains the classes that manages Las PointRecords
Las PointRecords are represented using Numpy's structured arrays,
The PointRecord classes provide a few extra things to manage these arrays
in the context of Las point data
"""
import logging
from abc import ABC, abstractmethod

import numpy as np

from . import dims, packing
from .. import errors
from ..compression import lazperf_decompress_buffer
from ..point import PointFormat

logger = logging.getLogger(__name__)


def raise_not_enough_bytes_error(
    expected_bytes_len, missing_bytes_len, point_data_buffer_len, points_dtype
):
    """ Raises a PylasError describing a truncated point data buffer

    Parameters
    ----------
    expected_bytes_len : int
        Number of bytes that should have been read
    missing_bytes_len : int
        Number of bytes that could not be read
    point_data_buffer_len : int
        Number of bytes that were actually read
    points_dtype : numpy.dtype
        dtype of the points, its itemsize is used as the point size

    Raises
    ------
    errors.PylasError
        Always
    """
    point_size = points_dtype.itemsize
    # The divisions are intentionally true divisions: a fractional number of
    # points in the message shows that the buffer ends in the middle of a point
    missing_points = missing_bytes_len / point_size
    available_points = point_data_buffer_len / point_size

    template = (
        "The file does not contain enough bytes to store the expected number of points\n"
        "expected {} bytes, read {} bytes ({} bytes missing == {} points) and it cannot be corrected\n"
        "{} (bytes) / {} (point_size) = {} (points)"
    )
    message = template.format(
        expected_bytes_len,
        point_data_buffer_len,
        missing_bytes_len,
        missing_points,
        point_data_buffer_len,
        point_size,
        available_points,
    )
    raise errors.PylasError(message)
class IPointRecord(ABC):
    """ Interface of the objects wrapping the numpy structured array
    that contains the points data
    """

    @property
    @abstractmethod
    def point_size(self):
        """ Shall return the size of a point as it will be written in the header """

    @property
    @abstractmethod
    def actual_point_size(self):
        """ Shall return the size in bytes that a point actually takes in memory """

    @abstractmethod
    def __getitem__(self, item):
        """ Shall give access to the values stored under ``item`` """

    @abstractmethod
    def __setitem__(self, key, value):
        """ Shall store ``value`` under ``key`` """

    @abstractmethod
    def __len__(self):
        """ Shall return the length of the record """

    @abstractmethod
    def raw_bytes(self):
        """ Shall return the content of the record as bytes """

    @abstractmethod
    def write_to(self, out):
        """ Shall write the record to the ``out`` stream """

    @classmethod
    @abstractmethod
    def from_stream(cls, stream, point_format_id, count):
        """ Shall build a record by reading ``count`` points from ``stream`` """

    @classmethod
    @abstractmethod
    def empty(cls, point_format_id):
        """ Shall build a record that contains no points """
class PointRecord(IPointRecord):
    """ Behaviour shared by the concrete point records

    A record couples a numpy structured array (``self.array``) with the
    point format (``self.point_format``) that describes its dimensions.
    """

    def __init__(self, data, point_format: PointFormat):
        self.array = data
        self.point_format = point_format

    @property
    def dimensions_names(self):
        """ Returns the dimension names given by the point format """
        return self.point_format.dimension_names

    @property
    def extra_dimensions_names(self):
        """ Returns the names of extra-dimensions contained in the PointRecord """
        return self.point_format.extra_dimension_names

    @property
    def actual_point_size(self):
        """ Returns the point size in bytes taken by each points of the record

        Returns
        -------
        int
            The point size in byte

        """
        return self.array.dtype.itemsize

    @classmethod
    def from_point_record(cls, other_point_record, new_point_format):
        """ Construct a new record of type ``cls`` from an existing one,
        with the ability to change the point format while doing so

        The new record has as many points as ``other_point_record``; the
        dimensions that can be copied are copied, the others are left to zero.
        """
        array = np.zeros_like(other_point_record.array, dtype=new_point_format.dtype)
        new_record = cls(array, new_point_format)
        new_record.copy_fields_from(other_point_record)
        return new_record

    def copy_fields_from(self, other_record):
        """ Tries to copy the values of the current dimensions from other_record

        ``other_record`` may be any object indexable by dimension name
        (another record or a numpy structured array).
        """
        for dim_name in self.dimensions_names:
            try:
                self[dim_name] = other_record[dim_name]
            except ValueError:
                # Deliberate best effort: numpy raises ValueError when one of
                # the two sides has no field named dim_name, such dimensions
                # are simply left untouched
                pass

    def add_extra_dims(self, type_tuples):
        """ Adds extra dimensions to the record

        The point format is extended in place with ``type_tuples``, then the
        array is re-allocated with the resulting dtype and the existing
        values are copied back into it.
        """
        self.point_format.extra_dims.extend(type_tuples)
        old_array = self.array
        self.array = np.zeros_like(old_array, dtype=self.point_format.dtype)
        self.copy_fields_from(old_array)

    def memoryview(self):
        """ Returns a memoryview of the underlying array """
        return memoryview(self.array)

    def raw_bytes(self):
        """ Returns a copy of the points as bytes """
        return self.array.tobytes()

    def __getitem__(self, item):
        return self.array[item]

    def __setitem__(self, key, value):
        self._append_zeros_if_too_small(value)
        self.array[key] = value

    def _append_zeros_if_too_small(self, value):
        """ Appends zeros to the points stored if the value we are trying to
        fit is bigger

        Nothing is done when ``value`` is not longer than the record, or when
        it has no length at all (scalar): numpy then decides whether the
        assignment is valid.
        """
        try:
            size_diff = len(value) - len(self.array)
        except TypeError:
            # value is a scalar (no len()), it will be broadcast on assignment
            return

        # Only grow. A negative difference used to reach np.zeros, which
        # raised "negative dimensions are not allowed" instead of letting the
        # assignment itself accept or reject the value
        if size_diff > 0:
            self.array = np.append(
                self.array, np.zeros(size_diff, dtype=self.array.dtype)
            )

    def __len__(self):
        return self.array.shape[0]
class PackedPointRecord(PointRecord):
    """
    In the PackedPointRecord, fields that are a combinations of many sub-fields
    (fields stored on less than a byte) are still packed together
    and are only de-packed and re-packed when accessed.

    This uses of less memory than if the sub-fields were unpacked
    However some operations on sub-fields require extra steps:

    >>> #return number is a sub-field
    >>> from pylas import PointFormat
    >>> packed_point_record = PackedPointRecord.zeros(PointFormat(0), 10)
    >>> packed_point_record['return_number'][:] = 1
    >>> np.all(packed_point_record['return_number'] == 1)
    False

    >>> packed_point_record = PackedPointRecord.zeros(PointFormat(0), 10)
    >>> rn = packed_point_record['return_number']
    >>> rn[:] = 1
    >>> packed_point_record['return_number'] = rn
    >>> np.all(packed_point_record['return_number'] == 1)
    True
    """

    def __init__(self, data, point_format=None):
        if point_format is None:
            # No format given: deduce it from the dtype of the array
            point_format = PointFormat(dims.np_dtype_to_point_format(data.dtype))
        super().__init__(data, point_format)
        self.sub_fields_dict = point_format.sub_fields

    @property
    def all_dimensions_names(self):
        """ Returns all the dimensions names, including the names of sub_fields
        and their corresponding packed fields
        """
        return frozenset(self.array.dtype.names + tuple(self.sub_fields_dict.keys()))

    @property
    def point_size(self):
        """ Returns the point size in bytes taken by each points of the record

        Returns
        -------
        int
            The point size in byte

        """
        return self.array.dtype.itemsize

    @classmethod
    def zeros(cls, point_format, point_count):
        """ Creates a new point record with all dimensions initialized to zero

        Parameters
        ----------
        point_format: pylas.PointFormat
            The point format the point record should have
        point_count : int
            The number of point the point record should have

        Returns
        -------
        PackedPointRecord

        """
        data = np.zeros(point_count, point_format.dtype)
        return cls(data, point_format)

    @classmethod
    def empty(cls, point_format):
        """ Creates an empty point record.

        Parameters
        ----------
        point_format: pylas.PointFormat
            The point format the point record should have

        Returns
        -------
        PackedPointRecord

        """
        return cls.zeros(point_format, point_count=0)

    @classmethod
    def from_stream(cls, stream, point_format, count):
        """ Construct the point record by reading the points from the stream

        Parameters
        ----------
        stream : object with a ``read(n)`` method returning bytes
        point_format : pylas.PointFormat
            Its dtype gives the layout of the points
        count : int
            The number of points expected in the stream

        Raises
        ------
        errors.PylasError
            (through raise_not_enough_bytes_error) when the stream ends in
            the middle of a point. When it ends on a point boundary the
            points that are there are kept and the problem is only logged.
        """
        points_dtype = point_format.dtype
        point_size = points_dtype.itemsize
        expected_bytes_len = count * point_size

        # bytearray: the array built on top of the buffer must stay writable
        point_data_buffer = bytearray(stream.read(expected_bytes_len))

        try:
            data = np.frombuffer(point_data_buffer, dtype=points_dtype, count=count)
        except ValueError:
            # numpy refuses to read more elements than the buffer contains
            read_bytes_len = len(point_data_buffer)
            if read_bytes_len % point_size != 0:
                raise_not_enough_bytes_error(
                    expected_bytes_len,
                    expected_bytes_len - read_bytes_len,
                    read_bytes_len,
                    points_dtype,
                )
            else:
                actual_count = read_bytes_len // point_size
                logger.critical(
                    "Expected {} points, there are {} ({} missing)".format(
                        count, actual_count, count - actual_count
                    )
                )
                data = np.frombuffer(
                    point_data_buffer, dtype=points_dtype, count=actual_count
                )

        return cls(data, point_format)

    @classmethod
    def from_buffer(cls, buffer, point_format, count, offset=0):
        """ Construct the point record on top of ``buffer`` (no copy is made),
        reading ``count`` points starting ``offset`` bytes into it
        """
        points_dtype = point_format.dtype
        data = np.frombuffer(buffer, dtype=points_dtype, offset=offset, count=count)
        return cls(data, point_format)

    def write_to(self, out):
        """ Writes the points to the output stream"""
        out.write(self.raw_bytes())

    def to_unpacked(self):
        """ Returns a new UnpackedPointRecord holding a copy of the points """
        arr = np.zeros_like(self.array, self.point_format.unpacked_dtype)
        record = UnpackedPointRecord(arr, self.point_format)
        record.copy_fields_from(self)
        return record

    def __getitem__(self, item):
        """ Gives access to the underlying numpy array
        Unpack the dimension if item is the name a sub-field
        """
        # Only the lookup is guarded, so that an error raised while unpacking
        # is not mistaken for "item is not a sub-field"
        try:
            composed_dim, sub_field = self.sub_fields_dict[item]
        except (KeyError, TypeError):
            # KeyError: item is not a sub-field name
            # TypeError: item is unhashable (list or ndarray used as index)
            return self.array[item]
        return packing.unpack(
            self.array[composed_dim], sub_field.mask, dtype=sub_field.type
        )

    def __setitem__(self, key, value):
        """ Sets elements in the array
        The value is packed into its composed field if key is the name of a sub-field

        Raises
        ------
        OverflowError
            If the value does not fit in the sub-field
        """
        self._append_zeros_if_too_small(value)
        try:
            composed_dim, sub_field = self.sub_fields_dict[key]
        except (KeyError, TypeError):
            # Not a sub-field (or an unhashable index): plain array assignment
            self.array[key] = value
            return

        try:
            packing.pack(self.array[composed_dim], value, sub_field.mask, inplace=True)
        except OverflowError as e:
            raise OverflowError(
                "Overflow when packing {} into {}: {}".format(
                    sub_field.name, composed_dim, e
                )
            ) from e

    def __repr__(self):
        return "<PackedPointRecord(fmt: {}, len: {}, point size: {})>".format(
            self.point_format, len(self), self.actual_point_size
        )
class UnpackedPointRecord(PointRecord):
    """
    In the Unpacked Point Record, all the sub-fields are un-packed meaning that they are in their
    own array.
    Because the minimum size for the elements of an array is 8 bits, and sub-fields are only a few bits
    (less than 8) the resulting unpacked array uses more memory, especially if the point format has lots of sub-fields
    """

    def __init__(self, data, point_fmt_id=None):
        # Despite its name, point_fmt_id is stored as the record's
        # point_format (PackedPointRecord.to_unpacked passes a PointFormat)
        if point_fmt_id is None:
            # Wrap the deduced format in a PointFormat, as PackedPointRecord
            # does: the base class reads attributes such as dimension_names on it
            point_fmt_id = PointFormat(
                dims.np_dtype_to_point_format(data.dtype, unpacked=True)
            )
        super().__init__(data, point_fmt_id)

    @property
    def point_size(self):
        """ Returns the size of a point once packed, which differs from the
        size the unpacked points take in memory
        """
        return self.point_format.dtype.itemsize

    def write_to(self, out):
        """ Packs the points and writes them to the output stream """
        out.write(self.to_packed().raw_bytes())

    @classmethod
    def from_stream(cls, stream, point_format_id, count, extra_dims=None):
        """ Construct the point record by reading packed points from the
        stream and unpacking them

        Parameters
        ----------
        stream : object with a ``read(n)`` method returning bytes
        point_format_id : pylas.PointFormat
            Forwarded as the point format to PackedPointRecord.from_stream
        count : int
            The number of points to read
        extra_dims : optional
            Kept for backward compatibility and ignored:
            PackedPointRecord.from_stream takes no such argument, the
            extra dimensions have to be described by the point format
        """
        # Delegate to the packed reader; calling UnpackedPointRecord.from_stream
        # here recursed until RecursionError
        return PackedPointRecord.from_stream(
            stream, point_format_id, count
        ).to_unpacked()

    @classmethod
    def from_compressed_buffer(
        cls, compressed_buffer, point_format_id, count, laszip_vlr
    ):
        """ Construct the point record by decompressing the points of the
        buffer and unpacking them
        """
        # NOTE(review): PackedPointRecord does not define
        # from_compressed_buffer in this module as far as can be seen, so
        # this call looks like it raises AttributeError - confirm and restore
        # the packed implementation
        return PackedPointRecord.from_compressed_buffer(
            compressed_buffer, point_format_id, count, laszip_vlr
        ).to_unpacked()

    @classmethod
    def empty(cls, point_format):
        """ Creates an empty unpacked point record """
        # Use the unpacked dtype, like PackedPointRecord.to_unpacked does, so
        # that the sub-fields exist as dimensions of the array
        data = np.zeros(0, dtype=point_format.unpacked_dtype)
        return cls(data, point_format)

    def to_packed(self):
        """ Returns a new PackedPointRecord holding a copy of the points """
        return PackedPointRecord.from_point_record(self, self.point_format)