""" Contains the classes that manages Las PointRecords
Las PointRecords are represented using NumPy structured arrays.
The PointRecord classes provide a few extra things to manage these arrays
in the context of Las point data.
"""
import logging
from abc import ABC, abstractmethod
import numpy as np
from . import dims, packing
from .. import errors
from ..compression import lazperf_decompress_buffer
from ..point import PointFormat
logger = logging.getLogger(__name__)
def raise_not_enough_bytes_error(
    expected_bytes_len, missing_bytes_len, point_data_buffer_len, points_dtype
):
    """ Raises the error reporting that the points data is truncated

    Parameters
    ----------
    expected_bytes_len:
        Number of bytes that were expected for the points
    missing_bytes_len:
        Number of bytes that are missing
    point_data_buffer_len:
        Number of bytes that were actually read
    points_dtype:
        dtype of the points, only its ``itemsize`` is used (as the size of one point)

    Raises
    ------
    errors.PylasError
        Always, this function never returns
    """
    point_size = points_dtype.itemsize
    template = (
        "The file does not contain enough bytes to store the expected number of points\n"
        "expected {} bytes, read {} bytes ({} bytes missing == {} points) and it cannot be corrected\n"
        "{} (bytes) / {} (point_size) = {} (points)"
    )
    # True divisions on purpose: a fractional number of points is exactly
    # what this message is meant to expose
    message = template.format(
        expected_bytes_len,
        point_data_buffer_len,
        missing_bytes_len,
        missing_bytes_len / point_size,
        point_data_buffer_len,
        point_size,
        point_data_buffer_len / point_size,
    )
    raise errors.PylasError(message)
class IPointRecord(ABC):
    """ Interface of the objects wrapping the numpy structured array
    that contains the points data
    """

    @property
    @abstractmethod
    def point_size(self):
        """ Shall return the point size as it will be written in the header
        """

    @property
    @abstractmethod
    def actual_point_size(self):
        """ Shall return the actual size in bytes that the points take in memory
        """

    @abstractmethod
    def __getitem__(self, item):
        """ Shall give access to the points data designated by item
        """

    @abstractmethod
    def __setitem__(self, key, value):
        """ Shall store value in the points data designated by key
        """

    @abstractmethod
    def __len__(self):
        """ Shall return the number of points
        """

    @abstractmethod
    def raw_bytes(self):
        """ Shall return the points data as bytes
        """

    @abstractmethod
    def write_to(self, out):
        """ Shall write the points to the out stream
        """

    @classmethod
    @abstractmethod
    def from_stream(cls, stream, point_format_id, count):
        """ Shall construct a record by reading count points from the stream
        """

    @classmethod
    @abstractmethod
    def empty(cls, point_format_id):
        """ Shall construct a record that contains no points
        """
class PointRecord(IPointRecord):
    """ Common base of the point records: holds the numpy structured array
    containing the points (``array``) and the point format describing it
    (``point_format``)
    """

    def __init__(self, data, point_format: PointFormat):
        self.array = data
        self.point_format = point_format

    @property
    def dimensions_names(self):
        """ Returns the names of the dimensions, as given by the point format
        """
        return self.point_format.dimension_names

    @property
    def extra_dimensions_names(self):
        """ Returns the names of extra-dimensions contained in the PointRecord
        """
        return self.point_format.extra_dimension_names

    @property
    def actual_point_size(self):
        """ Returns the point size in bytes taken by each points of the record

        Returns
        -------
        int
            The point size in byte
        """
        return self.array.dtype.itemsize

    @classmethod
    def from_point_record(cls, other_point_record, new_point_format):
        """ Construct a new record (of the class this is called on) from an
        existing one, with the ability to change the point format while doing so
        """
        array = np.zeros_like(other_point_record.array, dtype=new_point_format.dtype)
        new_record = cls(array, new_point_format)
        new_record.copy_fields_from(other_point_record)
        return new_record

    def copy_fields_from(self, other_record):
        """ Tries to copy the values of the current dimensions from other_record

        Dimensions for which the copy raises a ValueError are skipped
        (deliberate best-effort).
        """
        for dim_name in self.dimensions_names:
            try:
                self[dim_name] = other_record[dim_name]
            except ValueError:
                pass

    def memoryview(self):
        """ Returns a memoryview of the underlying array
        """
        return memoryview(self.array)

    def raw_bytes(self):
        """ Returns a copy of the points data as bytes
        """
        return self.array.tobytes()

    def __getitem__(self, item):
        return self.array[item]

    def __setitem__(self, key, value):
        self._append_zeros_if_too_small(value)
        self.array[key] = value

    def _append_zeros_if_too_small(self, value):
        """ Appends zeros to the points stored if the value we are trying to
        fit is bigger

        Nothing is done when value is not bigger than the array, or when it
        has no length at all (a scalar that numpy will broadcast).
        """
        try:
            value_len = len(value)
        except TypeError:
            # unsized value (scalar): it can never require the array to grow
            return

        size_diff = value_len - len(self.array)
        # Only grow: a negative difference (value shorter than the array, e.g.
        # when assigning to a slice) must not reach np.zeros, which rejects
        # negative sizes.
        if size_diff > 0:
            self.array = np.append(
                self.array, np.zeros(size_diff, dtype=self.array.dtype)
            )

    def __len__(self):
        return self.array.shape[0]
class PackedPointRecord(PointRecord):
    """
    In the PackedPointRecord, fields that are a combinations of many sub-fields (fields stored on less than a byte)
    are still packed together and are only de-packed and re-packed when accessed.

    This uses less memory than if the sub-fields were unpacked.
    However some operations on sub-fields require extra steps:

    >>> #return number is a sub-field
    >>> from pylas import PointFormat
    >>> packed_point_record = PackedPointRecord.zeros(PointFormat(0), 10)
    >>> packed_point_record['return_number'][:] = 1
    >>> np.all(packed_point_record['return_number'] == 1)
    False

    >>> packed_point_record = PackedPointRecord.zeros(PointFormat(0), 10)
    >>> rn = packed_point_record['return_number']
    >>> rn[:] = 1
    >>> packed_point_record['return_number'] = rn
    >>> np.all(packed_point_record['return_number'] == 1)
    True
    """

    def __init__(self, data, point_format=None):
        if point_format is None:
            # No format given: deduce it from the dtype of the array
            point_format = PointFormat(dims.np_dtype_to_point_format(data.dtype))
        super().__init__(data, point_format)
        # Maps a sub-field name to (name of the packed field, sub-field description),
        # as shown by how the values are unpacked in __getitem__ / __setitem__
        self.sub_fields_dict = point_format.sub_fields

    @property
    def all_dimensions_names(self):
        """ Returns all the dimensions names, including the names of sub_fields
        and their corresponding packed fields
        """
        return frozenset(self.array.dtype.names + tuple(self.sub_fields_dict.keys()))

    @property
    def point_size(self):
        """ Returns the point size in bytes taken by each points of the record

        Returns
        -------
        int
            The point size in byte
        """
        return self.array.dtype.itemsize

    @classmethod
    def zeros(cls, point_format, point_count):
        """ Creates a new point record with all dimensions initialized to zero

        Parameters
        ----------
        point_format: pylas.PointFormat
            The point format the point record should have
        point_count : int
            The number of point the point record should have

        Returns
        -------
        PackedPointRecord
        """
        data = np.zeros(point_count, point_format.dtype)
        return cls(data, point_format)

    @classmethod
    def empty(cls, point_format):
        """ Creates an empty point record.

        Parameters
        ----------
        point_format: pylas.PointFormat
            The point format the point record should have

        Returns
        -------
        PackedPointRecord
        """
        return cls.zeros(point_format, point_count=0)

    @classmethod
    def from_stream(cls, stream, point_format, count):
        """ Construct the point record by reading the points from the stream

        If the stream holds fewer bytes than expected but still a whole number
        of points, the record is built with the points that are available and
        the problem is logged. If the bytes read do not form a whole number of
        points, raise_not_enough_bytes_error is called.
        """
        points_dtype = point_format.dtype
        expected_bytes_len = count * points_dtype.itemsize
        # bytearray (mutable buffer): np.frombuffer then yields a writable array
        point_data_buffer = bytearray(stream.read(expected_bytes_len))

        try:
            data = np.frombuffer(point_data_buffer, dtype=points_dtype, count=count)
        except ValueError:
            # The buffer is smaller than count points
            if len(point_data_buffer) % points_dtype.itemsize != 0:
                missing_bytes_len = expected_bytes_len - len(point_data_buffer)
                raise_not_enough_bytes_error(
                    expected_bytes_len,
                    missing_bytes_len,
                    len(point_data_buffer),
                    points_dtype,
                )
            else:
                actual_count = len(point_data_buffer) // points_dtype.itemsize
                logger.critical(
                    "Expected {} points, there are {} ({} missing)".format(
                        count, actual_count, count - actual_count
                    )
                )
                data = np.frombuffer(
                    point_data_buffer, dtype=points_dtype, count=actual_count
                )

        return cls(data, point_format)

    @classmethod
    def from_buffer(cls, buffer, point_format, count, offset=0):
        """ Construct the point record from count points found in buffer,
        starting at offset (in bytes)
        """
        points_dtype = point_format.dtype
        data = np.frombuffer(buffer, dtype=points_dtype, offset=offset, count=count)
        return cls(data, point_format)

    def write_to(self, out):
        """ Writes the points to the output stream"""
        out.write(self.raw_bytes())

    def to_unpacked(self):
        """ Returns a new UnpackedPointRecord holding the same values,
        with the sub-fields stored in their own fields
        """
        arr = np.zeros_like(self.array, dtype=self.point_format.unpacked_dtype)
        record = UnpackedPointRecord(arr, self.point_format)
        record.copy_fields_from(self)
        return record

    def __getitem__(self, item):
        """ Gives access to the underlying numpy array
        Unpack the dimension if item is the name a sub-field
        """
        try:
            composed_dim, sub_field = self.sub_fields_dict[item]
        except (KeyError, TypeError):
            # KeyError: not a sub-field name.
            # TypeError: unhashable key (boolean mask, index array, list of
            # field names, ...) which cannot be a sub-field name either.
            # In both cases numpy handles the indexing.
            return self.array[item]

        return packing.unpack(
            self.array[composed_dim], sub_field.mask, dtype=sub_field.type
        )

    def __setitem__(self, key, value):
        """ Sets elements in the array
        Packs the value into its packed field if key is the name of a sub-field

        Raises
        ------
        OverflowError
            If packing the value into the packed field overflows
        """
        self._append_zeros_if_too_small(value)
        try:
            composed_dim, sub_field = self.sub_fields_dict[key]
        except (KeyError, TypeError):
            # Not a sub-field name (or an unhashable key such as a mask):
            # plain numpy assignment
            self.array[key] = value
            return

        try:
            packing.pack(self.array[composed_dim], value, sub_field.mask, inplace=True)
        except OverflowError as e:
            raise OverflowError(
                "Overflow when packing {} into {}: {}".format(
                    sub_field.name, composed_dim, e
                )
            ) from e

    def __repr__(self):
        return "<PackedPointRecord(fmt: {}, len: {}, point size: {})>".format(
            self.point_format, len(self), self.actual_point_size
        )
class UnpackedPointRecord(PointRecord):
    """
    In the Unpacked Point Record, all the sub-fields are un-packed meaning that they are in their
    own array.
    Because the minimum size for the elements of an array is 8 bits, and sub-fields are only a few bits
    (less than 8) the resulting unpacked array uses more memory, especially if the point format has lots of sub-fields
    """

    def __init__(self, data, point_fmt_id=None):
        # Despite its name, point_fmt_id is stored as self.point_format and the
        # other methods read `.dtype` from it, so a PointFormat is expected.
        if point_fmt_id is None:
            # Wrap the deduced format in a PointFormat, the same way
            # PackedPointRecord.__init__ does
            point_fmt_id = PointFormat(
                dims.np_dtype_to_point_format(data.dtype, unpacked=True)
            )
        super().__init__(data, point_fmt_id)

    @property
    def point_size(self):
        """ Returns the size in bytes of a point once packed
        (itemsize of the packed dtype of the point format)
        """
        return self.point_format.dtype.itemsize

    def write_to(self, out):
        """ Writes the points, re-packed, to the output stream
        """
        out.write(self.to_packed().raw_bytes())

    @classmethod
    def from_stream(cls, stream, point_format_id, count, extra_dims=None):
        """ Construct the point record by reading the (packed) points from the
        stream and unpacking them

        point_format_id is forwarded as-is to PackedPointRecord.from_stream,
        which reads its ``dtype``.
        extra_dims is accepted for backward compatibility only and is not
        used: PackedPointRecord.from_stream has no such parameter.
        """
        # Read with the packed record, not with this class: calling
        # UnpackedPointRecord.from_stream here would recurse forever
        return PackedPointRecord.from_stream(
            stream, point_format_id, count
        ).to_unpacked()

    @classmethod
    def from_compressed_buffer(
        cls, compressed_buffer, point_format_id, count, laszip_vlr
    ):
        # NOTE(review): PackedPointRecord.from_compressed_buffer is not defined
        # in the part of the file visible here - confirm that it exists
        return PackedPointRecord.from_compressed_buffer(
            compressed_buffer, point_format_id, count, laszip_vlr
        ).to_unpacked()

    @classmethod
    def empty(cls, point_format):
        """ Creates an empty unpacked point record
        """
        # unpacked dtype, consistent with what PackedPointRecord.to_unpacked builds
        data = np.zeros(0, dtype=point_format.unpacked_dtype)
        return cls(data, point_format)

    def to_packed(self):
        """ Returns a new PackedPointRecord holding the same values
        """
        return PackedPointRecord.from_point_record(self, self.point_format)