Source code for s3torchconnector.s3reader.constructor

#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#  // SPDX-License-Identifier: BSD

from functools import partial
from typing import Optional

from .protocol import S3ReaderConstructorProtocol
from .sequential import SequentialS3Reader
from .ranged import RangedS3Reader



[docs]
class S3ReaderConstructor:
    """Constructor for creating ``partial(S3Reader)`` instances.

    Creates partial ``S3Reader`` instances that will be completed by ``S3Client`` with the
    remaining required parameters (e.g. ``bucket``, ``key``, ``get_object_info``, ``get_stream``).

    The constructor provides factory methods for different reader types:

    - ``sequential()``: Creates a constructor for sequential readers that buffer the entire object.
      Best for full reads and repeated access.
    - ``range_based()``: Creates a constructor for range-based readers that fetch specific byte ranges.
      Suitable for sparse partial reads for large objects.
    """

    @staticmethod

[docs]
    def sequential() -> S3ReaderConstructorProtocol:
        """Creates a constructor for sequential readers

        Returns:
            S3ReaderConstructorProtocol: Partial constructor for SequentialS3Reader

        Example::

            reader_constructor = S3ReaderConstructor.sequential()

        """
        return partial(SequentialS3Reader)


    @staticmethod

[docs]
    def range_based(buffer_size: Optional[int] = None) -> S3ReaderConstructorProtocol:
        """Creates a constructor for range-based readers

        Args:
            buffer_size: Internal buffer size in bytes. If None, uses default 8MB.
                         Set to 0 to disable buffering.

        Returns:
            S3ReaderConstructorProtocol: Partial constructor for RangedS3Reader

        Range-based reader performs byte-range requests to read specific portions of S3 objects without
        downloading the entire file.

        Buffer size affects read performance:

        * Small reads (< ``buffer_size``): Loads ``buffer_size`` bytes to buffer to reduce S3 API calls for small, sequential reads
        * Large reads (≥ ``buffer_size``): bypass the buffer for direct transfer from S3
        * Forward overlap reads: Reuses buffered data when reading ranges that extend beyond current buffer, and processes remaining
        data according to size with logic above.

        Configuration Guide:

        * Use larger buffer sizes for workloads with many small, sequential reads of nearby bytes
        * Use smaller buffer sizes or disable buffering for sparse partial reads
        * Buffer can be disabled by setting ``buffer_size`` to 0
        * If ``buffer_size`` is None, uses default 8MB buffer

        Examples::

            # Range-based reader with default 8MB buffer
            reader_constructor = S3ReaderConstructor.range_based()

            # Range-based reader with custom buffer size
            reader_constructor = S3ReaderConstructor.range_based(buffer_size=16*1024*1024)

            # Range-based reader with buffering disabled
            reader_constructor = S3ReaderConstructor.range_based(buffer_size=0)
        """
        return partial(RangedS3Reader, buffer_size=buffer_size)


    @staticmethod

[docs]
    def default() -> S3ReaderConstructorProtocol:
        """Creates default reader constructor (sequential)

        Returns:
            S3ReaderConstructorProtocol: Partial constructor for SequentialS3Reader
        """
        return S3ReaderConstructor.sequential()


    @staticmethod

[docs]
    def get_reader_type_string(
        constructor: Optional[S3ReaderConstructorProtocol],
    ) -> str:
        """Returns the reader type string for the given constructor."""
        if constructor is None:
            return S3ReaderConstructor.get_reader_type_string(
                S3ReaderConstructor.default()
            )

        if not isinstance(constructor, partial):
            return "unknown"

        if constructor.func == RangedS3Reader:
            return "range_based"
        elif constructor.func == SequentialS3Reader:
            return "sequential"
        else:
            return "unknown"