Skip to content

poc: connector builder using concurrent cdk #460

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
tmp
  • Loading branch information
maxi297 committed Apr 4, 2025
commit 58016298de3b134cb177ed35a949fa3dcfc7e8ee
20 changes: 17 additions & 3 deletions airbyte_cdk/connector_builder/connector_builder_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
Type,
)
from airbyte_cdk.models import Type as MessageType
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
ConcurrentDeclarativeSource,
)
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
Expand Down Expand Up @@ -54,12 +57,23 @@ def get_limits(config: Mapping[str, Any]) -> TestLimits:
return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams)


def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource:
def _ensure_concurrency_level(manifest: Dict[str, Any]) -> None:
# We need to do that to ensure that the state in the StreamReadSlices only contains the changes for one slice
# Note that this is below the _LOWEST_SAFE_CONCURRENCY_LEVEL but it is fine in this case because we are limiting the number of slices
# being generated which means that the memory usage is limited anyway
if "concurrency_level" not in manifest:
manifest["concurrency_level"] = {}
manifest["concurrency_level"]["default_concurrency"] = 1

def create_source(config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: Any, limits: TestLimits) -> ManifestDeclarativeSource:
manifest = config["__injected_declarative_manifest"]
return ManifestDeclarativeSource(
_ensure_concurrency_level(manifest)
return ConcurrentDeclarativeSource(
config=config,
emit_connector_builder_messages=True,
catalog=catalog,
state=state,
source_config=manifest,
emit_connector_builder_messages=True,
component_factory=ModelToComponentFactory(
emit_connector_builder_messages=True,
limit_pages_fetched_per_slice=limits.max_pages_per_slice,
Expand Down
2 changes: 1 addition & 1 deletion airbyte_cdk/connector_builder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def handle_connector_builder_request(
def handle_request(args: List[str]) -> str:
command, config, catalog, state = get_config_and_catalog_from_args(args)
limits = get_limits(config)
source = create_source(config, limits)
source = create_source(config, catalog, state, limits)
return orjson.dumps(
AirbyteMessageSerializer.dump(
handle_connector_builder_request(source, command, config, catalog, state, limits)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@
)
from airbyte_cdk.sources.declarative.models import (
CustomStateMigration,
GzipDecoder,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
AddedFieldDefinition as AddedFieldDefinitionModel,
Expand Down Expand Up @@ -389,10 +388,6 @@
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
COMPONENTS_MODULE_NAME,
SDM_COMPONENTS_MODULE_NAME,
)
from airbyte_cdk.sources.declarative.partition_routers import (
CartesianProductStreamSlicer,
GroupingPartitionRouter,
Expand Down Expand Up @@ -464,6 +459,7 @@
)
from airbyte_cdk.sources.declarative.spec import Spec
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import TestReadSlicerDecorator
from airbyte_cdk.sources.declarative.transformations import (
AddFields,
RecordTransformation,
Expand Down Expand Up @@ -518,7 +514,7 @@
IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]
Expand Down Expand Up @@ -2845,6 +2841,8 @@ def create_simple_retriever(
)

if self._limit_slices_fetched or self._emit_connector_builder_messages:
slice_limit = self._limit_slices_fetched or 5
stream_slicer = TestReadSlicerDecorator(stream_slicer, slice_limit) # FIXME Once log formatter is removed, we can just pass this to the SimpleRetriever
return SimpleRetrieverTestReadDecorator(
name=name,
paginator=paginator,
Expand All @@ -2855,7 +2853,6 @@ def create_simple_retriever(
request_option_provider=request_options_provider,
cursor=cursor,
config=config,
maximum_number_of_slices=self._limit_slices_fetched or 5,
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
parameters=model.parameters or {},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,6 @@ def __post_init__(self, options: Mapping[str, Any]) -> None:
f"The maximum number of slices on a test read needs to be strictly positive. Got {self.maximum_number_of_slices}"
)

# stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
def stream_slices(self) -> Iterable[Optional[StreamSlice]]: # type: ignore
return islice(super().stream_slices(), self.maximum_number_of_slices)

def _fetch_next_page(
self,
stream_state: Mapping[str, Any],
Expand Down Expand Up @@ -623,6 +619,7 @@ def _fetch_next_page(
stream_slice=stream_slice,
next_page_token=next_page_token,
),
# FIXME remove this implementation and have the log_formatter depend on whether the logger is in debug mode
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to remove the SimpleRetriever decorator eventually. We just need to have the log_formatter be an optional field of the SimpleRetriever and we should be fine.

In theory, this seems good because it would allow for test reads configuration to apply to any retriever, not just the SimpleRetriever

log_formatter=lambda response: format_http_message(
response,
f"Stream '{self.name}' request",
Expand Down
48 changes: 48 additions & 0 deletions airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
#

from abc import ABC
from itertools import islice
from typing import Any, Iterable, Mapping, Optional, Union

from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
RequestOptionsProvider,
)
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import (
StreamSlicer as ConcurrentStreamSlicer,
)
from airbyte_cdk.sources.types import StreamSlice, StreamState


class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC):
Expand All @@ -23,3 +26,48 @@ class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC):
"""

pass


class TestReadSlicerDecorator(StreamSlicer):
    """
    A stream slicer wrapper for test reads which limits the number of slices produced.

    Only ``stream_slices`` is altered — the underlying iterable is lazily truncated to
    ``maximum_number_of_slices`` items. All request-option methods delegate verbatim to
    the decorated slicer.
    """

    def __init__(self, stream_slicer: StreamSlicer, maximum_number_of_slices: int) -> None:
        """
        :param stream_slicer: the slicer whose slices should be limited
        :param maximum_number_of_slices: upper bound on slices yielded; must be >= 1
        :raises ValueError: if maximum_number_of_slices is not strictly positive
        """
        # Mirror SimpleRetrieverTestReadDecorator's guard: a non-positive limit would
        # silently yield no slices at all, which is never what a test read intends.
        if maximum_number_of_slices < 1:
            raise ValueError(
                f"The maximum number of slices on a test read needs to be strictly positive. Got {maximum_number_of_slices}"
            )
        self._decorated = stream_slicer
        self._maximum_number_of_slices = maximum_number_of_slices

    def stream_slices(self) -> Iterable[StreamSlice]:
        # islice truncates lazily, so the decorated slicer never has to generate
        # more slices than the limit requires.
        return islice(self._decorated.stream_slices(), self._maximum_number_of_slices)

    def get_request_params(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return self._decorated.get_request_params(
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
        )

    def get_request_headers(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return self._decorated.get_request_headers(
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
        )

    def get_request_body_data(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Union[Mapping[str, Any], str]:
        return self._decorated.get_request_body_data(
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
        )

    def get_request_body_json(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        return self._decorated.get_request_body_json(
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
        )
Comment on lines +31 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

New TestReadSlicerDecorator implementation looks good, but formatting needs attention.

The new TestReadSlicerDecorator correctly wraps a StreamSlicer and limits slices during test reads. This appears to replace functionality that was previously in SimpleRetrieverTestReadDecorator.

Note that there's a linting warning about formatting for this file. Perhaps run the formatter on this file before merging?

One minor suggestion: Would it be worth adding validation to ensure maximum_number_of_slices is positive, similar to how SimpleRetrieverTestReadDecorator does it? wdyt?


🏁 Script executed:

#!/bin/bash
# Check if SimpleRetrieverTestReadDecorator validates maximum_number_of_slices
rg -A 5 -B 5 "maximum_number_of_slices" airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

Length of output: 794


Fix Required: Address formatting warnings and add positive-value validation

The new TestReadSlicerDecorator implementation works well in wrapping a StreamSlicer and limiting the slices. However, there are a couple of points to consider before merging:

  • There's a linting warning concerning formatting in this file. Could you run the formatter to clean up these issues?
  • Unlike SimpleRetrieverTestReadDecorator (in airbyte_cdk/sources/declarative/retrievers/simple_retriever.py) which validates that maximum_number_of_slices is positive, the new decorator doesn’t have this check. Would it be worth adding a similar validation in the constructor to ensure maximum_number_of_slices is strictly positive, to prevent potential issues? wdyt?
🧰 Tools
🪛 GitHub Actions: Linters

[warning] Would reformat: airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py

4 changes: 3 additions & 1 deletion airbyte_cdk/test/entrypoint_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def _run_command(
stream_handler.setFormatter(AirbyteLogFormatter())
parent_logger = logging.getLogger("")
parent_logger.addHandler(stream_handler)
if "--debug" not in args:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed for tests to pass. Just enabling more logs in the tests for debugging purposes

args.append("--debug")

parsed_args = AirbyteEntrypoint.parse_args(args)

Expand Down Expand Up @@ -195,7 +197,7 @@ def discover(
config_file = make_file(tmp_directory_path / "config.json", config)

return _run_command(
source, ["discover", "--config", config_file, "--debug"], expecting_exception
source, ["discover", "--config", config_file], expecting_exception
)


Expand Down