-
Notifications
You must be signed in to change notification settings - Fork 24
poc: connector builder using concurrent cdk #460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,13 +3,16 @@ | |
# | ||
|
||
from abc import ABC | ||
from itertools import islice | ||
from typing import Any, Iterable, Mapping, Optional, Union | ||
|
||
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( | ||
RequestOptionsProvider, | ||
) | ||
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import ( | ||
StreamSlicer as ConcurrentStreamSlicer, | ||
) | ||
from airbyte_cdk.sources.types import StreamSlice, StreamState | ||
|
||
|
||
class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC): | ||
|
@@ -23,3 +26,48 @@ class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC): | |
""" | ||
|
||
pass | ||
|
||
|
||
class TestReadSlicerDecorator(StreamSlicer): | ||
""" | ||
A stream slicer wrapper for test reads which limits the number of slices produced. | ||
""" | ||
|
||
def __init__(self, stream_slicer: StreamSlicer, maximum_number_of_slices: int) -> None: | ||
self._decorated = stream_slicer | ||
self._maximum_number_of_slices = maximum_number_of_slices | ||
|
||
def stream_slices(self) -> Iterable[StreamSlice]: | ||
return islice(self._decorated.stream_slices(), self._maximum_number_of_slices) | ||
|
||
def get_request_params(self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, | ||
next_page_token: Optional[Mapping[str, Any]] = None) -> Mapping[str, Any]: | ||
return self._decorated.get_request_params( | ||
stream_state=stream_state, | ||
stream_slice=stream_slice, | ||
next_page_token=next_page_token, | ||
) | ||
|
||
def get_request_headers(self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, | ||
next_page_token: Optional[Mapping[str, Any]] = None) -> Mapping[str, Any]: | ||
return self._decorated.get_request_headers( | ||
stream_state=stream_state, | ||
stream_slice=stream_slice, | ||
next_page_token=next_page_token, | ||
) | ||
|
||
def get_request_body_data(self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, | ||
next_page_token: Optional[Mapping[str, Any]] = None) -> Union[Mapping[str, Any], str]: | ||
return self._decorated.get_request_body_data( | ||
stream_state=stream_state, | ||
stream_slice=stream_slice, | ||
next_page_token=next_page_token, | ||
) | ||
|
||
def get_request_body_json(self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, | ||
next_page_token: Optional[Mapping[str, Any]] = None) -> Mapping[str, Any]: | ||
return self._decorated.get_request_body_json( | ||
stream_state=stream_state, | ||
stream_slice=stream_slice, | ||
next_page_token=next_page_token, | ||
) | ||
Comment on lines
+31
to
+73
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Verification agent 🧩 Analysis chainNew TestReadSlicerDecorator implementation looks good, but formatting needs attention. The new TestReadSlicerDecorator correctly wraps a StreamSlicer and limits slices during test reads. This appears to replace functionality that was previously in SimpleRetrieverTestReadDecorator. Note that there's a linting warning about formatting for this file. Perhaps run the formatter on this file before merging? One minor suggestion: Would it be worth adding validation to ensure maximum_number_of_slices is positive, similar to how SimpleRetrieverTestReadDecorator does it? wdyt? 🏁 Script executed: #!/bin/bash
# Check if SimpleRetrieverTestReadDecorator validates maximum_number_of_slices
rg -A 5 -B 5 "maximum_number_of_slices" airbyte_cdk/sources/declarative/retrievers/simple_retriever.py Length of output: 794 Fix Required: Address formatting warnings and add positive-value validation The new TestReadSlicerDecorator implementation works well in wrapping a StreamSlicer and limiting the slices. However, there are a couple of points to consider before merging:
🧰 Tools🪛 GitHub Actions: Linters[warning] Would reformat: airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,6 +157,8 @@ def _run_command( | |
stream_handler.setFormatter(AirbyteLogFormatter()) | ||
parent_logger = logging.getLogger("") | ||
parent_logger.addHandler(stream_handler) | ||
if "--debug" not in args: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not needed for tests to pass. Just enabling more logs in the tests for debugging purposes |
||
args.append("--debug") | ||
|
||
parsed_args = AirbyteEntrypoint.parse_args(args) | ||
|
||
|
@@ -195,7 +197,7 @@ def discover( | |
config_file = make_file(tmp_directory_path / "config.json", config) | ||
|
||
return _run_command( | ||
source, ["discover", "--config", config_file, "--debug"], expecting_exception | ||
source, ["discover", "--config", config_file], expecting_exception | ||
) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would like to remove the SimpleRetriever decorator eventually. We just need to have the log_formatter be optional be an optional field of the SimpleRetriever and we should be fine.
In theory, this seems good because it would allow for test reads configuration to apply to any retriever, not just the SimpleRetriever