Skip to content

Commit 72acaaf

Browse files
authored
Merge pull request #186 from vzhurba01/docstrings
Docstrings for cuda.core
2 parents f953ab3 + 7e25688 commit 72acaaf

File tree

16 files changed

+615
-75
lines changed

16 files changed

+615
-75
lines changed

‎cuda_core/cuda/core/experimental/_device.py

Lines changed: 160 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,31 @@
1919

2020

2121
class Device:
22+
"""Represent a GPU and act as an entry point for cuda.core features.
2223
24+
This is a singleton object that helps ensure interoperability
25+
across multiple libraries imported in the process to both see
26+
and use the same GPU device.
27+
28+
While acting as the entry point, many other CUDA resources can be
29+
allocated such as streams and buffers. Any :obj:`Context` dependent
30+
resource created through this device, will continue to refer to
31+
this device's context.
32+
33+
Newly returend :obj:`Device` object are is a thread-local singleton
34+
for a specified device.
35+
36+
Note
37+
----
38+
Will not initialize the GPU.
39+
40+
Parameters
41+
----------
42+
device_id : int, optional
43+
Device ordinal to return a :obj:`Device` object for.
44+
Default value of `None` return the currently used device.
45+
46+
"""
2347
__slots__ = ("_id", "_mr", "_has_inited")
2448

2549
def __new__(cls, device_id=None):
@@ -54,15 +78,29 @@ def _check_context_initialized(self, *args, **kwargs):
5478

5579
@property
5680
def device_id(self) -> int:
81+
"""Return device ordinal."""
5782
return self._id
5883

5984
@property
6085
def pci_bus_id(self) -> str:
86+
"""Return a PCI Bus Id string for this device."""
6187
bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
6288
return bus_id[:12].decode()
6389

6490
@property
6591
def uuid(self) -> str:
92+
"""Return a UUID for the device.
93+
94+
Returns 16-octets identifying the device. If the device is in
95+
MIG mode, returns its MIG UUID which uniquely identifies the
96+
subscribed MIG compute instance.
97+
98+
Note
99+
----
100+
MIG UUID is only returned when device is in MIG mode and the
101+
driver is older than CUDA 11.4.
102+
103+
"""
66104
driver_ver = handle_return(cuda.cuDriverGetVersion())
67105
if driver_ver >= 11040:
68106
uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
@@ -74,19 +112,21 @@ def uuid(self) -> str:
74112

75113
@property
76114
def name(self) -> str:
77-
# assuming a GPU name is less than 128 characters...
78-
name = handle_return(cuda.cuDeviceGetName(128, self._id))
115+
"""Return the device name."""
116+
# Use 256 characters to be consistent with CUDA Runtime
117+
name = handle_return(cuda.cuDeviceGetName(256, self._id))
79118
name = name.split(b'\0')[0]
80119
return name.decode()
81120

82121
@property
83122
def properties(self) -> dict:
123+
"""Return information about the compute-device."""
84124
# TODO: pythonize the key names
85125
return handle_return(cudart.cudaGetDeviceProperties(self._id))
86126

87127
@property
88128
def compute_capability(self) -> ComputeCapability:
89-
"""Returns a named tuple with 2 fields: major and minor. """
129+
"""Return a named tuple with 2 fields: major and minor."""
90130
major = handle_return(cudart.cudaDeviceGetAttribute(
91131
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id))
92132
minor = handle_return(cudart.cudaDeviceGetAttribute(
@@ -96,12 +136,20 @@ def compute_capability(self) -> ComputeCapability:
96136
@property
97137
@precondition(_check_context_initialized)
98138
def context(self) -> Context:
139+
"""Return the current :obj:`Context` associated with this device.
140+
141+
Note
142+
----
143+
Device must be initialized.
144+
145+
"""
99146
ctx = handle_return(cuda.cuCtxGetCurrent())
100147
assert int(ctx) != 0
101148
return Context._from_ctx(ctx, self._id)
102149

103150
@property
104151
def memory_resource(self) -> MemoryResource:
152+
"""Return :obj:`MemoryResource` associated with this device."""
105153
return self._mr
106154

107155
@memory_resource.setter
@@ -112,27 +160,53 @@ def memory_resource(self, mr):
112160

113161
@property
114162
def default_stream(self) -> Stream:
163+
"""Return default CUDA :obj:`Stream` associated with this device.
164+
165+
The type of default stream returned depends on if the environment
166+
variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set.
167+
168+
If set, returns a per-thread default stream. Otherwise returns
169+
the legacy stream.
170+
171+
"""
115172
return default_stream()
116173

117174
def __int__(self):
175+
"""Return device_id."""
118176
return self._id
119177

120178
def __repr__(self):
121179
return f"<Device {self._id} ({self.name})>"
122180

123181
def set_current(self, ctx: Context=None) -> Union[Context, None]:
124-
"""
125-
Entry point of this object. Users always start a code by
182+
"""Set device to be used for GPU executions.
183+
184+
Initializes CUDA and sets the calling thread to a valid CUDA
185+
context. By default the primary context is used, but optional `ctx`
186+
parameter can be used to explicitly supply a :obj:`Context` object.
187+
188+
Providing a `ctx` causes the previous set context to be popped and returned.
189+
190+
Parameters
191+
----------
192+
ctx : :obj:`Context`, optional
193+
Optional context to push onto this device's current thread stack.
194+
195+
Returns
196+
-------
197+
Union[:obj:`Context`, None], optional
198+
Popped context.
199+
200+
Examples
201+
--------
202+
Acts as an entry point of this object. Users always start a code by
126203
calling this method, e.g.
127-
204+
128205
>>> from cuda.core.experimental import Device
129206
>>> dev0 = Device(0)
130207
>>> dev0.set_current()
131208
>>> # ... do work on device 0 ...
132-
133-
The optional ctx argument is for advanced users to bind a
134-
CUDA context with the device. In this case, the previously
135-
set context is popped and returned to the user.
209+
136210
"""
137211
if ctx is not None:
138212
if not isinstance(ctx, Context):
@@ -163,25 +237,94 @@ def set_current(self, ctx: Context=None) -> Union[Context, None]:
163237
self._has_inited = True
164238

165239
def create_context(self, options: ContextOptions = None) -> Context:
166-
# Create a Context object (but do NOT set it current yet!).
167-
# ContextOptions is a dataclass for setting e.g. affinity or CIG
168-
# options.
240+
"""Create a new :obj:`Context` object.
241+
242+
Note
243+
----
244+
The newly context will not be set as current.
245+
246+
Parameters
247+
----------
248+
options : :obj:`ContextOptions`, optional
249+
Customizable dataclass for context creation options.
250+
251+
Returns
252+
-------
253+
:obj:`Context`
254+
Newly created context object.
255+
256+
"""
169257
raise NotImplementedError("TODO")
170258

171259
@precondition(_check_context_initialized)
172260
def create_stream(self, obj=None, options: StreamOptions=None) -> Stream:
173-
# Create a Stream object by either holding a newly created
174-
# CUDA stream or wrapping an existing foreign object supporting
175-
# the __cuda_stream__ protocol. In the latter case, a reference
176-
# to obj is held internally so that its lifetime is managed.
261+
"""Create a Stream object.
262+
263+
New stream objects can be created in two different ways:
264+
265+
1) Create a new CUDA stream with customizable `options`.
266+
2) Wrap an existing foreign `obj` supporting the __cuda_stream__ protocol.
267+
268+
Option (2) internally holds a reference to the foreign object
269+
such that the lifetime is managed.
270+
271+
Note
272+
----
273+
Device must be initialized.
274+
275+
Parameters
276+
----------
277+
obj : Any, optional
278+
Any object supporting the __cuda_stream__ protocol.
279+
options : :obj:`StreamOptions`, optional
280+
Customizable dataclass for stream creation options.
281+
282+
Returns
283+
-------
284+
:obj:`Stream`
285+
Newly created stream object.
286+
287+
"""
177288
return Stream._init(obj=obj, options=options)
178289

179290
@precondition(_check_context_initialized)
180291
def allocate(self, size, stream=None) -> Buffer:
292+
"""Allocate device memory from a specified stream.
293+
294+
Allocates device memory of `size` bytes on the specified `stream`
295+
using the memory resource currently associated with this Device.
296+
297+
Parameter `stream` is optional, using a default stream by default.
298+
299+
Note
300+
----
301+
Device must be initialized.
302+
303+
Parameters
304+
----------
305+
size : int
306+
Number of bytes to allocate.
307+
stream : :obj:`Stream`, optional
308+
The stream establishing the stream ordering semantic.
309+
Default value of `None` uses default stream.
310+
311+
Returns
312+
-------
313+
:obj:`Buffer`
314+
Newly created buffer object.
315+
316+
"""
181317
if stream is None:
182318
stream = default_stream()
183319
return self._mr.allocate(size, stream)
184320

185321
@precondition(_check_context_initialized)
186322
def sync(self):
323+
"""Synchronize the device.
324+
325+
Note
326+
----
327+
Device must be initialized.
328+
329+
"""
187330
handle_return(cudart.cudaDeviceSynchronize())

‎cuda_core/cuda/core/experimental/_event.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,46 @@
1313

1414
@dataclass
1515
class EventOptions:
16+
"""Customizable :obj:`Event` options.
17+
18+
Attributes
19+
----------
20+
enable_timing : bool, optional
21+
Event will record timing data. (Default to False)
22+
busy_waited_sync : bool, optional
23+
If True, event will use blocking synchronization. When a CPU
24+
thread calls synchronize, the call will block until the event
25+
has actually been completed.
26+
Otherwise, the CPU thread will busy-wait until the event has
27+
been completed. (Default to False)
28+
support_ipc : bool, optional
29+
Event will be suitable for interprocess use.
30+
Note that enable_timing must be False. (Default to False)
31+
32+
"""
1633
enable_timing: Optional[bool] = False
1734
busy_waited_sync: Optional[bool] = False
1835
support_ipc: Optional[bool] = False
1936

2037

2138
class Event:
39+
"""Represent a record at a specific point of execution within a CUDA stream.
2240
41+
Applications can asynchronously record events at any point in
42+
the program. An event keeps a record of all previous work within
43+
the last recorded stream.
44+
45+
Events can be used to monitor device's progress, query completion
46+
of work up to event's record, and help establish dependencies
47+
between GPU work submissions.
48+
49+
Directly creating an :obj:`Event` is not supported due to ambiguity,
50+
and they should instead be created through a :obj:`Stream` object.
51+
52+
"""
2353
__slots__ = ("_handle", "_timing_disabled", "_busy_waited")
2454

2555
def __init__(self):
26-
# minimal requirements for the destructor
2756
self._handle = None
2857
raise NotImplementedError(
2958
"directly creating an Event object can be ambiguous. Please call "
@@ -51,37 +80,45 @@ def _init(options: Optional[EventOptions]=None):
5180
return self
5281

5382
def __del__(self):
83+
"""Return close(self)"""
5484
self.close()
5585

5686
def close(self):
57-
# Destroy the event.
87+
"""Destroy the event."""
5888
if self._handle:
5989
handle_return(cuda.cuEventDestroy(self._handle))
6090
self._handle = None
6191

6292
@property
6393
def is_timing_disabled(self) -> bool:
64-
# Check if this instance can be used for the timing purpose.
94+
"""Return True if the event does not record timing data, otherwise False."""
6595
return self._timing_disabled
6696

6797
@property
6898
def is_sync_busy_waited(self) -> bool:
69-
# Check if the event synchronization would keep the CPU busy-waiting.
99+
"""Return True if the event synchronization would keep the CPU busy-waiting, otherwise False."""
70100
return self._busy_waited
71101

72102
@property
73103
def is_ipc_supported(self) -> bool:
74-
# Check if this instance can be used for IPC.
104+
"""Return True if this event can be used as an interprocess event, otherwise False."""
75105
raise NotImplementedError("TODO")
76106

77107
def sync(self):
78-
# Sync over the event.
108+
"""Synchronize until the event completes.
109+
110+
If the event was created with busy_waited_sync, then the
111+
calling CPU thread will block until the event has been
112+
completed by the device.
113+
Otherwise the CPU thread will busy-wait until the event
114+
has been completed.
115+
116+
"""
79117
handle_return(cuda.cuEventSynchronize(self._handle))
80118

81119
@property
82120
def is_done(self) -> bool:
83-
# Return True if all captured works have been completed,
84-
# otherwise False.
121+
"""Return True if all captured works have been completed, otherwise False."""
85122
result, = cuda.cuEventQuery(self._handle)
86123
if result == cuda.CUresult.CUDA_SUCCESS:
87124
return True
@@ -92,4 +129,5 @@ def is_done(self) -> bool:
92129

93130
@property
94131
def handle(self) -> int:
132+
"""Return the underlying cudaEvent_t pointer address as Python int."""
95133
return int(self._handle)

0 commit comments

Comments
 (0)