Skip to content
This repository was archived by the owner on Sep 26, 2023. It is now read-only.

Commit 1d0c940

Browse files
authored
fix: truncate RPC timeouts to time remaining in totalTimeout (#1191)
* fix: truncate RPC timeouts to time remaining * comment time left <= 0 rpcTimeout behavior * comment shouldRetry polling relationship * add more commentary to polling test for clarity
1 parent f3c1d3e commit 1d0c940

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

‎gax/src/main/java/com/google/api/gax/retrying/ExponentialRetryAlgorithm.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,19 +100,37 @@ public TimedAttemptSettings createNextAttempt(TimedAttemptSettings prevSettings)
100100
(long) (settings.getRetryDelayMultiplier() * prevSettings.getRetryDelay().toMillis());
101101
newRetryDelay = Math.min(newRetryDelay, settings.getMaxRetryDelay().toMillis());
102102
}
103+
Duration randomDelay = Duration.ofMillis(nextRandomLong(newRetryDelay));
103104

104105
// The rpc timeout is determined as follows:
105106
// attempt #0 - use the initialRpcTimeout;
106-
// attempt #1+ - use the calculated value.
107+
// attempt #1+ - use the calculated value, or the time remaining in totalTimeout if the
108+
// calculated value would exceed the totalTimeout.
107109
long newRpcTimeout =
108110
(long) (settings.getRpcTimeoutMultiplier() * prevSettings.getRpcTimeout().toMillis());
109111
newRpcTimeout = Math.min(newRpcTimeout, settings.getMaxRpcTimeout().toMillis());
110112

113+
// The totalTimeout could be zero if a callable is only using maxAttempts to limit retries.
114+
// If set, calculate time remaining in the totalTimeout since the start, taking into account the
115+
// next attempt's delay, in order to truncate the RPC timeout should it exceed the totalTimeout.
116+
if (!settings.getTotalTimeout().isZero()) {
117+
Duration timeElapsed =
118+
Duration.ofNanos(clock.nanoTime())
119+
.minus(Duration.ofNanos(prevSettings.getFirstAttemptStartTimeNanos()));
120+
Duration timeLeft = globalSettings.getTotalTimeout().minus(timeElapsed).minus(randomDelay);
121+
122+
// If timeLeft at this point is < 0, the shouldRetry logic will prevent
123+
// the attempt from being made as it would exceed the totalTimeout. A negative RPC timeout
124+
// will result in a deadline in the past, which should will always fail prior to making a
125+
// network call.
126+
newRpcTimeout = Math.min(newRpcTimeout, timeLeft.toMillis());
127+
}
128+
111129
return TimedAttemptSettings.newBuilder()
112130
.setGlobalSettings(prevSettings.getGlobalSettings())
113131
.setRetryDelay(Duration.ofMillis(newRetryDelay))
114132
.setRpcTimeout(Duration.ofMillis(newRpcTimeout))
115-
.setRandomizedRetryDelay(Duration.ofMillis(nextRandomLong(newRetryDelay)))
133+
.setRandomizedRetryDelay(randomDelay)
116134
.setAttemptCount(prevSettings.getAttemptCount() + 1)
117135
.setOverallAttemptCount(prevSettings.getOverallAttemptCount() + 1)
118136
.setFirstAttemptStartTimeNanos(prevSettings.getFirstAttemptStartTimeNanos())
@@ -144,7 +162,16 @@ public boolean shouldRetry(TimedAttemptSettings nextAttemptSettings) {
144162
- nextAttemptSettings.getFirstAttemptStartTimeNanos()
145163
+ nextAttemptSettings.getRandomizedRetryDelay().toNanos();
146164

147-
// If totalTimeout limit is defined, check that it hasn't been crossed
165+
// If totalTimeout limit is defined, check that it hasn't been crossed.
166+
//
167+
// Note: if the potential time spent is exactly equal to the totalTimeout,
168+
// the attempt will still be allowed. This might not be desired, but if we
169+
// enforce it, it could have potentially negative side effects on LRO polling.
170+
// Specifically, if a polling retry attempt is denied, the LRO is canceled, and
171+
// if a polling retry attempt is denied because its delay would *reach* the
172+
// totalTimeout, the LRO would be canceled prematurely. The problem here is that
173+
// totalTimeout doubles as the polling threshold and also the time limit for an
174+
// operation to finish.
148175
if (totalTimeout > 0 && totalTimeSpentNanos > totalTimeout) {
149176
return false;
150177
}

‎gax/src/test/java/com/google/api/gax/retrying/ExponentialRetryAlgorithmTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
*/
3030
package com.google.api.gax.retrying;
3131

32+
import static com.google.common.truth.Truth.assertThat;
3233
import static org.junit.Assert.assertEquals;
3334
import static org.junit.Assert.assertFalse;
3435
import static org.junit.Assert.assertTrue;
@@ -89,6 +90,25 @@ public void testCreateNextAttempt() {
8990
assertEquals(Duration.ofMillis(4L), thirdAttempt.getRpcTimeout());
9091
}
9192

93+
@Test
94+
public void testTruncateToTotalTimeout() {
95+
RetrySettings timeoutSettings =
96+
retrySettings
97+
.toBuilder()
98+
.setInitialRpcTimeout(Duration.ofSeconds(4L))
99+
.setMaxRpcTimeout(Duration.ofSeconds(4L))
100+
.setTotalTimeout(Duration.ofSeconds(4L))
101+
.build();
102+
ExponentialRetryAlgorithm timeoutAlg = new ExponentialRetryAlgorithm(timeoutSettings, clock);
103+
104+
TimedAttemptSettings firstAttempt = timeoutAlg.createFirstAttempt();
105+
TimedAttemptSettings secondAttempt = timeoutAlg.createNextAttempt(firstAttempt);
106+
assertThat(firstAttempt.getRpcTimeout()).isGreaterThan(secondAttempt.getRpcTimeout());
107+
108+
TimedAttemptSettings thirdAttempt = timeoutAlg.createNextAttempt(secondAttempt);
109+
assertThat(secondAttempt.getRpcTimeout()).isGreaterThan(thirdAttempt.getRpcTimeout());
110+
}
111+
92112
@Test
93113
public void testShouldRetryTrue() {
94114
TimedAttemptSettings attempt = algorithm.createFirstAttempt();

‎gax/src/test/java/com/google/api/gax/rpc/OperationCallableImplTest.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,14 @@ public class OperationCallableImplTest {
9494
.setInitialRetryDelay(Duration.ofMillis(1L))
9595
.setRetryDelayMultiplier(1)
9696
.setMaxRetryDelay(Duration.ofMillis(1L))
97-
.setInitialRpcTimeout(Duration.ZERO) // supposed to be ignored
97+
.setInitialRpcTimeout(
98+
Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero
9899
.setMaxAttempts(0)
99100
.setJittered(false)
100-
.setRpcTimeoutMultiplier(1) // supposed to be ignored
101-
.setMaxRpcTimeout(Duration.ZERO) // supposed to be ignored
101+
.setRpcTimeoutMultiplier(
102+
1) // supposed to be ignored, but are not actually, so we set to one
103+
.setMaxRpcTimeout(
104+
Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero
102105
.setTotalTimeout(Duration.ofMillis(5L))
103106
.build();
104107

@@ -475,9 +478,16 @@ public void testFutureCallPollRPCTimeout() throws Exception {
475478
OperationTimedPollAlgorithm.create(
476479
FAST_RECHECKING_SETTINGS
477480
.toBuilder()
481+
// Update the polling algorithm to set per-RPC timeouts instead of the default zero.
482+
//
483+
// This is non-standard, as these fields have been documented as "should be ignored"
484+
// for LRO polling. They are not actually ignored in code, so they changing them
485+
// here has an actual affect. This test verifies that they work as such, but in
486+
// practice generated clients set the RPC timeouts to 0 to be ignored.
478487
.setInitialRpcTimeout(Duration.ofMillis(100))
479488
.setMaxRpcTimeout(Duration.ofSeconds(1))
480489
.setRpcTimeoutMultiplier(2)
490+
.setTotalTimeout(Duration.ofSeconds(5L))
481491
.build(),
482492
clock);
483493
callSettings = callSettings.toBuilder().setPollingAlgorithm(pollingAlgorithm).build();

0 commit comments

Comments
 (0)