@@ -100,19 +100,37 @@ public TimedAttemptSettings createNextAttempt(TimedAttemptSettings prevSettings)
100
100
(long ) (settings .getRetryDelayMultiplier () * prevSettings .getRetryDelay ().toMillis ());
101
101
newRetryDelay = Math .min (newRetryDelay , settings .getMaxRetryDelay ().toMillis ());
102
102
}
103
+ Duration randomDelay = Duration .ofMillis (nextRandomLong (newRetryDelay ));
103
104
104
105
// The rpc timeout is determined as follows:
105
106
// attempt #0 - use the initialRpcTimeout;
106
- // attempt #1+ - use the calculated value.
107
+ // attempt #1+ - use the calculated value, or the time remaining in totalTimeout if the
108
+ // calculated value would exceed the totalTimeout.
107
109
long newRpcTimeout =
108
110
(long ) (settings .getRpcTimeoutMultiplier () * prevSettings .getRpcTimeout ().toMillis ());
109
111
newRpcTimeout = Math .min (newRpcTimeout , settings .getMaxRpcTimeout ().toMillis ());
110
112
113
+ // The totalTimeout could be zero if a callable is only using maxAttempts to limit retries.
114
+ // If set, calculate time remaining in the totalTimeout since the start, taking into account the
115
+ // next attempt's delay, in order to truncate the RPC timeout should it exceed the totalTimeout.
116
+ if (!settings .getTotalTimeout ().isZero ()) {
117
+ Duration timeElapsed =
118
+ Duration .ofNanos (clock .nanoTime ())
119
+ .minus (Duration .ofNanos (prevSettings .getFirstAttemptStartTimeNanos ()));
120
+ Duration timeLeft = globalSettings .getTotalTimeout ().minus (timeElapsed ).minus (randomDelay );
121
+
122
+ // If timeLeft at this point is < 0, the shouldRetry logic will prevent
123
+ // the attempt from being made as it would exceed the totalTimeout. A negative RPC timeout
124
+ // will result in a deadline in the past, which will always fail prior to making a
125
+ // network call.
126
+ newRpcTimeout = Math .min (newRpcTimeout , timeLeft .toMillis ());
127
+ }
128
+
111
129
return TimedAttemptSettings .newBuilder ()
112
130
.setGlobalSettings (prevSettings .getGlobalSettings ())
113
131
.setRetryDelay (Duration .ofMillis (newRetryDelay ))
114
132
.setRpcTimeout (Duration .ofMillis (newRpcTimeout ))
115
- .setRandomizedRetryDelay (Duration . ofMillis ( nextRandomLong ( newRetryDelay )) )
133
+ .setRandomizedRetryDelay (randomDelay )
116
134
.setAttemptCount (prevSettings .getAttemptCount () + 1 )
117
135
.setOverallAttemptCount (prevSettings .getOverallAttemptCount () + 1 )
118
136
.setFirstAttemptStartTimeNanos (prevSettings .getFirstAttemptStartTimeNanos ())
@@ -144,7 +162,16 @@ public boolean shouldRetry(TimedAttemptSettings nextAttemptSettings) {
144
162
- nextAttemptSettings .getFirstAttemptStartTimeNanos ()
145
163
+ nextAttemptSettings .getRandomizedRetryDelay ().toNanos ();
146
164
147
- // If totalTimeout limit is defined, check that it hasn't been crossed
165
+ // If totalTimeout limit is defined, check that it hasn't been crossed.
166
+ //
167
+ // Note: if the potential time spent is exactly equal to the totalTimeout,
168
+ // the attempt will still be allowed. This might not be desired, but if we
169
+ // enforce it, it could have potentially negative side effects on LRO polling.
170
+ // Specifically, if a polling retry attempt is denied, the LRO is canceled, and
171
+ // if a polling retry attempt is denied because its delay would *reach* the
172
+ // totalTimeout, the LRO would be canceled prematurely. The problem here is that
173
+ // totalTimeout doubles as the polling threshold and also the time limit for an
174
+ // operation to finish.
148
175
if (totalTimeout > 0 && totalTimeSpentNanos > totalTimeout ) {
149
176
return false ;
150
177
}
0 commit comments