@@ -637,9 +637,53 @@ export async function* createStreamingResponse(
637637
638638 if ( ! response . ok ) {
639639 const errorText = await response . text ( ) ;
640- throw new Error (
641- `OpenAI Responses API error: ${ response . status } ${ response . statusText } - ${ errorText } ` ,
642- ) ;
640+ const baseErrorMsg = `OpenAI Responses API error: ${ response . status } ${ response . statusText } - ${ errorText } ` ;
641+
642+ const error = new Error ( baseErrorMsg ) as Error & {
643+ code ?: string ;
644+ isRetryable ?: boolean ;
645+ } ;
646+
647+ if ( response . status === 403 ) {
648+ const lower = errorText . toLowerCase ( ) ;
649+ const likelyPersistent =
650+ lower . includes ( 'insufficient_quota' ) ||
651+ lower . includes ( 'quota' ) ||
652+ lower . includes ( 'billing' ) ||
653+ lower . includes ( 'invalid api key' ) ||
654+ lower . includes ( 'api key' ) ||
655+ lower . includes ( 'permission' ) ||
656+ lower . includes ( 'organization' ) ||
657+ lower . includes ( 'not allowed' ) ;
658+
659+ error . isRetryable = true ;
660+ error . code = likelyPersistent
661+ ? 'HTTP_403_FORBIDDEN_PERSISTENT'
662+ : 'HTTP_403_FORBIDDEN_TRANSIENT' ;
663+
664+ const hint = likelyPersistent
665+ ? 'Hint: This 403 likely indicates auth/quota/permission. Retrying may not help, but Snow CLI will retry up to the configured limit.'
666+ : 'Hint: This 403 may be transient when using a third-party relay/WAF. Snow CLI will retry with backoff.' ;
667+ error . message = `${ baseErrorMsg } \n${ hint } ` ;
668+
669+ try {
670+ const { logger} = await import ( '../utils/core/logger.js' ) ;
671+ logger . warn (
672+ `[API_ERROR] OpenAI Responses API HTTP 403 (${ error . code } ), will retry with backoff` ,
673+ {
674+ url,
675+ model : requestPayload . model ,
676+ code : error . code ,
677+ status : response . status ,
678+ statusText : response . statusText ,
679+ } ,
680+ ) ;
681+ } catch {
682+ // ignore logging errors
683+ }
684+ }
685+
686+ throw error ;
643687 }
644688
645689 if ( ! response . body ) {
0 commit comments