fix(traefik): increase read-timeout to avoid crashing ocis for large uploads

Traefik's default readTimeout of 60s was killing the upload connection. The cascade was:

  1. Large upload exceeds 60s → Traefik kills connection
  2. storageusers floods with NetworkTimeoutError
  3. Aborted uploads generate tons of NATS events
  4. NATS gets overwhelmed → no response from stream
  5. Proxy can't resolve user roles → login returns 500
This commit is contained in:
Felix Wolf 2026-04-12 18:49:02 +02:00
parent f57d29d1d3
commit 88fa8c4df3
2 changed files with 8 additions and 0 deletions

View file

@ -11,8 +11,14 @@ deployment:
ports: ports:
web: web:
hostPort: 80 hostPort: 80
transport:
respondingTimeouts:
readTimeout: 600s
websecure: websecure:
hostPort: 443 hostPort: 443
transport:
respondingTimeouts:
readTimeout: 600s
metrics: metrics:
expose: expose:
default: true default: true

View file

@ -44,7 +44,9 @@ spec:
- --providers.kubernetesingress - --providers.kubernetesingress
- --providers.kubernetesingress.allowEmptyServices=true - --providers.kubernetesingress.allowEmptyServices=true
- --providers.kubernetesingress.ingressendpoint.publishedservice=traefik/traefik - --providers.kubernetesingress.ingressendpoint.publishedservice=traefik/traefik
- --entryPoints.web.transport.respondingTimeouts.readTimeout=600s
- --entryPoints.websecure.http.tls=true - --entryPoints.websecure.http.tls=true
- --entryPoints.websecure.transport.respondingTimeouts.readTimeout=600s
- --log.level=INFO - --log.level=INFO
env: env:
- name: POD_NAME - name: POD_NAME