k8s-and-chill/rendered/envs/production/traefik/daemonset-traefik.yaml
Felix Wolf 88fa8c4df3 fix(traefik): increase read-timeout to avoid crashing ocis for large uploads
Traefik's default readTimeout of 60s was killing the upload connection. The cascade was:

  1. Large upload exceeds 60s → Traefik kills connection
  2. storageusers floods with NetworkTimeoutError
  3. Aborted uploads generate tons of NATS events
  4. NATS gets overwhelmed → no response from stream
  5. Proxy can't resolve user roles → login returns 500
2026-04-12 18:49:02 +02:00

136 lines
4.1 KiB
YAML

---
# Traefik ingress controller, deployed as a DaemonSet in the `traefik`
# namespace. Labels mark this object as managed by Helm (chart traefik-39.0.5).
# NOTE(review): this looks like rendered chart output; presumably changes
# belong in the Helm values rather than in this file — confirm before editing.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  annotations:
    a8r.io/repository: ssh://git@git.tr1ceracop.de:222/gitea_admin/k8s-and-chill.git
  labels:
    app.kubernetes.io/instance: traefik-traefik
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: traefik
    helm.sh/chart: traefik-39.0.5
  name: traefik
  namespace: traefik
spec:
  minReadySeconds: 0
  selector:
    matchLabels:
      app.kubernetes.io/instance: traefik-traefik
      app.kubernetes.io/name: traefik
  template:
    metadata:
      # Prometheus scrape hints; the port matches the `metrics` entry point
      # (:9100) configured in the container args below. Values are quoted so
      # they stay strings, as annotation values must be.
      annotations:
        prometheus.io/path: /metrics
        prometheus.io/port: "9100"
        prometheus.io/scrape: "true"
      labels:
        app.kubernetes.io/instance: traefik-traefik
        app.kubernetes.io/managed-by: Helm
        app.kubernetes.io/name: traefik
        helm.sh/chart: traefik-39.0.5
    spec:
      automountServiceAccountToken: true
      containers:
        - args:
            # Entry points: one listener per container port declared below.
            - --entryPoints.metrics.address=:9100/tcp
            - --entryPoints.traefik.address=:8080/tcp
            - --entryPoints.web.address=:8000/tcp
            - --entryPoints.websecure.address=:8443/tcp
            - --api.dashboard=true
            # /ping backs the liveness and readiness probes below.
            - --ping=true
            - --metrics.prometheus=true
            - --metrics.prometheus.entrypoint=metrics
            - --providers.kubernetescrd
            - --providers.kubernetescrd.allowEmptyServices=true
            - --providers.kubernetesingress
            - --providers.kubernetesingress.allowEmptyServices=true
            - --providers.kubernetesingress.ingressendpoint.publishedservice=traefik/traefik
            # readTimeout is raised to 600s on both public entry points so
            # that long-running large uploads are not cut off mid-transfer
            # (the 60s default was terminating them).
            - --entryPoints.web.transport.respondingTimeouts.readTimeout=600s
            - --entryPoints.websecure.http.tls=true
            - --entryPoints.websecure.transport.respondingTimeouts.readTimeout=600s
            - --log.level=INFO
          env:
            # Pod identity injected through the downward API.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: USER
              value: traefik
          image: docker.io/traefik:v3.6.10
          imagePullPolicy: IfNotPresent
          lifecycle: null
          livenessProbe:
            failureThreshold: 3
            httpGet:
              path: /ping
              port: 8080
              scheme: HTTP
            initialDelaySeconds: 2
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 2
          name: traefik
          ports:
            - containerPort: 9100
              name: metrics
              protocol: TCP
            - containerPort: 8080
              name: traefik
              protocol: TCP
            # web and websecure are additionally bound on the node itself
            # via hostPort 80 and 443.
            - containerPort: 8000
              hostPort: 80
              name: web
              protocol: TCP
            - containerPort: 8443
              hostPort: 443
              name: websecure
              protocol: TCP
          # Same /ping check as the liveness probe, but a single failure
          # (failureThreshold: 1) already marks the pod as not ready.
          readinessProbe:
            failureThreshold: 1
            httpGet:
              path: /ping
              port: 8080
              scheme: HTTP
            initialDelaySeconds: 2
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 2
          # NOTE(review): no CPU/memory requests or limits are set here;
          # consider defining them in the chart values.
          resources: null
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            # Root filesystem is read-only; writable paths are the two
            # emptyDir mounts below.
            readOnlyRootFilesystem: true
          volumeMounts:
            - mountPath: /data
              name: data
            - mountPath: /tmp
              name: tmp
      hostNetwork: false
      securityContext:
        runAsGroup: 65532
        runAsNonRoot: true
        runAsUser: 65532
        seccompProfile:
          type: RuntimeDefault
      serviceAccountName: traefik
      # NOTE(review): the grace period (60s) is far shorter than the 600s
      # readTimeout above, so an upload still in flight when a pod is
      # terminated may be cut off — confirm this is acceptable.
      terminationGracePeriodSeconds: 60
      # Tolerate the control-plane NoSchedule taint so pods can also be
      # scheduled onto control-plane nodes.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Exists
      volumes:
        - emptyDir: {}
          name: data
        - emptyDir: {}
          name: tmp
  updateStrategy:
    # Replace pods one node at a time without surging; presumably because a
    # surge pod could not bind hostPorts 80/443 while the old pod still
    # holds them on the same node — TODO confirm.
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate