feat(ocis-backup): adds oCIS volume backup CronJobs

Implements daily online backups for oCIS persistent volumes.

Each CronJob uses `rclone` to sync its PVC to a Storage Box. The volume is mounted read-only alongside the running application, so the backup needs no downtime. Because the volumes are ReadWriteOnce, pod affinity schedules each backup job on the same node as the application pod that consumes the volume. This covers the `idm`, `storagesystem`, and `storageusers` data volumes.
This commit is contained in:
Felix Wolf 2026-05-03 02:52:53 +02:00
parent d65181de78
commit 122e03f3ec
4 changed files with 420 additions and 0 deletions

View file

@ -0,0 +1,120 @@
#@ load("@ytt:data", "data")
#@ ns = data.values.application.namespace
#! Daily online volume backup: one CronJob per oCIS PVC, each pinned
#! via podAffinity to the consumer pod that already holds that volume.
#! The RWO PVC is mounted read-only alongside the running app, so there
#! is no downtime, no scale-down dance, and no cross-pod coupling.
#!
#! Storage Box layout (latest-state mirror). The sub-directory is the
#! app name, not the claim name, because the PVC env var below is set
#! from t["app"]:
#!   storagebox:ocis-volumes/{app}/...
#!
#! NOTE: online backup — not crash-consistent. decomposedfs writes may
#! be caught mid-flight. Acceptable trade for zero-downtime.
#!
#! Each target pairs the `app` label value of the consumer pod with the
#! claim that pod mounts.
#@ targets = [
#@   {"app": "storageusers", "pvc": "storageusers-data"},
#@   {"app": "idm", "pvc": "idm-data"},
#@   {"app": "storagesystem", "pvc": "storagesystem-data"},
#@ ]
#@ for t in targets:
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: #@ "ocis-volume-backup-{}".format(t["app"])
  namespace: #@ ns
spec:
  #! 02:30 every day; no spec.timeZone is set.
  schedule: "30 2 * * *"
  #! Skip a run while the previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      #! Finished Jobs are garbage-collected after 24 hours.
      ttlSecondsAfterFinished: 86400
      template:
        spec:
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1009
            runAsGroup: 1009
            fsGroup: 1009
            #! Only re-own the volume when its root directory does not
            #! already match fsGroup, so the daily job does not walk the
            #! live data volume recursively on every mount.
            fsGroupChangePolicy: OnRootMismatch
            seccompProfile:
              type: RuntimeDefault
          affinity:
            podAffinity:
              #! Hard requirement: run on the node that already hosts a
              #! pod labelled app=<target app>.
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app: #@ t["app"]
                  topologyKey: kubernetes.io/hostname
          containers:
            - name: rclone
              image: rclone/rclone:1.69.0
              command: [/bin/sh, -c]
              args:
                - |
                  set -eu
                  echo "[backup] Syncing ${PVC}..."
                  rclone sync "/pvc" "storagebox:ocis-volumes/${PVC}" \
                    --links \
                    --transfers=4 \
                    -v
                  echo "[backup] Done."
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
              env:
                #! Despite its name, PVC carries the app name; it selects
                #! the remote sub-directory and appears in the log line.
                - name: PVC
                  value: #@ t["app"]
                #! The `storagebox` rclone remote is defined entirely via
                #! RCLONE_CONFIG_STORAGEBOX_* variables (SFTP, port 23).
                #! NOTE(review): no ..._KNOWN_HOSTS_FILE is set, so the
                #! server host key is presumably not verified — confirm.
                - name: RCLONE_CONFIG_STORAGEBOX_TYPE
                  value: sftp
                - name: RCLONE_CONFIG_STORAGEBOX_PORT
                  value: "23"
                - name: RCLONE_CONFIG_STORAGEBOX_KEY_FILE
                  value: /etc/storagebox/ssh-key
                - name: RCLONE_CONFIG_STORAGEBOX_SHELL_TYPE
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_MD5SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_SHA1SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_HOST
                  valueFrom:
                    secretKeyRef:
                      name: ocis-storagebox-credentials
                      key: host
                - name: RCLONE_CONFIG_STORAGEBOX_USER
                  valueFrom:
                    secretKeyRef:
                      name: ocis-storagebox-credentials
                      key: user
              securityContext:
                allowPrivilegeEscalation: false
                readOnlyRootFilesystem: true
                capabilities:
                  drop:
                    - ALL
              volumeMounts:
                #! Source data; read-only so the backup cannot modify it.
                - name: pvc
                  mountPath: /pvc
                  readOnly: true
                - name: storagebox-ssh
                  mountPath: /etc/storagebox
                  readOnly: true
          volumes:
            - name: pvc
              persistentVolumeClaim:
                claimName: #@ t["pvc"]
            #! Only the ssh-key entry of the credentials Secret is
            #! projected as a file; host and user are read as env vars.
            - name: storagebox-ssh
              secret:
                secretName: ocis-storagebox-credentials
                items:
                  - key: ssh-key
                    path: ssh-key
                #! Octal 0440: readable by owner and group only.
                defaultMode: 0440
#@ end

View file

@ -0,0 +1,100 @@
# Daily rclone backup of the `idm-data` PVC to
# `storagebox:ocis-volumes/idm`. The job is pinned to the node that runs
# the pod labelled app=idm and mounts the volume read-only.
apiVersion: batch/v1
kind: CronJob
metadata:
  annotations:
    a8r.io/repository: ssh://git@git.tr1ceracop.de:222/gitea_admin/k8s-and-chill.git
  name: ocis-volume-backup-idm
  namespace: ocis
spec:
  # Skip a run while the previous one is still active.
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          affinity:
            podAffinity:
              # Hard requirement: co-locate with the app=idm pod.
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app: idm
                  topologyKey: kubernetes.io/hostname
          containers:
            - args:
                - |
                  set -eu
                  echo "[backup] Syncing ${PVC}..."
                  rclone sync "/pvc" "storagebox:ocis-volumes/${PVC}" \
                    --links \
                    --transfers=4 \
                    -v
                  echo "[backup] Done."
              command:
                - /bin/sh
                - -c
              env:
                # Despite its name, PVC holds the app name; it selects the
                # remote sub-directory and appears in the log line.
                - name: PVC
                  value: idm
                # The `storagebox` rclone remote is defined entirely via
                # RCLONE_CONFIG_STORAGEBOX_* variables (SFTP, port 23).
                # NOTE(review): no ..._KNOWN_HOSTS_FILE is set, so the
                # server host key is presumably not verified — confirm.
                - name: RCLONE_CONFIG_STORAGEBOX_TYPE
                  value: sftp
                - name: RCLONE_CONFIG_STORAGEBOX_PORT
                  value: "23"
                - name: RCLONE_CONFIG_STORAGEBOX_KEY_FILE
                  value: /etc/storagebox/ssh-key
                - name: RCLONE_CONFIG_STORAGEBOX_SHELL_TYPE
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_MD5SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_SHA1SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_HOST
                  valueFrom:
                    secretKeyRef:
                      key: host
                      name: ocis-storagebox-credentials
                - name: RCLONE_CONFIG_STORAGEBOX_USER
                  valueFrom:
                    secretKeyRef:
                      key: user
                      name: ocis-storagebox-credentials
              image: rclone/rclone:1.69.0
              name: rclone
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL
                readOnlyRootFilesystem: true
              volumeMounts:
                # Source data; read-only so the backup cannot modify it.
                - mountPath: /pvc
                  name: pvc
                  readOnly: true
                - mountPath: /etc/storagebox
                  name: storagebox-ssh
                  readOnly: true
          restartPolicy: OnFailure
          securityContext:
            fsGroup: 1009
            # Only re-own the volume when its root directory does not
            # already match fsGroup, so the daily job does not walk the
            # live data volume recursively on every mount.
            fsGroupChangePolicy: OnRootMismatch
            runAsGroup: 1009
            runAsNonRoot: true
            runAsUser: 1009
            seccompProfile:
              type: RuntimeDefault
          volumes:
            - name: pvc
              persistentVolumeClaim:
                claimName: idm-data
            # Only the ssh-key entry of the credentials Secret is
            # projected as a file; host and user are read as env vars.
            - name: storagebox-ssh
              secret:
                # 288 decimal == 0440 octal (owner and group read-only).
                defaultMode: 288
                items:
                  - key: ssh-key
                    path: ssh-key
                secretName: ocis-storagebox-credentials
      # Finished Jobs are garbage-collected after 24 hours.
      ttlSecondsAfterFinished: 86400
  # 02:30 every day; no spec.timeZone is set.
  schedule: "30 2 * * *"
  successfulJobsHistoryLimit: 3

View file

@ -0,0 +1,100 @@
# Daily rclone backup of the `storagesystem-data` PVC to
# `storagebox:ocis-volumes/storagesystem`. The job is pinned to the node
# that runs the pod labelled app=storagesystem and mounts the volume
# read-only.
apiVersion: batch/v1
kind: CronJob
metadata:
  annotations:
    a8r.io/repository: ssh://git@git.tr1ceracop.de:222/gitea_admin/k8s-and-chill.git
  name: ocis-volume-backup-storagesystem
  namespace: ocis
spec:
  # Skip a run while the previous one is still active.
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          affinity:
            podAffinity:
              # Hard requirement: co-locate with the app=storagesystem pod.
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app: storagesystem
                  topologyKey: kubernetes.io/hostname
          containers:
            - args:
                - |
                  set -eu
                  echo "[backup] Syncing ${PVC}..."
                  rclone sync "/pvc" "storagebox:ocis-volumes/${PVC}" \
                    --links \
                    --transfers=4 \
                    -v
                  echo "[backup] Done."
              command:
                - /bin/sh
                - -c
              env:
                # Despite its name, PVC holds the app name; it selects the
                # remote sub-directory and appears in the log line.
                - name: PVC
                  value: storagesystem
                # The `storagebox` rclone remote is defined entirely via
                # RCLONE_CONFIG_STORAGEBOX_* variables (SFTP, port 23).
                # NOTE(review): no ..._KNOWN_HOSTS_FILE is set, so the
                # server host key is presumably not verified — confirm.
                - name: RCLONE_CONFIG_STORAGEBOX_TYPE
                  value: sftp
                - name: RCLONE_CONFIG_STORAGEBOX_PORT
                  value: "23"
                - name: RCLONE_CONFIG_STORAGEBOX_KEY_FILE
                  value: /etc/storagebox/ssh-key
                - name: RCLONE_CONFIG_STORAGEBOX_SHELL_TYPE
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_MD5SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_SHA1SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_HOST
                  valueFrom:
                    secretKeyRef:
                      key: host
                      name: ocis-storagebox-credentials
                - name: RCLONE_CONFIG_STORAGEBOX_USER
                  valueFrom:
                    secretKeyRef:
                      key: user
                      name: ocis-storagebox-credentials
              image: rclone/rclone:1.69.0
              name: rclone
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL
                readOnlyRootFilesystem: true
              volumeMounts:
                # Source data; read-only so the backup cannot modify it.
                - mountPath: /pvc
                  name: pvc
                  readOnly: true
                - mountPath: /etc/storagebox
                  name: storagebox-ssh
                  readOnly: true
          restartPolicy: OnFailure
          securityContext:
            fsGroup: 1009
            # Only re-own the volume when its root directory does not
            # already match fsGroup, so the daily job does not walk the
            # live data volume recursively on every mount.
            fsGroupChangePolicy: OnRootMismatch
            runAsGroup: 1009
            runAsNonRoot: true
            runAsUser: 1009
            seccompProfile:
              type: RuntimeDefault
          volumes:
            - name: pvc
              persistentVolumeClaim:
                claimName: storagesystem-data
            # Only the ssh-key entry of the credentials Secret is
            # projected as a file; host and user are read as env vars.
            - name: storagebox-ssh
              secret:
                # 288 decimal == 0440 octal (owner and group read-only).
                defaultMode: 288
                items:
                  - key: ssh-key
                    path: ssh-key
                secretName: ocis-storagebox-credentials
      # Finished Jobs are garbage-collected after 24 hours.
      ttlSecondsAfterFinished: 86400
  # 02:30 every day; no spec.timeZone is set.
  schedule: "30 2 * * *"
  successfulJobsHistoryLimit: 3

View file

@ -0,0 +1,100 @@
# Daily rclone backup of the `storageusers-data` PVC to
# `storagebox:ocis-volumes/storageusers`. The job is pinned to the node
# that runs the pod labelled app=storageusers and mounts the volume
# read-only.
apiVersion: batch/v1
kind: CronJob
metadata:
  annotations:
    a8r.io/repository: ssh://git@git.tr1ceracop.de:222/gitea_admin/k8s-and-chill.git
  name: ocis-volume-backup-storageusers
  namespace: ocis
spec:
  # Skip a run while the previous one is still active.
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          affinity:
            podAffinity:
              # Hard requirement: co-locate with the app=storageusers pod.
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchLabels:
                      app: storageusers
                  topologyKey: kubernetes.io/hostname
          containers:
            - args:
                - |
                  set -eu
                  echo "[backup] Syncing ${PVC}..."
                  rclone sync "/pvc" "storagebox:ocis-volumes/${PVC}" \
                    --links \
                    --transfers=4 \
                    -v
                  echo "[backup] Done."
              command:
                - /bin/sh
                - -c
              env:
                # Despite its name, PVC holds the app name; it selects the
                # remote sub-directory and appears in the log line.
                - name: PVC
                  value: storageusers
                # The `storagebox` rclone remote is defined entirely via
                # RCLONE_CONFIG_STORAGEBOX_* variables (SFTP, port 23).
                # NOTE(review): no ..._KNOWN_HOSTS_FILE is set, so the
                # server host key is presumably not verified — confirm.
                - name: RCLONE_CONFIG_STORAGEBOX_TYPE
                  value: sftp
                - name: RCLONE_CONFIG_STORAGEBOX_PORT
                  value: "23"
                - name: RCLONE_CONFIG_STORAGEBOX_KEY_FILE
                  value: /etc/storagebox/ssh-key
                - name: RCLONE_CONFIG_STORAGEBOX_SHELL_TYPE
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_MD5SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_SHA1SUM_COMMAND
                  value: none
                - name: RCLONE_CONFIG_STORAGEBOX_HOST
                  valueFrom:
                    secretKeyRef:
                      key: host
                      name: ocis-storagebox-credentials
                - name: RCLONE_CONFIG_STORAGEBOX_USER
                  valueFrom:
                    secretKeyRef:
                      key: user
                      name: ocis-storagebox-credentials
              image: rclone/rclone:1.69.0
              name: rclone
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL
                readOnlyRootFilesystem: true
              volumeMounts:
                # Source data; read-only so the backup cannot modify it.
                - mountPath: /pvc
                  name: pvc
                  readOnly: true
                - mountPath: /etc/storagebox
                  name: storagebox-ssh
                  readOnly: true
          restartPolicy: OnFailure
          securityContext:
            fsGroup: 1009
            # Only re-own the volume when its root directory does not
            # already match fsGroup, so the daily job does not walk the
            # live data volume recursively on every mount.
            fsGroupChangePolicy: OnRootMismatch
            runAsGroup: 1009
            runAsNonRoot: true
            runAsUser: 1009
            seccompProfile:
              type: RuntimeDefault
          volumes:
            - name: pvc
              persistentVolumeClaim:
                claimName: storageusers-data
            # Only the ssh-key entry of the credentials Secret is
            # projected as a file; host and user are read as env vars.
            - name: storagebox-ssh
              secret:
                # 288 decimal == 0440 octal (owner and group read-only).
                defaultMode: 288
                items:
                  - key: ssh-key
                    path: ssh-key
                secretName: ocis-storagebox-credentials
      # Finished Jobs are garbage-collected after 24 hours.
      ttlSecondsAfterFinished: 86400
  # 02:30 every day; no spec.timeZone is set.
  schedule: "30 2 * * *"
  successfulJobsHistoryLimit: 3