diff --git a/api/v1alpha2/etcdmember_types.go b/api/v1alpha2/etcdmember_types.go index 7c50fbd3..89391cb9 100644 --- a/api/v1alpha2/etcdmember_types.go +++ b/api/v1alpha2/etcdmember_types.go @@ -63,6 +63,12 @@ const ( MemberJoined = "Joined" // MemberReady indicates the member is healthy and serving requests. MemberReady = "Ready" + // MemberVersionDrifted is True when the version etcd actually reports + // running (status.version, observed from the endpoint) does not match the + // version the operator asked this member to run (spec.version). It makes + // intent-vs-reality version drift detectable rather than assumed; the + // operator does not act on it (spec.version still drives the image tag). + MemberVersionDrifted = "VersionDrifted" ) // EtcdMemberSpec defines the desired state of a single etcd member. @@ -223,6 +229,18 @@ type EtcdMemberStatus struct { // +optional PVCName string `json:"pvcName,omitempty"` + // Version is the etcd server version this member is actually running, + // observed at runtime from the member's own etcd endpoint via the + // Maintenance Status API (StatusResponse.Version) — i.e. what etcd + // reports, as opposed to spec.version, which is the intended/target + // version the operator asks for (and pins the image tag to). Empty until + // the member's Pod is Ready and the operator has successfully queried it. + // This observed value is the source of truth for detecting version drift + // between intent and reality (see the VersionDrifted condition); the + // observation is best-effort and never gates readiness. + // +optional + Version string `json:"version,omitempty"` + // Conditions represent the latest available observations of the member's state. // +optional Conditions []metav1.Condition `json:"conditions,omitempty"` @@ -233,6 +251,7 @@ type EtcdMemberStatus struct { // +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector // +kubebuilder:printcolumn:name="Cluster",type=string,JSONPath=`.spec.clusterName` // +kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.version` +// +kubebuilder:printcolumn:name="Running",type=string,JSONPath=`.status.version` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status` // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` diff --git a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml index 36c945ab..cfa36427 100644 --- a/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml +++ b/charts/etcd-operator/crd-bases/etcd-operator.cozystack.io_etcdmembers.yaml @@ -21,6 +21,9 @@ spec: - jsonPath: .spec.version name: Version type: string + - jsonPath: .status.version + name: Running + type: string - jsonPath: .status.conditions[?(@.type=="Ready")].status name: Ready type: string @@ -1683,6 +1686,18 @@ spec: Selector exposes the label-selector that matches this member's Pod via /scale (consumed by the PDB controller; not user-facing). type: string + version: + description: |- + Version is the etcd server version this member is actually running, + observed at runtime from the member's own etcd endpoint via the + Maintenance Status API (StatusResponse.Version) — i.e. what etcd + reports, as opposed to spec.version, which is the intended/target + version the operator asks for (and pins the image tag to). Empty until + the member's Pod is Ready and the operator has successfully queried it. + This observed value is the source of truth for detecting version drift + between intent and reality (see the VersionDrifted condition); the + observation is best-effort and never gates readiness. + type: string type: object type: object served: true diff --git a/controllers/etcd_client.go b/controllers/etcd_client.go index 15a58013..e00d8fc0 100644 --- a/controllers/etcd_client.go +++ b/controllers/etcd_client.go @@ -35,6 +35,13 @@ type EtcdClusterClient interface { MemberPromote(ctx context.Context, id uint64) (*clientv3.MemberPromoteResponse, error) MemberRemove(ctx context.Context, id uint64) (*clientv3.MemberRemoveResponse, error) + // Status returns a single endpoint's server status, including the etcd + // version it is actually running (StatusResponse.Version). Used by the + // member controller to observe the running version into + // EtcdMember.status.version. *clientv3.Client satisfies this via its + // embedded Maintenance interface. + Status(ctx context.Context, endpoint string) (*clientv3.StatusResponse, error) + // Auth surface — used by reconcileAuth to provision the single root // user/role and turn on authentication. The "root" role is built into // etcd, so a RoleAdd is not needed: UserAdd("root", …) + diff --git a/controllers/etcdmember_controller.go b/controllers/etcdmember_controller.go index 6af23952..c87c69ab 100644 --- a/controllers/etcdmember_controller.go +++ b/controllers/etcdmember_controller.go @@ -1040,6 +1040,10 @@ func (r *EtcdMemberReconciler) updateStatus(ctx context.Context, member *lll.Etc } } + // ready tracks whether this reconcile settled on MemberReady=True; only + // then do we bother observing the running etcd version (below). + ready := false + switch { case !podReady: // Self-heal an unrecoverable member. A non-bootstrap PVC member whose @@ -1073,6 +1077,7 @@ func (r *EtcdMemberReconciler) updateStatus(ctx context.Context, member *lll.Etc if id, err := r.discoverMemberID(ctx, member); err == nil { member.Status.MemberID = fmt.Sprintf("%016x", id) changed = true + ready = true if setMemberCondition(member, lll.MemberReady, metav1.ConditionTrue, "PodReady", "etcd member is ready") { changed = true @@ -1084,12 +1089,46 @@ func (r *EtcdMemberReconciler) updateStatus(ctx context.Context, member *lll.Etc } } default: + ready = true if setMemberCondition(member, lll.MemberReady, metav1.ConditionTrue, "PodReady", "etcd member is ready") { changed = true } } + // Observe the running etcd version once the member is Ready and record it + // in status, plus surface a VersionDrifted condition when what etcd + // actually runs diverges from the intended spec.version. Observation is + // best-effort (observeVersion never errors); the drift condition is + // informational — the operator does not act on it. + if ready { + if v := r.observeVersion(ctx, member); v != member.Status.Version { + member.Status.Version = v + changed = true + } + // Only evaluate drift once we know BOTH sides: the observed running + // version and a non-empty intended spec.version. spec.version is + // propagated from cluster.Status.Observed.Version for scale-up members, + // which is transiently "" before the first latch — treat unknown intent + // as "no drift" rather than flagging a spurious mismatch. + if member.Status.Version != "" && member.Spec.Version != "" { + condStatus := metav1.ConditionFalse + reason := "VersionMatched" + msg := fmt.Sprintf("running etcd version %q matches intended spec.version", member.Status.Version) + if member.Status.Version != member.Spec.Version { + condStatus = metav1.ConditionTrue + reason = "VersionMismatch" + msg = fmt.Sprintf("running etcd version %q does not match intended spec.version %q", + member.Status.Version, member.Spec.Version) + log.Info("etcd member version drift detected", + "running", member.Status.Version, "intended", member.Spec.Version) + } + if setMemberCondition(member, lll.MemberVersionDrifted, condStatus, reason, msg) { + changed = true + } + } + } + if changed { if err := r.Status().Update(ctx, member); err != nil { return ctrl.Result{}, err @@ -1162,28 +1201,9 @@ func (r *EtcdMemberReconciler) discoverMemberID(ctx context.Context, member *lll endpoints = append(endpoints, clientURL(scheme, member.Name, memberServiceName(member), member.Namespace)) } - // Build the operator-side dial config. Only TLS clusters need the parent - // EtcdCluster fetched: auth requires client TLS (CEL-enforced), which is - // propagated to member.Spec.TLS.ClientServerSecretRef, so a member with - // no client TLS can never have auth enabled — its dial is plaintext and - // anonymous, exactly as before. When TLS is set we fetch the cluster once - // and derive both the TLS config and the credentials from it (after auth - // is enabled the voter peers we dial here reject anonymous access). - var tlsCfg *tls.Config - var user, pass string - if member.Spec.TLS != nil && member.Spec.TLS.ClientServerSecretRef != nil { - cluster, err := r.clusterFor(ctx, member) - if err != nil { - return 0, err - } - tlsCfg, err = buildOperatorTLSConfig(ctx, r.Client, cluster) - if err != nil { - return 0, err - } - user, pass, _, err = resolveEtcdCredentials(ctx, r.Client, cluster) - if err != nil { - return 0, err - } + tlsCfg, user, pass, err := r.etcdDialConfig(ctx, member) + if err != nil { + return 0, err } c, err := r.EtcdClientFactory(ctx, endpoints, tlsCfg, user, pass) if err != nil { @@ -1214,6 +1234,71 @@ func (r *EtcdMemberReconciler) discoverMemberID(ctx context.Context, member *lll return 0, fmt.Errorf("member %q not found in etcd member list", member.Name) } +// etcdDialConfig builds the operator-side dial config for reaching this +// member's etcd cluster: the TLS config and the auth credentials. Only TLS +// clusters need the parent EtcdCluster fetched: auth requires client TLS +// (CEL-enforced), which is propagated to member.Spec.TLS.ClientServerSecretRef, +// so a member with no client TLS can never have auth enabled — its dial is +// plaintext and anonymous. When TLS is set we fetch the cluster once and +// derive both the TLS config and the credentials from it (after auth is +// enabled the peers we dial reject anonymous access). Returns (nil, "", "") +// for plaintext, anonymous clusters. +func (r *EtcdMemberReconciler) etcdDialConfig(ctx context.Context, member *lll.EtcdMember) (*tls.Config, string, string, error) { + if member.Spec.TLS == nil || member.Spec.TLS.ClientServerSecretRef == nil { + return nil, "", "", nil + } + cluster, err := r.clusterFor(ctx, member) + if err != nil { + return nil, "", "", err + } + tlsCfg, err := buildOperatorTLSConfig(ctx, r.Client, cluster) + if err != nil { + return nil, "", "", err + } + user, pass, _, err := resolveEtcdCredentials(ctx, r.Client, cluster) + if err != nil { + return nil, "", "", err + } + return tlsCfg, user, pass, nil +} + +// observeVersion reads the etcd version this member is actually running from +// its own etcd endpoint (the Maintenance Status API, StatusResponse.Version) +// and returns it. Best-effort: on any dial/RPC failure it logs and returns the +// member's previous status.Version unchanged, so version observation never +// fails the reconcile or affects readiness — Ready stays driven purely by Pod +// readiness and MemberID discovery. The dial targets this member's own client +// URL (Status is per-endpoint, so it reports this member's version, not a +// peer's). +func (r *EtcdMemberReconciler) observeVersion(ctx context.Context, member *lll.EtcdMember) string { + log := log.FromContext(ctx) + prev := member.Status.Version + + self := clientURL(memberClientScheme(member), member.Name, memberServiceName(member), member.Namespace) + + tlsCfg, user, pass, err := r.etcdDialConfig(ctx, member) + if err != nil { + log.V(1).Info("skipping version observation: cannot build dial config", "error", err) + return prev + } + c, err := r.EtcdClientFactory(ctx, []string{self}, tlsCfg, user, pass) + if err != nil { + log.V(1).Info("skipping version observation: cannot dial etcd", "error", err) + return prev + } + defer c.Close() + + statusCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + resp, err := c.Status(statusCtx, self) + if err != nil { + log.V(1).Info("skipping version observation: Status RPC failed", "error", err) + return prev + } + return resp.Version +} + // ── Manager wiring ─────────────────────────────────────────────────────── func (r *EtcdMemberReconciler) SetupWithManager(mgr ctrl.Manager) error { diff --git a/controllers/etcdmember_controller_test.go b/controllers/etcdmember_controller_test.go index 8fc2ab8a..56096f8a 100644 --- a/controllers/etcdmember_controller_test.go +++ b/controllers/etcdmember_controller_test.go @@ -692,6 +692,181 @@ func TestUpdateStatus_PopulatesMemberIDAndFlipsReady(t *testing.T) { } } +// findMemberCondition returns the condition of the given type, or nil. +func findMemberCondition(member *lll.EtcdMember, condType string) *metav1.Condition { + for i := range member.Status.Conditions { + if member.Status.Conditions[i].Type == condType { + return &member.Status.Conditions[i] + } + } + return nil +} + +// readyMemberWithFake builds a Ready member (Pod ready, etcd knows it by name) +// plus a fakeEtcd wired to report statusVersion, and runs updateStatus. It +// drives the Ready path so observeVersion runs. +func readyMemberWithFake(t *testing.T, specVersion, statusVersion string, statusErr error) *lll.EtcdMember { + t.Helper() + ctx := context.Background() + member := &lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns", Labels: memberLabels("test", "test-0")}, + Spec: lll.EtcdMemberSpec{ClusterName: "test", Version: specVersion, Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, InitialCluster: "x", ClusterToken: "test"}, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{readyPodCondition()}, + }, + } + c, _ := newTestClient(t, member, pod) + fe := newFakeEtcd(0xdeadbeef, + &etcdserverpb.Member{ID: 0xae36f238164a08ad, Name: "test-0", PeerURLs: []string{peerURL("http", "test-0", "test", "ns")}}, + ) + fe.statusVersion = statusVersion + fe.statusErr = statusErr + r := &EtcdMemberReconciler{Client: c, Scheme: testScheme(t), EtcdClientFactory: factoryReturning(fe)} + if _, err := r.updateStatus(ctx, member); err != nil { + t.Fatalf("updateStatus: %v", err) + } + return mustGet(t, c, "test-0", "ns", member) +} + +// TestUpdateStatus_ObservesRunningVersion: once the member is Ready, the +// controller records the version etcd actually reports into status.version and, +// when it equals spec.version, marks VersionDrifted=False. +func TestUpdateStatus_ObservesRunningVersion(t *testing.T) { + member := readyMemberWithFake(t, "3.5.17", "3.5.17", nil) + + if member.Status.Version != "3.5.17" { + t.Fatalf("status.Version = %q, want 3.5.17", member.Status.Version) + } + drift := findMemberCondition(member, lll.MemberVersionDrifted) + if drift == nil || drift.Status != metav1.ConditionFalse { + t.Fatalf("VersionDrifted = %+v, want False", drift) + } + if drift.Reason != "VersionMatched" { + t.Fatalf("VersionDrifted reason = %q, want VersionMatched", drift.Reason) + } +} + +// TestUpdateStatus_VersionDriftDetected: when etcd reports a version different +// from the intended spec.version, status.version reflects reality and +// VersionDrifted flips True (the signal the operator surfaces but does not act +// on). +func TestUpdateStatus_VersionDriftDetected(t *testing.T) { + member := readyMemberWithFake(t, "3.5.17", "3.6.4", nil) + + if member.Status.Version != "3.6.4" { + t.Fatalf("status.Version = %q, want observed 3.6.4", member.Status.Version) + } + drift := findMemberCondition(member, lll.MemberVersionDrifted) + if drift == nil || drift.Status != metav1.ConditionTrue { + t.Fatalf("VersionDrifted = %+v, want True", drift) + } + if drift.Reason != "VersionMismatch" { + t.Fatalf("VersionDrifted reason = %q, want VersionMismatch", drift.Reason) + } + // Readiness must be unaffected by drift. + if ready := findMemberCondition(member, lll.MemberReady); ready == nil || ready.Status != metav1.ConditionTrue { + t.Fatalf("Ready condition = %+v, want True", ready) + } +} + +// TestUpdateStatus_NoDriftWhenIntentUnknown: when spec.version is empty (a +// scale-up member stamped from a transiently-unlatched Observed.Version), the +// observed running version is still recorded, but VersionDrifted must NOT be +// set — unknown intent is treated as "no drift", not a spurious mismatch. +func TestUpdateStatus_NoDriftWhenIntentUnknown(t *testing.T) { + member := readyMemberWithFake(t, "", "3.6.4", nil) + + if member.Status.Version != "3.6.4" { + t.Fatalf("status.Version = %q, want observed 3.6.4", member.Status.Version) + } + if drift := findMemberCondition(member, lll.MemberVersionDrifted); drift != nil { + t.Fatalf("VersionDrifted = %+v, want unset when spec.version is empty", drift) + } +} + +// TestUpdateStatus_VersionDriftResolves: a member that was drifted +// (VersionDrifted=True) and then comes up on the intended version must flip the +// condition back to False/VersionMatched and record the new observed version. +func TestUpdateStatus_VersionDriftResolves(t *testing.T) { + ctx := context.Background() + member := &lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns", Labels: memberLabels("test", "test-0")}, + // Steady-state Ready (MemberID set) with a prior drifted observation. + Spec: lll.EtcdMemberSpec{ClusterName: "test", Version: "3.6.4", Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, InitialCluster: "x", ClusterToken: "test"}, + Status: lll.EtcdMemberStatus{ + MemberID: "ae36f238164a08ad", + Version: "3.5.17", + Conditions: []metav1.Condition{{ + Type: lll.MemberVersionDrifted, Status: metav1.ConditionTrue, Reason: "VersionMismatch", + LastTransitionTime: metav1.Now(), Message: "was drifted", + }}, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{readyPodCondition()}, + }, + } + c, _ := newTestClient(t, member, pod) + fe := newFakeEtcd(0xdeadbeef) + fe.statusVersion = "3.6.4" // member has caught up to intent + r := &EtcdMemberReconciler{Client: c, Scheme: testScheme(t), EtcdClientFactory: factoryReturning(fe)} + if _, err := r.updateStatus(ctx, member); err != nil { + t.Fatalf("updateStatus: %v", err) + } + member = mustGet(t, c, "test-0", "ns", member) + + if member.Status.Version != "3.6.4" { + t.Fatalf("status.Version = %q, want 3.6.4", member.Status.Version) + } + drift := findMemberCondition(member, lll.MemberVersionDrifted) + if drift == nil || drift.Status != metav1.ConditionFalse || drift.Reason != "VersionMatched" { + t.Fatalf("VersionDrifted = %+v, want False/VersionMatched after catch-up", drift) + } +} + +// TestUpdateStatus_VersionObservationErrorIsNonFatal: a failing Status RPC must +// leave the previously observed version intact and must not disturb readiness — +// version observation is best-effort. +func TestUpdateStatus_VersionObservationErrorIsNonFatal(t *testing.T) { + ctx := context.Background() + member := &lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns", Labels: memberLabels("test", "test-0")}, + // MemberID pre-set → steady-state Ready path (default case), so + // observeVersion runs even though discovery is skipped. + Spec: lll.EtcdMemberSpec{ClusterName: "test", Version: "3.5.17", Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, InitialCluster: "x", ClusterToken: "test"}, + Status: lll.EtcdMemberStatus{MemberID: "ae36f238164a08ad", Version: "3.5.17"}, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-0", Namespace: "ns"}, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{readyPodCondition()}, + }, + } + c, _ := newTestClient(t, member, pod) + fe := newFakeEtcd(0xdeadbeef) + fe.statusErr = errors.New("dial timeout") + r := &EtcdMemberReconciler{Client: c, Scheme: testScheme(t), EtcdClientFactory: factoryReturning(fe)} + if _, err := r.updateStatus(ctx, member); err != nil { + t.Fatalf("updateStatus: %v", err) + } + member = mustGet(t, c, "test-0", "ns", member) + + if member.Status.Version != "3.5.17" { + t.Fatalf("status.Version = %q, want prior value 3.5.17 preserved on Status error", member.Status.Version) + } + if ready := findMemberCondition(member, lll.MemberReady); ready == nil || ready.Status != metav1.ConditionTrue { + t.Fatalf("Ready condition = %+v, want True despite Status error", ready) + } +} + // TestEtcdContainerStuck pins the self-heal detection: an etcd container is // "stuck" only when it is not ready, has restarted at least the threshold, and // was not OOMKilled — and never while the Pod is itself being deleted. diff --git a/controllers/testing_helpers_test.go b/controllers/testing_helpers_test.go index f3800f15..5c63020a 100644 --- a/controllers/testing_helpers_test.go +++ b/controllers/testing_helpers_test.go @@ -43,6 +43,11 @@ type fakeEtcd struct { removeErr error promoteErr error + // statusVersion is the etcd server version Status reports (observed + // running version); statusErr, when set, fails the Status RPC. + statusVersion string + statusErr error + addCalls []string addLearnerCalls []string promoteCalls []uint64 @@ -176,6 +181,16 @@ func (f *fakeEtcd) UserGrantRole(_ context.Context, user, role string) (*clientv return &clientv3.AuthUserGrantRoleResponse{Header: &etcdserverpb.ResponseHeader{ClusterId: f.clusterID}}, nil } +func (f *fakeEtcd) Status(_ context.Context, _ string) (*clientv3.StatusResponse, error) { + if f.statusErr != nil { + return nil, f.statusErr + } + return &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{ClusterId: f.clusterID}, + Version: f.statusVersion, + }, nil +} + func (f *fakeEtcd) Close() error { f.closed = true; return nil } func factoryReturning(c EtcdClusterClient) EtcdClientFactory { diff --git a/docs/concepts.md b/docs/concepts.md index a0daf24c..a39c983e 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -434,6 +434,12 @@ The cluster surfaces three conditions: `Available`, `Progressing`, `Degraded`. T All conditions carry `observedGeneration` so consumers can tell whether a condition reflects the latest spec. Status writes are gated on "did anything actually change" — the operator does not bump `resourceVersion` every 30 s just because of the periodic reconcile. +### Observed member version + +`spec.version` is *intent* — it pins the image tag (`v`) and drives the restore version-compat gate. What etcd is **actually running** is a separate, observed fact. Once a member's Pod is Ready, the member controller reads the running version from that member's own etcd endpoint (the Maintenance `Status` RPC) and records it in `EtcdMember.status.version` (surfaced as the `Running` print column). The read is best-effort: a dial or RPC failure leaves the previous value in place and never affects `Ready` — readiness stays driven by Pod readiness and member-ID discovery alone. + +When the observed version diverges from the member's intended `spec.version`, the member surfaces `VersionDrifted=True/VersionMismatch`; when they agree it is `False/VersionMatched`; when intent is not yet known (`spec.version` empty) the condition is left unset. This condition is **informational only** — the operator does not act on it (it never keys reconciliation off the observed value). It exists so intent-vs-reality drift is *detectable rather than assumed*, which is the prerequisite for safely reconsidering a per-cluster image/version override. + ## Snapshots & restore Two surfaces, one agent. The operator image doubles as a snapshot agent: `main.go` dispatches on `os.Args[1]` so `manager snapshot-agent` / `manager restore-agent` run the agent and exit, while a bare `manager` runs the controller. This keeps one binary and one image — no separate agent build — and means the agent always matches the operator it ships with.