---
# Best-effort diagnostics for the Karpenter and Cluster Autoscaler (CAS) clusters.
#
# Every collection task uses `ignore_errors: true`, so a failing kubectl call
# never aborts the play; the summary built at the end substitutes a fallback
# message for any section whose command produced no output.
#
# Variables read by these tasks:
#   cloud_provider      - provider id; 'iks' selects the IBM-specific branches
#   cloud_provider_env  - environment passed to every kubectl invocation
#   workload_scenario   - sub-directory name for the written summary
#   KUBECONFIG_<PROVIDER>_KARPENTER / KUBECONFIG_<PROVIDER>_CAS
#                       - looked up by name, with <PROVIDER> being
#                         `cloud_provider` upper-cased

- name: Target correct resource group for debugging
  ansible.builtin.command: ibmcloud target -g kscale-workload
  when: cloud_provider == 'iks'
  ignore_errors: true

- name: Debug - Get all pods across namespaces (Karpenter)
  ansible.builtin.command: >-
    kubectl get po -A
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: all_pods_karpenter
  changed_when: false  # read-only query
  ignore_errors: true

- name: Debug - Get all pods across namespaces (CAS)
  ansible.builtin.command: >-
    kubectl get po -A
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_CAS') }}
  environment: "{{ cloud_provider_env }}"
  register: all_pods_cas
  changed_when: false  # read-only query
  ignore_errors: true

# No host, port or path is given, so this task only pauses for `timeout` seconds.
- name: Wait for workloads to stabilize
  ansible.builtin.wait_for:
    timeout: 60

# Second snapshot of the Karpenter cluster, taken after the pause, with -o wide.
- name: Debug - Get all pods across all namespaces
  ansible.builtin.command: >-
    kubectl get po -A -o wide
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: all_pods
  changed_when: false  # read-only query
  ignore_errors: true

# Prints only the namespace column for each matching pod; later tasks use the
# first output line as the controller namespace.
- name: Debug - Get Karpenter pods and extract namespace
  ansible.builtin.command: >-
    kubectl get pods -A -l app.kubernetes.io/name=karpenter
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
    -o custom-columns=NAMESPACE:.metadata.namespace --no-headers
  environment: "{{ cloud_provider_env }}"
  register: karpenter_namespace
  changed_when: false  # read-only query
  ignore_errors: true

# The CAS label selector differs on IKS, hence the inline conditional.
- name: Debug - Get CAS pods and extract namespace
  ansible.builtin.command: >-
    kubectl get pods -A -l
    {{ 'app.kubernetes.io/name=ibm-iks-cluster-autoscaler' if cloud_provider == 'iks' else 'app=cluster-autoscaler' }}
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_CAS') }}
    -o custom-columns=NAMESPACE:.metadata.namespace --no-headers
  environment: "{{ cloud_provider_env }}"
  register: cas_namespace
  changed_when: false  # read-only query
  ignore_errors: true

- name: Debug - Get Karpenter pods
  ansible.builtin.command: >-
    kubectl get pods -A -l app.kubernetes.io/name=karpenter
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: karpenter_pods
  changed_when: false  # read-only query
  ignore_errors: true

- name: Debug - Get CAS pods
  ansible.builtin.command: >-
    kubectl get pods -A -l
    {{ 'app.kubernetes.io/name=ibm-iks-cluster-autoscaler' if cloud_provider == 'iks' else 'app=cluster-autoscaler' }}
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_CAS') }}
  environment: "{{ cloud_provider_env }}"
  register: cas_pods
  changed_when: false  # read-only query
  ignore_errors: true

# The namespace-dependent tasks below are skipped when the corresponding
# namespace lookup returned no lines (stdout_lines[0] is then undefined).
- name: Debug - Get Karpenter controller logs
  ansible.builtin.command: >-
    kubectl logs -n {{ karpenter_namespace.stdout_lines[0] }}
    -l app.kubernetes.io/name=karpenter
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: karpenter_logs
  changed_when: false  # read-only query
  ignore_errors: true
  when: karpenter_namespace.stdout_lines[0] is defined

- name: Debug - Get CAS controller logs
  ansible.builtin.command: >-
    kubectl logs -n {{ cas_namespace.stdout_lines[0] }} -l
    {{ 'app.kubernetes.io/name=ibm-iks-cluster-autoscaler' if cloud_provider == 'iks' else 'app=cluster-autoscaler' }}
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_CAS') }}
  environment: "{{ cloud_provider_env }}"
  register: cas_logs
  changed_when: false  # read-only query
  ignore_errors: true
  when: cas_namespace.stdout_lines[0] is defined

- name: Debug - Describe Karpenter deployment
  ansible.builtin.command: >-
    kubectl describe deployment -n {{ karpenter_namespace.stdout_lines[0] }}
    -l app.kubernetes.io/name=karpenter
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: karpenter_describe
  changed_when: false  # read-only query
  ignore_errors: true
  when: karpenter_namespace.stdout_lines[0] is defined

- name: Debug - Describe CAS deployment
  ansible.builtin.command: >-
    kubectl describe deployment -n {{ cas_namespace.stdout_lines[0] }} -l
    {{ 'app.kubernetes.io/name=ibm-iks-cluster-autoscaler' if cloud_provider == 'iks' else 'app=cluster-autoscaler' }}
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_CAS') }}
  environment: "{{ cloud_provider_env }}"
  register: cas_describe
  changed_when: false  # read-only query
  ignore_errors: true
  when: cas_namespace.stdout_lines[0] is defined

- name: Debug - Get Karpenter resources
  ansible.builtin.command: >-
    kubectl get nodeclass,nodepool,provisioner -A
    --kubeconfig={{ lookup('vars', 'KUBECONFIG_' ~ cloud_provider | upper ~ '_KARPENTER') }}
  environment: "{{ cloud_provider_env }}"
  register: karpenter_resources
  changed_when: false  # read-only query
  ignore_errors: true

# `default(..., true)` also replaces EMPTY values, not just undefined ones.
# A failed command still registers `stdout` (as an empty string), so without
# the flag the fallback text would only appear for skipped tasks.
# Consequence: a command that succeeds with no output shows the fallback too.
# NOTE: the two "Pod Details" sections hold `kubectl describe deployment`
# output; the headings are kept unchanged for anything that parses this file.
- name: Set debug summary
  ansible.builtin.set_fact:
    debug_summary: |
      All Pods Across Namespaces:
      {{ all_pods.stdout | default('Failed to get pods', true) }}

      Karpenter Pods Status:
      {{ all_pods_karpenter.stdout | default('Failed to get pods', true) }}

      CAS Pods Status:
      {{ all_pods_cas.stdout | default('Failed to get pods', true) }}

      Karpenter Pods in Namespace:
      {{ karpenter_pods.stdout | default('Failed to get pods', true) }}

      CAS Pods in Namespace:
      {{ cas_pods.stdout | default('Failed to get pods', true) }}

      Karpenter Logs:
      {{ karpenter_logs.stdout | default('Failed to get logs', true) }}

      CAS Logs:
      {{ cas_logs.stdout | default('Failed to get logs', true) }}

      Karpenter Pod Details:
      {{ karpenter_describe.stdout | default('Failed to get pod details', true) }}

      CAS Pod Details:
      {{ cas_describe.stdout | default('Failed to get pod details', true) }}

      Karpenter Resources:
      {{ karpenter_resources.stdout | default('Failed to get Karpenter resources', true) }}

# Splitting on newlines makes the debug module print one list item per line
# instead of a single string with escaped "\n" sequences.
- name: Display debug information
  ansible.builtin.debug:
    msg: "{{ debug_summary.split('\n') }}"

# Paths are relative, so they resolve against the working directory of the
# process performing the write.
- name: Ensure debug directories exist
  ansible.builtin.file:
    path: "ansible/experiment-data/{{ cloud_provider }}/{{ workload_scenario }}/{{ item }}"
    state: directory
    mode: '0755'
  loop:
    - karpenter
    - cas

# The same combined summary is written into both autoscaler directories.
- name: Write debug information to files
  ansible.builtin.copy:
    content: "{{ debug_summary }}"
    dest: "ansible/experiment-data/{{ cloud_provider }}/{{ workload_scenario }}/{{ item }}/debug_summary.txt"
    mode: '0644'
  loop:
    - karpenter
    - cas