:"assistant","uuid":"f39df868-e9af-42d3-82b4-6ee787be2eb8","timestamp":"2026-01-10T10:44:55.171Z"}
{"parentUuid":"f39df868-e9af-42d3-82b4-6ee787be2eb8","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VUGpZSFk9Y3dqwp3REuS1W","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01LvZRqTb8ySJTg1wNgWkC6C","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/scheduling_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5,"cache_creation_input_tokens":975,"cache_read_input_tokens":20805,"cache_creation":{"ephemeral_5m_input_tokens":975,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWhP1PRbnZ32FNKe5L","type":"assistant","uuid":"bd8eb290-3d76-4c07-87ee-d80beaa1baee","timestamp":"2026-01-10T10:44:55.391Z"}
{"parentUuid":"bd8eb290-3d76-4c07-87ee-d80beaa1baee","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VUGpZSFk9Y3dqwp3REuS1W","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01LJJV1pduBUfvUhW2sATzAF","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/cleanup_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5,"cache_creation_input_tokens":975,"cache_read_input_tokens":20805,"cache_creation":{"ephemeral_5m_input_tokens":975,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWhP1PRbnZ32FNKe5L","type":"assistant","uuid":"416bebe0-8a72-4f1c-8bf9-5b14673b862f","timestamp":"2026-01-10T10:44:55.631Z"}
{"parentUuid":"416bebe0-8a72-4f1c-8bf9-5b14673b862f","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VUGpZSFk9Y3dqwp3REuS1W","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_018eHCa4qVGy6UNteggiGhJK","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/multizone_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5,"cache_creation_input_tokens":975,"cache_read_input_tokens":20805,"cache_creation":{"ephemeral_5m_input_tokens":975,"ephemeral_1h_input_tokens":0},"output_tokens":275,"service_tier":"standard"}},"requestId":"req_011CWyZWhP1PRbnZ32FNKe5L","type":"assistant","uuid":"2f107d82-3382-44fa-bdc4-0227e129c31e","timestamp":"2026-01-10T10:44:55.885Z"}
{"parentUuid":"f39df868-e9af-42d3-82b4-6ee787be2eb8","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_014Xn4qDHaGZazZ3Ew1wgBbi","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"fmt\"\n 23→\t\"testing\"\n 24→\t\"time\"\n 25→\n 26→\t\"github.com/stretchr/testify/require\"\n 27→)\n 28→\n 29→// TestE2EFullWorkflow tests the complete end-to-end workflow\n 30→func TestE2EFullWorkflow(t *testing.T) {\n 31→\tsuite := SetupE2ETestSuite(t)\n 32→\ttestName := fmt.Sprintf(\"e2e-test-%d\", time.Now().Unix())\n 33→\tt.Logf(\"Starting E2E test: %s\", testName)\n 34→\n 35→\t// Step 1: Create NodeClass\n 36→\tnodeClass := suite.createTestNodeClass(t, testName)\n 37→\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n 38→\n 39→\t// Step 2: Wait for NodeClass to be ready\n 40→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 41→\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n 42→\n 43→\t// Step 3: Create NodePool\n 44→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 45→\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n 46→\n 47→\t// Step 4: Deploy test workload to trigger Karpenter provisioning\n 48→\tdeployment := suite.createTestWorkload(t, testName)\n 49→\tt.Logf(\"Created test workload: %s\", deployment.Name)\n 50→\n 51→\t// Step 5: Wait for pods to be scheduled and nodes to be provisioned\n 52→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n 53→\tt.Logf(\"Pods scheduled successfully\")\n 54→\n 55→\t// Step 6: Verify that new nodes were created by Karpenter\n 56→\tsuite.verifyKarpenterNodesExist(t)\n 57→\tt.Logf(\"Verified Karpenter nodes exist\")\n 58→\n 59→\t// Step 7: Verify instances exist in IBM Cloud\n 60→\tsuite.verifyInstancesInIBMCloud(t)\n 61→\tt.Logf(\"Verified instances exist in IBM Cloud\")\n 62→\n 63→\t// Step 8: Cleanup test resources\n 64→\tsuite.cleanupTestWorkload(t, deployment.Name, deployment.Namespace)\n 65→\tt.Logf(\"Deleted test workload: %s\", deployment.Name)\n 66→\n 67→\t// Step 9: Clean up remaining resources ONLY after all verification is complete\n 68→\tt.Logf(\"Cleaning up remaining test resources\")\n 69→\tsuite.cleanupTestResources(t, testName)\n 70→\tt.Logf(\"E2E test completed successfully: %s\", testName)\n 71→}\n 72→\n 73→// TestE2ENodePoolInstanceTypeSelection tests the customer's configuration:\n 74→// - NodeClass WITHOUT instanceProfile field (should be optional)\n 75→// - NodePool with multiple instance type requirements\n 76→func TestE2ENodePoolInstanceTypeSelection(t *testing.T) {\n 77→\tsuite := SetupE2ETestSuite(t)\n 78→\ttestName := 
fmt.Sprintf(\"nodepool-instance-selection-%d\", time.Now().Unix())\n 79→\tt.Logf(\"Starting NodePool instance type selection test: %s\", testName)\n 80→\n 81→\t// Use automatic cleanup wrapper - CRITICAL for preventing stale resources\n 82→\tsuite.WithAutoCleanup(t, testName, func() {\n 83→\t\t// Create NodeClass without instanceProfile (let NodePool control selection)\n 84→\t\tnodeClass := suite.createTestNodeClassWithoutInstanceProfile(t, testName)\n 85→\t\tt.Logf(\"Created NodeClass without instanceProfile: %s\", nodeClass.Name)\n 86→\n 87→\t\t// Wait for NodeClass to be ready\n 88→\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 89→\t\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n 90→\n 91→\t\t// Create NodePool with multiple instance types matching customer's config\n 92→\t\tnodePool := suite.createTestNodePoolWithMultipleInstanceTypes(t, testName, nodeClass.Name)\n 93→\t\tt.Logf(\"Created NodePool with multiple instance types: %s\", nodePool.Name)\n 94→\n 95→\t\t// Create workload with specific resource requirements to trigger provisioning\n 96→\t\tdeployment := suite.createTestWorkloadWithInstanceTypeRequirements(t, testName)\n 97→\t\tt.Logf(\"Created test workload with specific resource requirements: %s\", deployment.Name)\n 98→\n 99→\t\t// Wait for pods to be scheduled\n 100→\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 101→\t\tt.Logf(\"Pods scheduled successfully\")\n 102→\n 103→\t\t// Verify that instances use one of the allowed types - use same dynamic types as NodePool\n 104→\t\tallowedTypes := suite.GetMultipleInstanceTypes(t, 4)\n 105→\t\tt.Logf(\"Verifying instances use allowed types: %v\", allowedTypes)\n 106→\t\tsuite.verifyInstancesUseAllowedTypes(t, allowedTypes)\n 107→\t\tt.Logf(\"Verified instances use allowed instance types\")\n 108→\n 109→\t\t// Verify NodePool requirements are satisfied on the provisioned nodes\n 110→\t\tsuite.verifyNodePoolRequirementsOnNodes(t, nodePool)\n 111→\t\tt.Logf(\"Verified NodePool requirements are satisfied\")\n 112→\n 113→\t\tt.Logf(\"NodePool instance type selection test completed: %s\", testName)\n 114→\t})\n 115→}\n 116→\n 117→// TestE2EInstanceTypeSelection tests Karpenter's instance type selection logic\n 118→func TestE2EInstanceTypeSelection(t *testing.T) {\n 119→\tsuite := SetupE2ETestSuite(t)\n 120→\ttestName := fmt.Sprintf(\"instance-selection-%d\", time.Now().Unix())\n 121→\tt.Logf(\"Starting instance type selection test: %s\", testName)\n 122→\n 123→\t// Create NodeClass with specific instance type to ensure predictable selection\n 124→\tnodeClass := suite.createTestNodeClass(t, testName)\n 125→\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n 126→\n 127→\t// Wait for NodeClass to be ready\n 128→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 129→\n 130→\t// Create a NodeClaim directly to test instance provisioning\n 131→\tnodeClaim := suite.createTestNodeClaim(t, testName, nodeClass.Name)\n 132→\tt.Logf(\"Created NodeClaim: %s\", nodeClaim.Name)\n 133→\n 134→\t// Wait for instance to be created\n 135→\tsuite.waitForInstanceCreation(t, nodeClaim.Name)\n 136→\tt.Logf(\"Instance created for NodeClaim: %s\", nodeClaim.Name)\n 137→\n 138→\t// Verify the instance uses the expected type\n 139→\texpectedType := nodeClass.Spec.InstanceProfile\n 140→\tsuite.verifyInstanceUsesAllowedType(t, nodeClaim.Name, []string{expectedType})\n 141→\tt.Logf(\"Verified instance uses expected type: %s\", expectedType)\n 142→\n 143→\t// Delete the NodeClaim to test cleanup\n 144→\tsuite.deleteNodeClaim(t, 
nodeClaim.Name)\n 145→\tt.Logf(\"Deleted NodeClaim: %s\", nodeClaim.Name)\n 146→\n 147→\t// Wait for instance to be cleaned up\n 148→\tsuite.waitForInstanceDeletion(t, nodeClaim.Name)\n 149→\tt.Logf(\"Instance cleaned up successfully\")\n 150→\n 151→\t// Cleanup remaining resources\n 152→\tsuite.cleanupTestResources(t, testName)\n 153→\tt.Logf(\"Instance type selection test completed: %s\", testName)\n 154→}\n 155→\n 156→// TestE2EDriftStability tests node stability and drift detection\n 157→func TestE2EDriftStability(t *testing.T) {\n 158→\tsuite := SetupE2ETestSuite(t)\n 159→\ttestName := fmt.Sprintf(\"drift-stability-%d\", time.Now().Unix())\n 160→\tt.Logf(\"Starting drift stability test: %s\", testName)\n 161→\n 162→\t// Create NodeClass and NodePool for stability testing\n 163→\tnodeClass := suite.createTestNodeClass(t, testName)\n 164→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 165→\n 166→\tnodePool := suite.createDriftStabilityNodePool(t, testName, nodeClass.Name)\n 167→\tt.Logf(\"Created stability NodePool: %s\", nodePool.Name)\n 168→\n 169→\t// Create workload to trigger provisioning\n 170→\tdeployment := suite.createTestWorkload(t, testName)\n 171→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 172→\n 173→\t// Capture initial node state\n 174→\tinitialNodes := suite.captureNodeSnapshot(t, nodePool.Name)\n 175→\trequire.Greater(t, len(initialNodes), 0, \"Should have provisioned nodes\")\n 176→\n 177→\t// Monitor stability for a brief period\n 178→\tmonitorDuration := 2 * time.Minute\n 179→\tt.Logf(\"Monitoring node stability for %v\", monitorDuration)\n 180→\tsuite.monitorNodeStability(t, nodePool.Name, initialNodes, monitorDuration)\n 181→\n 182→\t// Verify nodes are still stable after monitoring period\n 183→\tfinalNodes := suite.captureNodeSnapshot(t, nodePool.Name)\n 184→\trequire.Equal(t, len(initialNodes), len(finalNodes), \"Node count should remain stable\")\n 185→\n 186→\t// Verify specific nodes haven't drifted\n 187→\tfor _, initialNode := range initialNodes {\n 188→\t\tfound := false\n 189→\t\tfor _, finalNode := range finalNodes {\n 190→\t\t\tif initialNode.Name == finalNode.Name {\n 191→\t\t\t\tfound = true\n 192→\t\t\t\trequire.Equal(t, initialNode.ProviderID, finalNode.ProviderID, \"Node ProviderID should not change\")\n 193→\t\t\t\trequire.Equal(t, initialNode.InstanceType, finalNode.InstanceType, \"Node instance type should not change\")\n 194→\t\t\t\tbreak\n 195→\t\t\t}\n 196→\t\t}\n 197→\t\trequire.True(t, found, \"Node %s should still exist after stability period\", initialNode.Name)\n 198→\t}\n 199→\n 200→\t// Cleanup\n 201→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 202→\tsuite.cleanupTestResources(t, testName)\n 203→\tt.Logf(\"Drift stability test completed successfully: %s\", testName)\n 204→}\n 205→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"0eb033c5-d50c-449d-86b4-495f57406c76","timestamp":"2026-01-10T10:44:56.622Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/basic_workflow_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n)\n\n// TestE2EFullWorkflow tests the complete end-to-end workflow\nfunc TestE2EFullWorkflow(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"e2e-test-%d\", time.Now().Unix())\n\tt.Logf(\"Starting E2E test: %s\", testName)\n\n\t// Step 1: Create NodeClass\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n\n\t// Step 2: Wait for NodeClass to be ready\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n\n\t// Step 3: Create NodePool\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n\n\t// Step 4: Deploy test workload to trigger Karpenter provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\tt.Logf(\"Created test workload: %s\", deployment.Name)\n\n\t// Step 5: Wait for pods to be scheduled and nodes to be provisioned\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n\tt.Logf(\"Pods scheduled successfully\")\n\n\t// Step 6: Verify that new nodes were created by Karpenter\n\tsuite.verifyKarpenterNodesExist(t)\n\tt.Logf(\"Verified Karpenter nodes exist\")\n\n\t// Step 7: Verify instances exist in IBM Cloud\n\tsuite.verifyInstancesInIBMCloud(t)\n\tt.Logf(\"Verified instances exist in IBM Cloud\")\n\n\t// Step 8: Cleanup test resources\n\tsuite.cleanupTestWorkload(t, deployment.Name, deployment.Namespace)\n\tt.Logf(\"Deleted test workload: %s\", deployment.Name)\n\n\t// Step 9: Clean up remaining resources ONLY after all verification is complete\n\tt.Logf(\"Cleaning up remaining test resources\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"E2E test completed successfully: %s\", testName)\n}\n\n// TestE2ENodePoolInstanceTypeSelection tests the customer's configuration:\n// - NodeClass WITHOUT instanceProfile field (should be optional)\n// - NodePool with multiple instance type requirements\nfunc TestE2ENodePoolInstanceTypeSelection(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"nodepool-instance-selection-%d\", time.Now().Unix())\n\tt.Logf(\"Starting NodePool instance type selection test: %s\", testName)\n\n\t// Use automatic cleanup wrapper - CRITICAL for preventing stale resources\n\tsuite.WithAutoCleanup(t, testName, func() {\n\t\t// Create NodeClass without instanceProfile (let NodePool control selection)\n\t\tnodeClass := suite.createTestNodeClassWithoutInstanceProfile(t, 
testName)\n\t\tt.Logf(\"Created NodeClass without instanceProfile: %s\", nodeClass.Name)\n\n\t\t// Wait for NodeClass to be ready\n\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\t\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n\n\t\t// Create NodePool with multiple instance types matching customer's config\n\t\tnodePool := suite.createTestNodePoolWithMultipleInstanceTypes(t, testName, nodeClass.Name)\n\t\tt.Logf(\"Created NodePool with multiple instance types: %s\", nodePool.Name)\n\n\t\t// Create workload with specific resource requirements to trigger provisioning\n\t\tdeployment := suite.createTestWorkloadWithInstanceTypeRequirements(t, testName)\n\t\tt.Logf(\"Created test workload with specific resource requirements: %s\", deployment.Name)\n\n\t\t// Wait for pods to be scheduled\n\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\t\tt.Logf(\"Pods scheduled successfully\")\n\n\t\t// Verify that instances use one of the allowed types - use same dynamic types as NodePool\n\t\tallowedTypes := suite.GetMultipleInstanceTypes(t, 4)\n\t\tt.Logf(\"Verifying instances use allowed types: %v\", allowedTypes)\n\t\tsuite.verifyInstancesUseAllowedTypes(t, allowedTypes)\n\t\tt.Logf(\"Verified instances use allowed instance types\")\n\n\t\t// Verify NodePool requirements are satisfied on the provisioned nodes\n\t\tsuite.verifyNodePoolRequirementsOnNodes(t, nodePool)\n\t\tt.Logf(\"Verified NodePool requirements are satisfied\")\n\n\t\tt.Logf(\"NodePool instance type selection test completed: %s\", testName)\n\t})\n}\n\n// TestE2EInstanceTypeSelection tests Karpenter's instance type selection logic\nfunc TestE2EInstanceTypeSelection(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"instance-selection-%d\", time.Now().Unix())\n\tt.Logf(\"Starting instance type selection test: %s\", testName)\n\n\t// Create NodeClass with specific instance type to ensure predictable selection\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n\n\t// Wait for NodeClass to be ready\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\n\t// Create a NodeClaim directly to test instance provisioning\n\tnodeClaim := suite.createTestNodeClaim(t, testName, nodeClass.Name)\n\tt.Logf(\"Created NodeClaim: %s\", nodeClaim.Name)\n\n\t// Wait for instance to be created\n\tsuite.waitForInstanceCreation(t, nodeClaim.Name)\n\tt.Logf(\"Instance created for NodeClaim: %s\", nodeClaim.Name)\n\n\t// Verify the instance uses the expected type\n\texpectedType := nodeClass.Spec.InstanceProfile\n\tsuite.verifyInstanceUsesAllowedType(t, nodeClaim.Name, []string{expectedType})\n\tt.Logf(\"Verified instance uses expected type: %s\", expectedType)\n\n\t// Delete the NodeClaim to test cleanup\n\tsuite.deleteNodeClaim(t, nodeClaim.Name)\n\tt.Logf(\"Deleted NodeClaim: %s\", nodeClaim.Name)\n\n\t// Wait for instance to be cleaned up\n\tsuite.waitForInstanceDeletion(t, nodeClaim.Name)\n\tt.Logf(\"Instance cleaned up successfully\")\n\n\t// Cleanup remaining resources\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Instance type selection test completed: %s\", testName)\n}\n\n// TestE2EDriftStability tests node stability and drift detection\nfunc TestE2EDriftStability(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"drift-stability-%d\", time.Now().Unix())\n\tt.Logf(\"Starting drift stability test: %s\", testName)\n\n\t// Create NodeClass and NodePool for stability testing\n\tnodeClass := 
suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\n\tnodePool := suite.createDriftStabilityNodePool(t, testName, nodeClass.Name)\n\tt.Logf(\"Created stability NodePool: %s\", nodePool.Name)\n\n\t// Create workload to trigger provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Capture initial node state\n\tinitialNodes := suite.captureNodeSnapshot(t, nodePool.Name)\n\trequire.Greater(t, len(initialNodes), 0, \"Should have provisioned nodes\")\n\n\t// Monitor stability for a brief period\n\tmonitorDuration := 2 * time.Minute\n\tt.Logf(\"Monitoring node stability for %v\", monitorDuration)\n\tsuite.monitorNodeStability(t, nodePool.Name, initialNodes, monitorDuration)\n\n\t// Verify nodes are still stable after monitoring period\n\tfinalNodes := suite.captureNodeSnapshot(t, nodePool.Name)\n\trequire.Equal(t, len(initialNodes), len(finalNodes), \"Node count should remain stable\")\n\n\t// Verify specific nodes haven't drifted\n\tfor _, initialNode := range initialNodes {\n\t\tfound := false\n\t\tfor _, finalNode := range finalNodes {\n\t\t\tif initialNode.Name == finalNode.Name {\n\t\t\t\tfound = true\n\t\t\t\trequire.Equal(t, initialNode.ProviderID, finalNode.ProviderID, \"Node ProviderID should not change\")\n\t\t\t\trequire.Equal(t, initialNode.InstanceType, finalNode.InstanceType, \"Node instance type should not change\")\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t\trequire.True(t, found, \"Node %s should still exist after stability period\", initialNode.Name)\n\t}\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Drift stability test completed successfully: %s\", testName)\n}\n","numLines":205,"startLine":1,"totalLines":205}},"sourceToolAssistantUUID":"f39df868-e9af-42d3-82b4-6ee787be2eb8"}
{"parentUuid":"bd8eb290-3d76-4c07-87ee-d80beaa1baee","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01LvZRqTb8ySJTg1wNgWkC6C","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/stretchr/testify/require\"\n 28→\tappsv1 \"k8s.io/api/apps/v1\"\n 29→\tcorev1 \"k8s.io/api/core/v1\"\n 30→\tpolicyv1 \"k8s.io/api/policy/v1\"\n 31→\t\"k8s.io/apimachinery/pkg/api/resource\"\n 32→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 33→\t\"k8s.io/apimachinery/pkg/util/intstr\"\n 34→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 35→)\n 36→\n 37→// TestE2EConsolidationWithPDB tests node consolidation behavior with Pod Disruption Budgets\n 38→func TestE2EConsolidationWithPDB(t *testing.T) {\n 39→\tsuite := SetupE2ETestSuite(t)\n 40→\ttestName := fmt.Sprintf(\"consolidation-pdb-%d\", time.Now().Unix())\n 41→\tt.Logf(\"Starting consolidation with PDB test: %s\", testName)\n 42→\tctx := context.Background()\n 43→\n 44→\t// Create NodeClass and NodePool\n 45→\tnodeClass := suite.createTestNodeClass(t, testName)\n 46→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 47→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 48→\n 49→\t// Create deployment with multiple replicas to spread across nodes\n 50→\tdeployment := &appsv1.Deployment{\n 51→\t\tObjectMeta: metav1.ObjectMeta{\n 52→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 53→\t\t\tNamespace: \"default\",\n 54→\t\t\tLabels: map[string]string{\n 55→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 56→\t\t\t\t\"test\": \"e2e\",\n 57→\t\t\t\t\"test-name\": testName,\n 58→\t\t\t},\n 59→\t\t},\n 60→\t\tSpec: appsv1.DeploymentSpec{\n 61→\t\t\tReplicas: &[]int32{4}[0], // More replicas to force multiple nodes\n 62→\t\t\tSelector: &metav1.LabelSelector{\n 63→\t\t\t\tMatchLabels: map[string]string{\n 64→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 65→\t\t\t\t},\n 66→\t\t\t},\n 67→\t\t\tTemplate: corev1.PodTemplateSpec{\n 68→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 69→\t\t\t\t\tLabels: map[string]string{\n 70→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 71→\t\t\t\t\t\t\"test\": \"e2e\",\n 72→\t\t\t\t\t\t\"test-name\": testName,\n 73→\t\t\t\t\t},\n 74→\t\t\t\t},\n 75→\t\t\t\tSpec: corev1.PodSpec{\n 76→\t\t\t\t\tNodeSelector: map[string]string{\n 77→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 78→\t\t\t\t\t},\n 79→\t\t\t\t\tContainers: []corev1.Container{\n 80→\t\t\t\t\t\t{\n 81→\t\t\t\t\t\t\tName: \"test-container\",\n 82→\t\t\t\t\t\t\tImage: 
\"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 83→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 84→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 85→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n 86→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n 87→\t\t\t\t\t\t\t\t},\n 88→\t\t\t\t\t\t\t},\n 89→\t\t\t\t\t\t},\n 90→\t\t\t\t\t},\n 91→\t\t\t\t\t// Use anti-affinity to spread pods across nodes\n 92→\t\t\t\t\tAffinity: &corev1.Affinity{\n 93→\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n 94→\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n 95→\t\t\t\t\t\t\t\t{\n 96→\t\t\t\t\t\t\t\t\tWeight: 100,\n 97→\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n 98→\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n 99→\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n 100→\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 101→\t\t\t\t\t\t\t\t\t\t\t},\n 102→\t\t\t\t\t\t\t\t\t\t},\n 103→\t\t\t\t\t\t\t\t\t\tTopologyKey: \"kubernetes.io/hostname\",\n 104→\t\t\t\t\t\t\t\t\t},\n 105→\t\t\t\t\t\t\t\t},\n 106→\t\t\t\t\t\t\t},\n 107→\t\t\t\t\t\t},\n 108→\t\t\t\t\t},\n 109→\t\t\t\t},\n 110→\t\t\t},\n 111→\t\t},\n 112→\t}\n 113→\n 114→\terr := suite.kubeClient.Create(ctx, deployment)\n 115→\trequire.NoError(t, err)\n 116→\n 117→\t// Wait for pods to be scheduled\n 118→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 119→\n 120→\t// Get initial node count\n 121→\tinitialNodes := suite.getKarpenterNodes(t, nodePool.Name)\n 122→\trequire.Greater(t, len(initialNodes), 1, \"Should have multiple nodes for consolidation test\")\n 123→\tt.Logf(\"Initial nodes: %d\", len(initialNodes))\n 124→\n 125→\t// Create PodDisruptionBudget to limit disruptions\n 126→\tpdb := &policyv1.PodDisruptionBudget{\n 127→\t\tObjectMeta: metav1.ObjectMeta{\n 128→\t\t\tName: fmt.Sprintf(\"%s-pdb\", testName),\n 129→\t\t\tNamespace: \"default\",\n 130→\t\t\tLabels: map[string]string{\n 131→\t\t\t\t\"test\": \"e2e\",\n 132→\t\t\t\t\"test-name\": testName,\n 133→\t\t\t},\n 134→\t\t},\n 135→\t\tSpec: policyv1.PodDisruptionBudgetSpec{\n 136→\t\t\tMinAvailable: &intstr.IntOrString{\n 137→\t\t\t\tType: intstr.Int,\n 138→\t\t\t\tIntVal: 2, // Keep at least 2 pods available\n 139→\t\t\t},\n 140→\t\t\tSelector: &metav1.LabelSelector{\n 141→\t\t\t\tMatchLabels: map[string]string{\n 142→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 143→\t\t\t\t},\n 144→\t\t\t},\n 145→\t\t},\n 146→\t}\n 147→\n 148→\terr = suite.kubeClient.Create(ctx, pdb)\n 149→\trequire.NoError(t, err)\n 150→\tt.Logf(\"Created PodDisruptionBudget: %s\", pdb.Name)\n 151→\n 152→\t// Scale down deployment to trigger potential consolidation\n 153→\tdeployment.Spec.Replicas = &[]int32{2}[0]\n 154→\terr = suite.kubeClient.Update(ctx, deployment)\n 155→\trequire.NoError(t, err)\n 156→\tt.Logf(\"Scaled deployment down to 2 replicas\")\n 157→\n 158→\t// Wait for scaling to complete\n 159→\ttime.Sleep(60 * time.Second)\n 160→\n 161→\t// Check if consolidation occurred while respecting PDB\n 162→\t// Note: Actual consolidation timing depends on Karpenter configuration\n 163→\tfinalNodes := suite.getKarpenterNodes(t, nodePool.Name)\n 164→\tt.Logf(\"Final nodes after scaling: %d (initial: %d)\", len(finalNodes), len(initialNodes))\n 165→\n 166→\t// Verify pods are still running\n 167→\tsuite.verifyPodsScheduledOnCorrectNodes(t, deployment.Name, \"default\", nodePool.Name)\n 168→\n 169→\t// Cleanup\n 
170→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 171→\tsuite.cleanupTestResources(t, testName)\n 172→\tt.Logf(\"Consolidation with PDB test completed: %s\", testName)\n 173→}\n 174→\n 175→// TestE2EPodDisruptionBudget tests PodDisruptionBudget behavior during node operations\n 176→func TestE2EPodDisruptionBudget(t *testing.T) {\n 177→\tsuite := SetupE2ETestSuite(t)\n 178→\ttestName := fmt.Sprintf(\"pdb-test-%d\", time.Now().Unix())\n 179→\tt.Logf(\"Starting PodDisruptionBudget test: %s\", testName)\n 180→\tctx := context.Background()\n 181→\n 182→\t// Ensure cleanup happens even if test fails\n 183→\tdefer func() {\n 184→\t\tt.Logf(\"Running deferred cleanup for test: %s\", testName)\n 185→\t\tsuite.cleanupTestResources(t, testName)\n 186→\t}()\n 187→\n 188→\t// Create infrastructure\n 189→\tnodeClass := suite.createTestNodeClass(t, testName)\n 190→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 191→\t_ = suite.createTestNodePool(t, testName, nodeClass.Name)\n 192→\n 193→\t// Create deployment with 3 replicas (default from createTestWorkload)\n 194→\tdeployment := suite.createTestWorkload(t, testName)\n 195→\n 196→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 197→\n 198→\t// Create restrictive PDB\n 199→\tpdb := &policyv1.PodDisruptionBudget{\n 200→\t\tObjectMeta: metav1.ObjectMeta{\n 201→\t\t\tName: fmt.Sprintf(\"%s-pdb\", testName),\n 202→\t\t\tNamespace: \"default\",\n 203→\t\t\tLabels: map[string]string{\n 204→\t\t\t\t\"test\": \"e2e\",\n 205→\t\t\t\t\"test-name\": testName,\n 206→\t\t\t},\n 207→\t\t},\n 208→\t\tSpec: policyv1.PodDisruptionBudgetSpec{\n 209→\t\t\tMaxUnavailable: &intstr.IntOrString{\n 210→\t\t\t\tType: intstr.Int,\n 211→\t\t\t\tIntVal: 1, // Only allow 1 pod to be unavailable\n 212→\t\t\t},\n 213→\t\t\tSelector: &metav1.LabelSelector{\n 214→\t\t\t\tMatchLabels: map[string]string{\n 215→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n 216→\t\t\t\t},\n 217→\t\t\t},\n 218→\t\t},\n 219→\t}\n 220→\n 221→\terr := suite.kubeClient.Create(ctx, pdb)\n 222→\trequire.NoError(t, err)\n 223→\tt.Logf(\"Created restrictive PDB: %s\", pdb.Name)\n 224→\n 225→\t// Wait for PDB to be processed\n 226→\ttime.Sleep(30 * time.Second)\n 227→\n 228→\t// Verify PDB is active\n 229→\tvar updatedPDB policyv1.PodDisruptionBudget\n 230→\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: pdb.Name, Namespace: pdb.Namespace}, &updatedPDB)\n 231→\trequire.NoError(t, err)\n 232→\tt.Logf(\"PDB Status - Expected: %d, Current: %d, Desired: %d\",\n 233→\t\tupdatedPDB.Status.ExpectedPods, updatedPDB.Status.CurrentHealthy, updatedPDB.Status.DesiredHealthy)\n 234→\n 235→\t// Verify that pods remain available despite potential disruptions\n 236→\t// This is more of a validation that the PDB is properly configured\n 237→\trequire.True(t, updatedPDB.Status.CurrentHealthy >= updatedPDB.Status.DesiredHealthy,\n 238→\t\t\"PDB should maintain desired number of healthy pods\")\n 239→\n 240→\t// Cleanup workload explicitly (resources will be cleaned by defer)\n 241→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 242→\tt.Logf(\"PodDisruptionBudget test completed: %s\", testName)\n 243→}\n 244→\n 245→// TestE2EPodAntiAffinity tests pod anti-affinity scheduling behavior\n 246→func TestE2EPodAntiAffinity(t *testing.T) {\n 247→\tsuite := SetupE2ETestSuite(t)\n 248→\ttestName := fmt.Sprintf(\"anti-affinity-%d\", time.Now().Unix())\n 249→\tt.Logf(\"Starting pod anti-affinity test: %s\", testName)\n 250→\tctx := context.Background()\n 251→\n 252→\t// 
Create infrastructure\n 253→\tnodeClass := suite.createTestNodeClass(t, testName)\n 254→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 255→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 256→\n 257→\t// Create deployment with strict anti-affinity\n 258→\tdeployment := &appsv1.Deployment{\n 259→\t\tObjectMeta: metav1.ObjectMeta{\n 260→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 261→\t\t\tNamespace: \"default\",\n 262→\t\t\tLabels: map[string]string{\n 263→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 264→\t\t\t\t\"test\": \"e2e\",\n 265→\t\t\t\t\"test-name\": testName,\n 266→\t\t\t},\n 267→\t\t},\n 268→\t\tSpec: appsv1.DeploymentSpec{\n 269→\t\t\tReplicas: &[]int32{3}[0],\n 270→\t\t\tSelector: &metav1.LabelSelector{\n 271→\t\t\t\tMatchLabels: map[string]string{\n 272→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 273→\t\t\t\t},\n 274→\t\t\t},\n 275→\t\t\tTemplate: corev1.PodTemplateSpec{\n 276→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 277→\t\t\t\t\tLabels: map[string]string{\n 278→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 279→\t\t\t\t\t\t\"test\": \"e2e\",\n 280→\t\t\t\t\t\t\"test-name\": testName,\n 281→\t\t\t\t\t},\n 282→\t\t\t\t},\n 283→\t\t\t\tSpec: corev1.PodSpec{\n 284→\t\t\t\t\tNodeSelector: map[string]string{\n 285→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 286→\t\t\t\t\t},\n 287→\t\t\t\t\tContainers: []corev1.Container{\n 288→\t\t\t\t\t\t{\n 289→\t\t\t\t\t\t\tName: \"test-container\",\n 290→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 291→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 292→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 293→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n 294→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n 295→\t\t\t\t\t\t\t\t},\n 296→\t\t\t\t\t\t\t},\n 297→\t\t\t\t\t\t},\n 298→\t\t\t\t\t},\n 299→\t\t\t\t\t// Strict anti-affinity - no two pods on the same node\n 300→\t\t\t\t\tAffinity: &corev1.Affinity{\n 301→\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n 302→\t\t\t\t\t\t\tRequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{\n 303→\t\t\t\t\t\t\t\t{\n 304→\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n 305→\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n 306→\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 307→\t\t\t\t\t\t\t\t\t\t},\n 308→\t\t\t\t\t\t\t\t\t},\n 309→\t\t\t\t\t\t\t\t\tTopologyKey: \"kubernetes.io/hostname\",\n 310→\t\t\t\t\t\t\t\t},\n 311→\t\t\t\t\t\t\t},\n 312→\t\t\t\t\t\t},\n 313→\t\t\t\t\t},\n 314→\t\t\t\t},\n 315→\t\t\t},\n 316→\t\t},\n 317→\t}\n 318→\n 319→\terr := suite.kubeClient.Create(ctx, deployment)\n 320→\trequire.NoError(t, err)\n 321→\tt.Logf(\"Created deployment with strict anti-affinity: %s\", deployment.Name)\n 322→\n 323→\t// Wait for pods to be scheduled\n 324→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 325→\n 326→\t// Verify that pods are on different nodes\n 327→\tvar podList corev1.PodList\n 328→\terr = suite.kubeClient.List(ctx, &podList, client.InNamespace(\"default\"),\n 329→\t\tclient.MatchingLabels{\"app\": fmt.Sprintf(\"%s-app\", testName)})\n 330→\trequire.NoError(t, err)\n 331→\trequire.Equal(t, 3, len(podList.Items), \"Should have 3 pods\")\n 332→\n 333→\t// Check that each pod is on a different node\n 334→\tnodeNames := make(map[string]bool)\n 335→\tfor _, pod := range podList.Items {\n 336→\t\trequire.NotEmpty(t, pod.Spec.NodeName, \"Pod should be 
scheduled on a node\")\n 337→\n 338→\t\tif nodeNames[pod.Spec.NodeName] {\n 339→\t\t\tt.Errorf(\"Multiple pods scheduled on the same node: %s\", pod.Spec.NodeName)\n 340→\t\t}\n 341→\t\tnodeNames[pod.Spec.NodeName] = true\n 342→\t}\n 343→\n 344→\trequire.Equal(t, 3, len(nodeNames), \"Pods should be spread across 3 different nodes due to anti-affinity\")\n 345→\tt.Logf(\"Verified pods are spread across %d different nodes\", len(nodeNames))\n 346→\n 347→\t// List the nodes they're running on\n 348→\tfor nodeName := range nodeNames {\n 349→\t\tt.Logf(\"Pod scheduled on node: %s\", nodeName)\n 350→\t}\n 351→\n 352→\t// Cleanup\n 353→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 354→\tsuite.cleanupTestResources(t, testName)\n 355→\tt.Logf(\"Pod anti-affinity test completed: %s\", testName)\n 356→}\n 357→\n 358→// TestE2ENodeAffinity tests node affinity scheduling behavior\n 359→func TestE2ENodeAffinity(t *testing.T) {\n 360→\tsuite := SetupE2ETestSuite(t)\n 361→\ttestName := fmt.Sprintf(\"node-affinity-%d\", time.Now().Unix())\n 362→\tt.Logf(\"Starting node affinity test: %s\", testName)\n 363→\tctx := context.Background()\n 364→\n 365→\t// Create infrastructure\n 366→\tnodeClass := suite.createTestNodeClass(t, testName)\n 367→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 368→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 369→\n 370→\t// Wait for initial node to be created and get its instance type\n 371→\tinitialDeployment := suite.createTestWorkload(t, testName)\n 372→\tsuite.waitForPodsToBeScheduled(t, initialDeployment.Name, \"default\")\n 373→\n 374→\t// Get the instance type of the first node\n 375→\tnodes := suite.getKarpenterNodes(t, nodePool.Name)\n 376→\trequire.Greater(t, len(nodes), 0, \"Should have at least one node\")\n 377→\tfirstNodeInstanceType := nodes[0].Labels[\"node.kubernetes.io/instance-type\"]\n 378→\trequire.NotEmpty(t, firstNodeInstanceType, \"Node should have instance type label\")\n 379→\tt.Logf(\"First node instance type: %s\", firstNodeInstanceType)\n 380→\n 381→\t// Don't clean up initial deployment yet - we need the nodes to remain for affinity test\n 382→\n 383→\t// Create deployment with node affinity targeting the same instance type\n 384→\tdeployment := &appsv1.Deployment{\n 385→\t\tObjectMeta: metav1.ObjectMeta{\n 386→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 387→\t\t\tNamespace: \"default\",\n 388→\t\t\tLabels: map[string]string{\n 389→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 390→\t\t\t\t\"test\": \"e2e\",\n 391→\t\t\t\t\"test-name\": testName,\n 392→\t\t\t},\n 393→\t\t},\n 394→\t\tSpec: appsv1.DeploymentSpec{\n 395→\t\t\tReplicas: &[]int32{2}[0],\n 396→\t\t\tSelector: &metav1.LabelSelector{\n 397→\t\t\t\tMatchLabels: map[string]string{\n 398→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 399→\t\t\t\t},\n 400→\t\t\t},\n 401→\t\t\tTemplate: corev1.PodTemplateSpec{\n 402→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 403→\t\t\t\t\tLabels: map[string]string{\n 404→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 405→\t\t\t\t\t\t\"test\": \"e2e\",\n 406→\t\t\t\t\t\t\"test-name\": testName,\n 407→\t\t\t\t\t},\n 408→\t\t\t\t},\n 409→\t\t\t\tSpec: corev1.PodSpec{\n 410→\t\t\t\t\tNodeSelector: map[string]string{\n 411→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 412→\t\t\t\t\t},\n 413→\t\t\t\t\tContainers: []corev1.Container{\n 414→\t\t\t\t\t\t{\n 415→\t\t\t\t\t\t\tName: \"test-container\",\n 416→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 
417→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 418→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 419→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n 420→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n 421→\t\t\t\t\t\t\t\t},\n 422→\t\t\t\t\t\t\t},\n 423→\t\t\t\t\t\t},\n 424→\t\t\t\t\t},\n 425→\t\t\t\t\t// Node affinity requiring specific instance type\n 426→\t\t\t\t\tAffinity: &corev1.Affinity{\n 427→\t\t\t\t\t\tNodeAffinity: &corev1.NodeAffinity{\n 428→\t\t\t\t\t\t\tRequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{\n 429→\t\t\t\t\t\t\t\tNodeSelectorTerms: []corev1.NodeSelectorTerm{\n 430→\t\t\t\t\t\t\t\t\t{\n 431→\t\t\t\t\t\t\t\t\t\tMatchExpressions: []corev1.NodeSelectorRequirement{\n 432→\t\t\t\t\t\t\t\t\t\t\t{\n 433→\t\t\t\t\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 434→\t\t\t\t\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 435→\t\t\t\t\t\t\t\t\t\t\t\tValues: []string{firstNodeInstanceType},\n 436→\t\t\t\t\t\t\t\t\t\t\t},\n 437→\t\t\t\t\t\t\t\t\t\t},\n 438→\t\t\t\t\t\t\t\t\t},\n 439→\t\t\t\t\t\t\t\t},\n 440→\t\t\t\t\t\t\t},\n 441→\t\t\t\t\t\t},\n 442→\t\t\t\t\t},\n 443→\t\t\t\t},\n 444→\t\t\t},\n 445→\t\t},\n 446→\t}\n 447→\n 448→\terr := suite.kubeClient.Create(ctx, deployment)\n 449→\trequire.NoError(t, err)\n 450→\tt.Logf(\"Created deployment with node affinity for instance type: %s\", firstNodeInstanceType)\n 451→\n 452→\t// Wait for pods to be scheduled\n 453→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 454→\n 455→\t// Verify that pods are scheduled on nodes with the correct instance type\n 456→\tvar podList corev1.PodList\n 457→\terr = suite.kubeClient.List(ctx, &podList, client.InNamespace(\"default\"),\n 458→\t\tclient.MatchingLabels{\"app\": fmt.Sprintf(\"%s-app\", testName)})\n 459→\trequire.NoError(t, err)\n 460→\n 461→\tfor _, pod := range podList.Items {\n 462→\t\trequire.NotEmpty(t, pod.Spec.NodeName, \"Pod should be scheduled on a node\")\n 463→\n 464→\t\t// Get the node and check its instance type\n 465→\t\tvar node corev1.Node\n 466→\t\terr := suite.kubeClient.Get(ctx, client.ObjectKey{Name: pod.Spec.NodeName}, &node)\n 467→\t\trequire.NoError(t, err)\n 468→\n 469→\t\tnodeInstanceType := node.Labels[\"node.kubernetes.io/instance-type\"]\n 470→\t\trequire.Equal(t, firstNodeInstanceType, nodeInstanceType,\n 471→\t\t\t\"Pod should be scheduled on node with required instance type\")\n 472→\n 473→\t\tt.Logf(\"Pod %s correctly scheduled on node %s with instance type %s\",\n 474→\t\t\tpod.Name, pod.Spec.NodeName, nodeInstanceType)\n 475→\t}\n 476→\n 477→\t// Cleanup both deployments\n 478→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 479→\tsuite.cleanupTestWorkload(t, initialDeployment.Name, \"default\")\n 480→\tsuite.cleanupTestResources(t, testName)\n 481→\tt.Logf(\"Node affinity test completed: %s\", testName)\n 482→}\n 483→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"a3bd0767-8aa3-4798-bfef-d6f092552600","timestamp":"2026-01-10T10:44:56.623Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/scheduling_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\tpolicyv1 \"k8s.io/api/policy/v1\"\n\t\"k8s.io/apimachinery/pkg/api/resource\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/util/intstr\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n)\n\n// TestE2EConsolidationWithPDB tests node consolidation behavior with Pod Disruption Budgets\nfunc TestE2EConsolidationWithPDB(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"consolidation-pdb-%d\", time.Now().Unix())\n\tt.Logf(\"Starting consolidation with PDB test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create NodeClass and NodePool\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\n\t// Create deployment with multiple replicas to spread across nodes\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &[]int32{4}[0], // More replicas to force multiple nodes\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Use anti-affinity to spread pods across nodes\n\t\t\t\t\tAffinity: 
&corev1.Affinity{\n\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tWeight: 100,\n\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\tTopologyKey: \"kubernetes.io/hostname\",\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Get initial node count\n\tinitialNodes := suite.getKarpenterNodes(t, nodePool.Name)\n\trequire.Greater(t, len(initialNodes), 1, \"Should have multiple nodes for consolidation test\")\n\tt.Logf(\"Initial nodes: %d\", len(initialNodes))\n\n\t// Create PodDisruptionBudget to limit disruptions\n\tpdb := &policyv1.PodDisruptionBudget{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-pdb\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: policyv1.PodDisruptionBudgetSpec{\n\t\t\tMinAvailable: &intstr.IntOrString{\n\t\t\t\tType: intstr.Int,\n\t\t\t\tIntVal: 2, // Keep at least 2 pods available\n\t\t\t},\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, pdb)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created PodDisruptionBudget: %s\", pdb.Name)\n\n\t// Scale down deployment to trigger potential consolidation\n\tdeployment.Spec.Replicas = &[]int32{2}[0]\n\terr = suite.kubeClient.Update(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Scaled deployment down to 2 replicas\")\n\n\t// Wait for scaling to complete\n\ttime.Sleep(60 * time.Second)\n\n\t// Check if consolidation occurred while respecting PDB\n\t// Note: Actual consolidation timing depends on Karpenter configuration\n\tfinalNodes := suite.getKarpenterNodes(t, nodePool.Name)\n\tt.Logf(\"Final nodes after scaling: %d (initial: %d)\", len(finalNodes), len(initialNodes))\n\n\t// Verify pods are still running\n\tsuite.verifyPodsScheduledOnCorrectNodes(t, deployment.Name, \"default\", nodePool.Name)\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Consolidation with PDB test completed: %s\", testName)\n}\n\n// TestE2EPodDisruptionBudget tests PodDisruptionBudget behavior during node operations\nfunc TestE2EPodDisruptionBudget(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"pdb-test-%d\", time.Now().Unix())\n\tt.Logf(\"Starting PodDisruptionBudget test: %s\", testName)\n\tctx := context.Background()\n\n\t// Ensure cleanup happens even if test fails\n\tdefer func() {\n\t\tt.Logf(\"Running deferred cleanup for test: %s\", testName)\n\t\tsuite.cleanupTestResources(t, testName)\n\t}()\n\n\t// Create infrastructure\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\t_ = suite.createTestNodePool(t, testName, 
nodeClass.Name)\n\n\t// Create deployment with 3 replicas (default from createTestWorkload)\n\tdeployment := suite.createTestWorkload(t, testName)\n\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Create restrictive PDB\n\tpdb := &policyv1.PodDisruptionBudget{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-pdb\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: policyv1.PodDisruptionBudgetSpec{\n\t\t\tMaxUnavailable: &intstr.IntOrString{\n\t\t\t\tType: intstr.Int,\n\t\t\t\tIntVal: 1, // Only allow 1 pod to be unavailable\n\t\t\t},\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, pdb)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created restrictive PDB: %s\", pdb.Name)\n\n\t// Wait for PDB to be processed\n\ttime.Sleep(30 * time.Second)\n\n\t// Verify PDB is active\n\tvar updatedPDB policyv1.PodDisruptionBudget\n\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: pdb.Name, Namespace: pdb.Namespace}, &updatedPDB)\n\trequire.NoError(t, err)\n\tt.Logf(\"PDB Status - Expected: %d, Current: %d, Desired: %d\",\n\t\tupdatedPDB.Status.ExpectedPods, updatedPDB.Status.CurrentHealthy, updatedPDB.Status.DesiredHealthy)\n\n\t// Verify that pods remain available despite potential disruptions\n\t// This is more of a validation that the PDB is properly configured\n\trequire.True(t, updatedPDB.Status.CurrentHealthy >= updatedPDB.Status.DesiredHealthy,\n\t\t\"PDB should maintain desired number of healthy pods\")\n\n\t// Cleanup workload explicitly (resources will be cleaned by defer)\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tt.Logf(\"PodDisruptionBudget test completed: %s\", testName)\n}\n\n// TestE2EPodAntiAffinity tests pod anti-affinity scheduling behavior\nfunc TestE2EPodAntiAffinity(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"anti-affinity-%d\", time.Now().Unix())\n\tt.Logf(\"Starting pod anti-affinity test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create infrastructure\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\n\t// Create deployment with strict anti-affinity\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &[]int32{3}[0],\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: 
[]corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Strict anti-affinity - no two pods on the same node\n\t\t\t\t\tAffinity: &corev1.Affinity{\n\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n\t\t\t\t\t\t\tRequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\tTopologyKey: \"kubernetes.io/hostname\",\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created deployment with strict anti-affinity: %s\", deployment.Name)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Verify that pods are on different nodes\n\tvar podList corev1.PodList\n\terr = suite.kubeClient.List(ctx, &podList, client.InNamespace(\"default\"),\n\t\tclient.MatchingLabels{\"app\": fmt.Sprintf(\"%s-app\", testName)})\n\trequire.NoError(t, err)\n\trequire.Equal(t, 3, len(podList.Items), \"Should have 3 pods\")\n\n\t// Check that each pod is on a different node\n\tnodeNames := make(map[string]bool)\n\tfor _, pod := range podList.Items {\n\t\trequire.NotEmpty(t, pod.Spec.NodeName, \"Pod should be scheduled on a node\")\n\n\t\tif nodeNames[pod.Spec.NodeName] {\n\t\t\tt.Errorf(\"Multiple pods scheduled on the same node: %s\", pod.Spec.NodeName)\n\t\t}\n\t\tnodeNames[pod.Spec.NodeName] = true\n\t}\n\n\trequire.Equal(t, 3, len(nodeNames), \"Pods should be spread across 3 different nodes due to anti-affinity\")\n\tt.Logf(\"Verified pods are spread across %d different nodes\", len(nodeNames))\n\n\t// List the nodes they're running on\n\tfor nodeName := range nodeNames {\n\t\tt.Logf(\"Pod scheduled on node: %s\", nodeName)\n\t}\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Pod anti-affinity test completed: %s\", testName)\n}\n\n// TestE2ENodeAffinity tests node affinity scheduling behavior\nfunc TestE2ENodeAffinity(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"node-affinity-%d\", time.Now().Unix())\n\tt.Logf(\"Starting node affinity test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create infrastructure\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\n\t// Wait for initial node to be created and get its instance type\n\tinitialDeployment := suite.createTestWorkload(t, testName)\n\tsuite.waitForPodsToBeScheduled(t, initialDeployment.Name, \"default\")\n\n\t// Get the instance type of the first node\n\tnodes := suite.getKarpenterNodes(t, nodePool.Name)\n\trequire.Greater(t, len(nodes), 0, \"Should have at least one node\")\n\tfirstNodeInstanceType := 
nodes[0].Labels[\"node.kubernetes.io/instance-type\"]\n\trequire.NotEmpty(t, firstNodeInstanceType, \"Node should have instance type label\")\n\tt.Logf(\"First node instance type: %s\", firstNodeInstanceType)\n\n\t// Don't clean up initial deployment yet - we need the nodes to remain for affinity test\n\n\t// Create deployment with node affinity targeting the same instance type\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &[]int32{2}[0],\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Node affinity requiring specific instance type\n\t\t\t\t\tAffinity: &corev1.Affinity{\n\t\t\t\t\t\tNodeAffinity: &corev1.NodeAffinity{\n\t\t\t\t\t\t\tRequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{\n\t\t\t\t\t\t\t\tNodeSelectorTerms: []corev1.NodeSelectorTerm{\n\t\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\tMatchExpressions: []corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\t\t\t\t\tValues: []string{firstNodeInstanceType},\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created deployment with node affinity for instance type: %s\", firstNodeInstanceType)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Verify that pods are scheduled on nodes with the correct instance type\n\tvar podList corev1.PodList\n\terr = suite.kubeClient.List(ctx, &podList, client.InNamespace(\"default\"),\n\t\tclient.MatchingLabels{\"app\": fmt.Sprintf(\"%s-app\", testName)})\n\trequire.NoError(t, err)\n\n\tfor _, pod := range podList.Items {\n\t\trequire.NotEmpty(t, pod.Spec.NodeName, \"Pod should be scheduled on a node\")\n\n\t\t// Get the node and check its instance type\n\t\tvar node corev1.Node\n\t\terr := suite.kubeClient.Get(ctx, client.ObjectKey{Name: pod.Spec.NodeName}, &node)\n\t\trequire.NoError(t, err)\n\n\t\tnodeInstanceType := 
node.Labels[\"node.kubernetes.io/instance-type\"]\n\t\trequire.Equal(t, firstNodeInstanceType, nodeInstanceType,\n\t\t\t\"Pod should be scheduled on node with required instance type\")\n\n\t\tt.Logf(\"Pod %s correctly scheduled on node %s with instance type %s\",\n\t\t\tpod.Name, pod.Spec.NodeName, nodeInstanceType)\n\t}\n\n\t// Cleanup both deployments\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestWorkload(t, initialDeployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Node affinity test completed: %s\", testName)\n}\n","numLines":483,"startLine":1,"totalLines":483}},"sourceToolAssistantUUID":"bd8eb290-3d76-4c07-87ee-d80beaa1baee"}
{"parentUuid":"416bebe0-8a72-4f1c-8bf9-5b14673b862f","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01LJJV1pduBUfvUhW2sATzAF","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/stretchr/testify/require\"\n 28→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 29→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 30→)\n 31→\n 32→// TestE2ECleanupNodePoolDeletion tests proper cleanup when deleting NodePools\n 33→func TestE2ECleanupNodePoolDeletion(t *testing.T) {\n 34→\tsuite := SetupE2ETestSuite(t)\n 35→\ttestName := fmt.Sprintf(\"cleanup-nodepool-%d\", time.Now().Unix())\n 36→\tt.Logf(\"Starting NodePool cleanup test: %s\", testName)\n 37→\tctx := context.Background()\n 38→\n 39→\t// Create NodeClass\n 40→\tnodeClass := suite.createTestNodeClass(t, testName)\n 41→\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n 42→\n 43→\t// Wait for NodeClass to be ready\n 44→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 45→\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n 46→\n 47→\t// Create NodePool with 2 replicas for better testing\n 48→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 49→\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n 50→\n 51→\t// Create test workload to trigger provisioning\n 52→\tdeployment := suite.createTestWorkload(t, testName)\n 53→\t// Modify the deployment to have 2 replicas\n 54→\tdeployment.Spec.Replicas = &[]int32{2}[0]\n 55→\terr := suite.kubeClient.Update(ctx, deployment)\n 56→\trequire.NoError(t, err)\n 57→\tt.Logf(\"Created test workload with 2 replicas: %s\", deployment.Name)\n 58→\n 59→\t// Wait for pods to be scheduled and nodes to be provisioned\n 60→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 61→\tt.Logf(\"Pods scheduled successfully\")\n 62→\n 63→\t// Get initial list of nodes\n 64→\tinitialNodes := suite.getKarpenterNodes(t, nodePool.Name)\n 65→\trequire.Greater(t, len(initialNodes), 0, \"Should have provisioned at least one node\")\n 66→\tt.Logf(\"Initial provisioned nodes: %d\", len(initialNodes))\n 67→\n 68→\t// Delete the NodePool first - this should trigger cleanup\n 69→\terr = suite.kubeClient.Delete(ctx, nodePool)\n 70→\trequire.NoError(t, err)\n 71→\tt.Logf(\"Deleted NodePool: %s\", nodePool.Name)\n 72→\n 73→\t// Wait for pods to be evicted\n 74→\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n 75→\tt.Logf(\"Pods evicted successfully\")\n 76→\n 77→\t// Verify nodes are eventually cleaned up\n 78→\t// This might take a while as Karpenter needs to process the 
NodePool deletion\n 79→\tdeadline := time.Now().Add(10 * time.Minute)\n 80→\tfor time.Now().Before(deadline) {\n 81→\t\tcurrentNodes := suite.getKarpenterNodes(t, nodePool.Name)\n 82→\t\tif len(currentNodes) == 0 {\n 83→\t\t\tt.Logf(\"All nodes cleaned up successfully\")\n 84→\t\t\tbreak\n 85→\t\t}\n 86→\t\tt.Logf(\"Still waiting for %d nodes to be cleaned up\", len(currentNodes))\n 87→\t\ttime.Sleep(30 * time.Second)\n 88→\t}\n 89→\n 90→\t// Final verification\n 91→\tfinalNodes := suite.getKarpenterNodes(t, nodePool.Name)\n 92→\trequire.Equal(t, 0, len(finalNodes), \"All nodes should be cleaned up after NodePool deletion\")\n 93→\n 94→\t// Cleanup remaining resources\n 95→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 96→\tsuite.cleanupTestResources(t, testName)\n 97→\tt.Logf(\"NodePool cleanup test completed successfully: %s\", testName)\n 98→}\n 99→\n 100→// TestE2ECleanupNodeClassDeletion tests proper cleanup when deleting NodeClasses\n 101→func TestE2ECleanupNodeClassDeletion(t *testing.T) {\n 102→\tsuite := SetupE2ETestSuite(t)\n 103→\ttestName := fmt.Sprintf(\"cleanup-nodeclass-%d\", time.Now().Unix())\n 104→\tt.Logf(\"Starting NodeClass cleanup test: %s\", testName)\n 105→\tctx := context.Background()\n 106→\n 107→\t// Create NodeClass\n 108→\tnodeClass := suite.createTestNodeClass(t, testName)\n 109→\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n 110→\n 111→\t// Wait for NodeClass to be ready\n 112→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 113→\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n 114→\n 115→\t// Create NodePool that references this NodeClass\n 116→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 117→\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n 118→\n 119→\t// Create test workload to trigger provisioning\n 120→\tdeployment := suite.createTestWorkload(t, testName)\n 121→\tt.Logf(\"Created test workload: %s\", deployment.Name)\n 122→\n 123→\t// Wait for pods to be scheduled and nodes to be provisioned\n 124→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 125→\tt.Logf(\"Pods scheduled successfully\")\n 126→\n 127→\t// Get list of provisioned NodeClaims\n 128→\tvar nodeClaimList karpv1.NodeClaimList\n 129→\terr := suite.kubeClient.List(ctx, &nodeClaimList, client.MatchingLabels{\n 130→\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 131→\t})\n 132→\trequire.NoError(t, err)\n 133→\trequire.Greater(t, len(nodeClaimList.Items), 0, \"Should have NodeClaims provisioned\")\n 134→\tinitialNodeClaims := len(nodeClaimList.Items)\n 135→\tt.Logf(\"Initial NodeClaims: %d\", initialNodeClaims)\n 136→\n 137→\t// Delete the deployment first to reduce resource pressure\n 138→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 139→\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n 140→\n 141→\t// Delete the NodePool first\n 142→\terr = suite.kubeClient.Delete(ctx, nodePool)\n 143→\trequire.NoError(t, err)\n 144→\tt.Logf(\"Deleted NodePool: %s\", nodePool.Name)\n 145→\n 146→\t// Wait for NodeClaims to be cleaned up\n 147→\tdeadline := time.Now().Add(10 * time.Minute)\n 148→\tfor time.Now().Before(deadline) {\n 149→\t\tvar currentNodeClaimList karpv1.NodeClaimList\n 150→\t\terr := suite.kubeClient.List(ctx, &currentNodeClaimList, client.MatchingLabels{\n 151→\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 152→\t\t})\n 153→\t\trequire.NoError(t, err)\n 154→\n 155→\t\tif len(currentNodeClaimList.Items) == 0 {\n 156→\t\t\tt.Logf(\"All NodeClaims cleaned up\")\n 157→\t\t\tbreak\n 158→\t\t}\n 
159→\t\tt.Logf(\"Still waiting for %d NodeClaims to be cleaned up\", len(currentNodeClaimList.Items))\n 160→\t\ttime.Sleep(30 * time.Second)\n 161→\t}\n 162→\n 163→\t// Now try to delete the NodeClass - it should succeed if no NodePools reference it\n 164→\terr = suite.kubeClient.Delete(ctx, nodeClass)\n 165→\trequire.NoError(t, err)\n 166→\tt.Logf(\"Successfully deleted NodeClass: %s\", nodeClass.Name)\n 167→\n 168→\t// Cleanup any remaining resources\n 169→\tsuite.cleanupTestResources(t, testName)\n 170→\tt.Logf(\"NodeClass cleanup test completed successfully: %s\", testName)\n 171→}\n 172→\n 173→// TestE2ECleanupOrphanedResources tests cleanup of orphaned resources\n 174→func TestE2ECleanupOrphanedResources(t *testing.T) {\n 175→\tsuite := SetupE2ETestSuite(t)\n 176→\ttestName := fmt.Sprintf(\"cleanup-orphaned-%d\", time.Now().Unix())\n 177→\tt.Logf(\"Starting orphaned resources cleanup test: %s\", testName)\n 178→\tctx := context.Background()\n 179→\n 180→\t// Create NodeClass and NodePool\n 181→\tnodeClass := suite.createTestNodeClass(t, testName)\n 182→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 183→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 184→\n 185→\t// Create test workload to trigger provisioning\n 186→\tdeployment := suite.createTestWorkload(t, testName)\n 187→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 188→\n 189→\t// Get the provisioned NodeClaim\n 190→\tvar nodeClaimList karpv1.NodeClaimList\n 191→\terr := suite.kubeClient.List(ctx, &nodeClaimList, client.MatchingLabels{\n 192→\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 193→\t})\n 194→\trequire.NoError(t, err)\n 195→\trequire.Greater(t, len(nodeClaimList.Items), 0, \"Should have NodeClaims provisioned\")\n 196→\toriginalNodeClaim := nodeClaimList.Items[0]\n 197→\n 198→\t// Simulate an orphaned state by manually deleting the NodePool while keeping NodeClaims\n 199→\terr = suite.kubeClient.Delete(ctx, nodePool)\n 200→\trequire.NoError(t, err)\n 201→\tt.Logf(\"Deleted NodePool, leaving NodeClaim potentially orphaned: %s\", originalNodeClaim.Name)\n 202→\n 203→\t// Wait a bit for the controller to process the deletion\n 204→\ttime.Sleep(30 * time.Second)\n 205→\n 206→\t// Check if the NodeClaim still exists (it might be cleaned up automatically)\n 207→\tvar orphanedNodeClaim karpv1.NodeClaim\n 208→\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: originalNodeClaim.Name}, &orphanedNodeClaim)\n 209→\n 210→\tif err == nil {\n 211→\t\t// NodeClaim still exists - verify it gets cleaned up eventually\n 212→\t\tt.Logf(\"NodeClaim still exists, waiting for automatic cleanup\")\n 213→\t\tdeadline := time.Now().Add(5 * time.Minute)\n 214→\n 215→\t\tfor time.Now().Before(deadline) {\n 216→\t\t\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: originalNodeClaim.Name}, &orphanedNodeClaim)\n 217→\t\t\tif err != nil {\n 218→\t\t\t\tt.Logf(\"Orphaned NodeClaim was automatically cleaned up\")\n 219→\t\t\t\tbreak\n 220→\t\t\t}\n 221→\t\t\ttime.Sleep(15 * time.Second)\n 222→\t\t}\n 223→\t} else {\n 224→\t\tt.Logf(\"NodeClaim was already cleaned up with NodePool deletion\")\n 225→\t}\n 226→\n 227→\t// Clean up workload\n 228→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 229→\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n 230→\n 231→\t// Use our comprehensive cleanup function to catch any remaining orphaned resources\n 232→\tsuite.cleanupOrphanedKubernetesResources(t)\n 233→\n 234→\t// Final cleanup\n 235→\tsuite.cleanupTestResources(t, 
testName)\n 236→\tt.Logf(\"Orphaned resources cleanup test completed: %s\", testName)\n 237→}\n 238→\n 239→// TestE2ECleanupIBMCloudResources tests cleanup of IBM Cloud resources\n 240→func TestE2ECleanupIBMCloudResources(t *testing.T) {\n 241→\tsuite := SetupE2ETestSuite(t)\n 242→\ttestName := fmt.Sprintf(\"cleanup-ibmcloud-%d\", time.Now().Unix())\n 243→\tt.Logf(\"Starting IBM Cloud resources cleanup test: %s\", testName)\n 244→\tctx := context.Background()\n 245→\n 246→\t// Get initial list of IBM Cloud instances for comparison\n 247→\tinitialInstances, err := suite.getIBMCloudInstances(t)\n 248→\trequire.NoError(t, err)\n 249→\tinitialInstanceCount := len(initialInstances)\n 250→\tt.Logf(\"Initial IBM Cloud instances: %d\", initialInstanceCount)\n 251→\n 252→\t// Create NodeClass and NodePool\n 253→\tnodeClass := suite.createTestNodeClass(t, testName)\n 254→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 255→\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n 256→\n 257→\t// Create test workload to trigger provisioning\n 258→\tdeployment := suite.createTestWorkload(t, testName)\n 259→\t// Modify the deployment to have 2 replicas\n 260→\tdeployment.Spec.Replicas = &[]int32{2}[0]\n 261→\terr = suite.kubeClient.Update(ctx, deployment)\n 262→\trequire.NoError(t, err)\n 263→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 264→\n 265→\t// Wait a bit for instances to be fully created in IBM Cloud\n 266→\ttime.Sleep(60 * time.Second)\n 267→\n 268→\t// Get the list of instances after provisioning\n 269→\tinstancesAfterProvisioning, err := suite.getIBMCloudInstances(t)\n 270→\trequire.NoError(t, err)\n 271→\tafterProvisioningCount := len(instancesAfterProvisioning)\n 272→\tt.Logf(\"Instances after provisioning: %d (expected increase: %d)\",\n 273→\t\tafterProvisioningCount, afterProvisioningCount-initialInstanceCount)\n 274→\n 275→\t// Should have more instances now\n 276→\trequire.Greater(t, afterProvisioningCount, initialInstanceCount,\n 277→\t\t\"Should have provisioned new IBM Cloud instances\")\n 278→\n 279→\t// Start cleanup process\n 280→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 281→\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n 282→\n 283→\t// Delete NodePool to trigger instance cleanup\n 284→\terr = suite.kubeClient.Delete(ctx, nodePool)\n 285→\trequire.NoError(t, err)\n 286→\tt.Logf(\"Deleted NodePool, waiting for IBM Cloud instances to be cleaned up\")\n 287→\n 288→\t// Wait for instances to be cleaned up in IBM Cloud\n 289→\t// This can take a while as IBM Cloud cleanup is not immediate\n 290→\tdeadline := time.Now().Add(15 * time.Minute)\n 291→\tfinalInstanceCount := afterProvisioningCount\n 292→\n 293→\tfor time.Now().Before(deadline) {\n 294→\t\tcurrentInstances, err := suite.getIBMCloudInstances(t)\n 295→\t\tif err != nil {\n 296→\t\t\tt.Logf(\"Warning: Failed to get current instance list: %v\", err)\n 297→\t\t\ttime.Sleep(30 * time.Second)\n 298→\t\t\tcontinue\n 299→\t\t}\n 300→\n 301→\t\tfinalInstanceCount = len(currentInstances)\n 302→\t\tt.Logf(\"Current instance count: %d (target: %d)\", finalInstanceCount, initialInstanceCount)\n 303→\n 304→\t\tif finalInstanceCount <= initialInstanceCount {\n 305→\t\t\tt.Logf(\"IBM Cloud instances cleaned up successfully\")\n 306→\t\t\tbreak\n 307→\t\t}\n 308→\n 309→\t\ttime.Sleep(30 * time.Second)\n 310→\t}\n 311→\n 312→\t// Verify cleanup was successful (allow some tolerance for long-running instances)\n 313→\trequire.LessOrEqual(t, finalInstanceCount, 
initialInstanceCount+1,\n 314→\t\t\"Most IBM Cloud instances should be cleaned up (found %d, expected ~%d)\",\n 315→\t\tfinalInstanceCount, initialInstanceCount)\n 316→\n 317→\tif finalInstanceCount > initialInstanceCount {\n 318→\t\tt.Logf(\"Warning: %d instances may still be cleaning up (this can take additional time)\",\n 319→\t\t\tfinalInstanceCount-initialInstanceCount)\n 320→\t}\n 321→\n 322→\t// Cleanup remaining test resources\n 323→\tsuite.cleanupTestResources(t, testName)\n 324→\tt.Logf(\"IBM Cloud resources cleanup test completed: %s\", testName)\n 325→}\n 326→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"75c2ce0a-8de3-4949-891d-b0450b3e1560","timestamp":"2026-01-10T10:44:56.624Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/cleanup_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n)\n\n// TestE2ECleanupNodePoolDeletion tests proper cleanup when deleting NodePools\nfunc TestE2ECleanupNodePoolDeletion(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"cleanup-nodepool-%d\", time.Now().Unix())\n\tt.Logf(\"Starting NodePool cleanup test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create NodeClass\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n\n\t// Wait for NodeClass to be ready\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n\n\t// Create NodePool with 2 replicas for better testing\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n\n\t// Create test workload to trigger provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\t// Modify the deployment to have 2 replicas\n\tdeployment.Spec.Replicas = &[]int32{2}[0]\n\terr := suite.kubeClient.Update(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created test workload with 2 replicas: %s\", deployment.Name)\n\n\t// Wait for pods to be scheduled and nodes to be provisioned\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\tt.Logf(\"Pods scheduled successfully\")\n\n\t// Get initial list of nodes\n\tinitialNodes := suite.getKarpenterNodes(t, nodePool.Name)\n\trequire.Greater(t, len(initialNodes), 0, \"Should have provisioned at least one node\")\n\tt.Logf(\"Initial provisioned nodes: %d\", len(initialNodes))\n\n\t// Delete the NodePool first - this 
should trigger cleanup\n\terr = suite.kubeClient.Delete(ctx, nodePool)\n\trequire.NoError(t, err)\n\tt.Logf(\"Deleted NodePool: %s\", nodePool.Name)\n\n\t// Wait for pods to be evicted\n\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n\tt.Logf(\"Pods evicted successfully\")\n\n\t// Verify nodes are eventually cleaned up\n\t// This might take a while as Karpenter needs to process the NodePool deletion\n\tdeadline := time.Now().Add(10 * time.Minute)\n\tfor time.Now().Before(deadline) {\n\t\tcurrentNodes := suite.getKarpenterNodes(t, nodePool.Name)\n\t\tif len(currentNodes) == 0 {\n\t\t\tt.Logf(\"All nodes cleaned up successfully\")\n\t\t\tbreak\n\t\t}\n\t\tt.Logf(\"Still waiting for %d nodes to be cleaned up\", len(currentNodes))\n\t\ttime.Sleep(30 * time.Second)\n\t}\n\n\t// Final verification\n\tfinalNodes := suite.getKarpenterNodes(t, nodePool.Name)\n\trequire.Equal(t, 0, len(finalNodes), \"All nodes should be cleaned up after NodePool deletion\")\n\n\t// Cleanup remaining resources\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"NodePool cleanup test completed successfully: %s\", testName)\n}\n\n// TestE2ECleanupNodeClassDeletion tests proper cleanup when deleting NodeClasses\nfunc TestE2ECleanupNodeClassDeletion(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"cleanup-nodeclass-%d\", time.Now().Unix())\n\tt.Logf(\"Starting NodeClass cleanup test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create NodeClass\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tt.Logf(\"Created NodeClass: %s\", nodeClass.Name)\n\n\t// Wait for NodeClass to be ready\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tt.Logf(\"NodeClass is ready: %s\", nodeClass.Name)\n\n\t// Create NodePool that references this NodeClass\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n\n\t// Create test workload to trigger provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\tt.Logf(\"Created test workload: %s\", deployment.Name)\n\n\t// Wait for pods to be scheduled and nodes to be provisioned\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\tt.Logf(\"Pods scheduled successfully\")\n\n\t// Get list of provisioned NodeClaims\n\tvar nodeClaimList karpv1.NodeClaimList\n\terr := suite.kubeClient.List(ctx, &nodeClaimList, client.MatchingLabels{\n\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t})\n\trequire.NoError(t, err)\n\trequire.Greater(t, len(nodeClaimList.Items), 0, \"Should have NodeClaims provisioned\")\n\tinitialNodeClaims := len(nodeClaimList.Items)\n\tt.Logf(\"Initial NodeClaims: %d\", initialNodeClaims)\n\n\t// Delete the deployment first to reduce resource pressure\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n\n\t// Delete the NodePool first\n\terr = suite.kubeClient.Delete(ctx, nodePool)\n\trequire.NoError(t, err)\n\tt.Logf(\"Deleted NodePool: %s\", nodePool.Name)\n\n\t// Wait for NodeClaims to be cleaned up\n\tdeadline := time.Now().Add(10 * time.Minute)\n\tfor time.Now().Before(deadline) {\n\t\tvar currentNodeClaimList karpv1.NodeClaimList\n\t\terr := suite.kubeClient.List(ctx, &currentNodeClaimList, client.MatchingLabels{\n\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t})\n\t\trequire.NoError(t, err)\n\n\t\tif len(currentNodeClaimList.Items) == 0 {\n\t\t\tt.Logf(\"All NodeClaims cleaned 
up\")\n\t\t\tbreak\n\t\t}\n\t\tt.Logf(\"Still waiting for %d NodeClaims to be cleaned up\", len(currentNodeClaimList.Items))\n\t\ttime.Sleep(30 * time.Second)\n\t}\n\n\t// Now try to delete the NodeClass - it should succeed if no NodePools reference it\n\terr = suite.kubeClient.Delete(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tt.Logf(\"Successfully deleted NodeClass: %s\", nodeClass.Name)\n\n\t// Cleanup any remaining resources\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"NodeClass cleanup test completed successfully: %s\", testName)\n}\n\n// TestE2ECleanupOrphanedResources tests cleanup of orphaned resources\nfunc TestE2ECleanupOrphanedResources(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"cleanup-orphaned-%d\", time.Now().Unix())\n\tt.Logf(\"Starting orphaned resources cleanup test: %s\", testName)\n\tctx := context.Background()\n\n\t// Create NodeClass and NodePool\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\n\t// Create test workload to trigger provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Get the provisioned NodeClaim\n\tvar nodeClaimList karpv1.NodeClaimList\n\terr := suite.kubeClient.List(ctx, &nodeClaimList, client.MatchingLabels{\n\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t})\n\trequire.NoError(t, err)\n\trequire.Greater(t, len(nodeClaimList.Items), 0, \"Should have NodeClaims provisioned\")\n\toriginalNodeClaim := nodeClaimList.Items[0]\n\n\t// Simulate an orphaned state by manually deleting the NodePool while keeping NodeClaims\n\terr = suite.kubeClient.Delete(ctx, nodePool)\n\trequire.NoError(t, err)\n\tt.Logf(\"Deleted NodePool, leaving NodeClaim potentially orphaned: %s\", originalNodeClaim.Name)\n\n\t// Wait a bit for the controller to process the deletion\n\ttime.Sleep(30 * time.Second)\n\n\t// Check if the NodeClaim still exists (it might be cleaned up automatically)\n\tvar orphanedNodeClaim karpv1.NodeClaim\n\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: originalNodeClaim.Name}, &orphanedNodeClaim)\n\n\tif err == nil {\n\t\t// NodeClaim still exists - verify it gets cleaned up eventually\n\t\tt.Logf(\"NodeClaim still exists, waiting for automatic cleanup\")\n\t\tdeadline := time.Now().Add(5 * time.Minute)\n\n\t\tfor time.Now().Before(deadline) {\n\t\t\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: originalNodeClaim.Name}, &orphanedNodeClaim)\n\t\t\tif err != nil {\n\t\t\t\tt.Logf(\"Orphaned NodeClaim was automatically cleaned up\")\n\t\t\t\tbreak\n\t\t\t}\n\t\t\ttime.Sleep(15 * time.Second)\n\t\t}\n\t} else {\n\t\tt.Logf(\"NodeClaim was already cleaned up with NodePool deletion\")\n\t}\n\n\t// Clean up workload\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n\n\t// Use our comprehensive cleanup function to catch any remaining orphaned resources\n\tsuite.cleanupOrphanedKubernetesResources(t)\n\n\t// Final cleanup\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Orphaned resources cleanup test completed: %s\", testName)\n}\n\n// TestE2ECleanupIBMCloudResources tests cleanup of IBM Cloud resources\nfunc TestE2ECleanupIBMCloudResources(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"cleanup-ibmcloud-%d\", time.Now().Unix())\n\tt.Logf(\"Starting IBM Cloud resources cleanup 
test: %s\", testName)\n\tctx := context.Background()\n\n\t// Get initial list of IBM Cloud instances for comparison\n\tinitialInstances, err := suite.getIBMCloudInstances(t)\n\trequire.NoError(t, err)\n\tinitialInstanceCount := len(initialInstances)\n\tt.Logf(\"Initial IBM Cloud instances: %d\", initialInstanceCount)\n\n\t// Create NodeClass and NodePool\n\tnodeClass := suite.createTestNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createTestNodePool(t, testName, nodeClass.Name)\n\n\t// Create test workload to trigger provisioning\n\tdeployment := suite.createTestWorkload(t, testName)\n\t// Modify the deployment to have 2 replicas\n\tdeployment.Spec.Replicas = &[]int32{2}[0]\n\terr = suite.kubeClient.Update(ctx, deployment)\n\trequire.NoError(t, err)\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Wait a bit for instances to be fully created in IBM Cloud\n\ttime.Sleep(60 * time.Second)\n\n\t// Get the list of instances after provisioning\n\tinstancesAfterProvisioning, err := suite.getIBMCloudInstances(t)\n\trequire.NoError(t, err)\n\tafterProvisioningCount := len(instancesAfterProvisioning)\n\tt.Logf(\"Instances after provisioning: %d (expected increase: %d)\",\n\t\tafterProvisioningCount, afterProvisioningCount-initialInstanceCount)\n\n\t// Should have more instances now\n\trequire.Greater(t, afterProvisioningCount, initialInstanceCount,\n\t\t\"Should have provisioned new IBM Cloud instances\")\n\n\t// Start cleanup process\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.waitForPodsGone(t, deployment.Name+\"-workload\")\n\n\t// Delete NodePool to trigger instance cleanup\n\terr = suite.kubeClient.Delete(ctx, nodePool)\n\trequire.NoError(t, err)\n\tt.Logf(\"Deleted NodePool, waiting for IBM Cloud instances to be cleaned up\")\n\n\t// Wait for instances to be cleaned up in IBM Cloud\n\t// This can take a while as IBM Cloud cleanup is not immediate\n\tdeadline := time.Now().Add(15 * time.Minute)\n\tfinalInstanceCount := afterProvisioningCount\n\n\tfor time.Now().Before(deadline) {\n\t\tcurrentInstances, err := suite.getIBMCloudInstances(t)\n\t\tif err != nil {\n\t\t\tt.Logf(\"Warning: Failed to get current instance list: %v\", err)\n\t\t\ttime.Sleep(30 * time.Second)\n\t\t\tcontinue\n\t\t}\n\n\t\tfinalInstanceCount = len(currentInstances)\n\t\tt.Logf(\"Current instance count: %d (target: %d)\", finalInstanceCount, initialInstanceCount)\n\n\t\tif finalInstanceCount <= initialInstanceCount {\n\t\t\tt.Logf(\"IBM Cloud instances cleaned up successfully\")\n\t\t\tbreak\n\t\t}\n\n\t\ttime.Sleep(30 * time.Second)\n\t}\n\n\t// Verify cleanup was successful (allow some tolerance for long-running instances)\n\trequire.LessOrEqual(t, finalInstanceCount, initialInstanceCount+1,\n\t\t\"Most IBM Cloud instances should be cleaned up (found %d, expected ~%d)\",\n\t\tfinalInstanceCount, initialInstanceCount)\n\n\tif finalInstanceCount > initialInstanceCount {\n\t\tt.Logf(\"Warning: Warning: %d instances may still be cleaning up (this can take additional time)\",\n\t\t\tfinalInstanceCount-initialInstanceCount)\n\t}\n\n\t// Cleanup remaining test resources\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"IBM Cloud resources cleanup test completed: %s\", testName)\n}\n","numLines":326,"startLine":1,"totalLines":326}},"sourceToolAssistantUUID":"416bebe0-8a72-4f1c-8bf9-5b14673b862f"}
{"parentUuid":"2f107d82-3382-44fa-bdc4-0227e129c31e","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_018eHCa4qVGy6UNteggiGhJK","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"os\"\n 25→\t\"strings\"\n 26→\t\"testing\"\n 27→\t\"time\"\n 28→\n 29→\t\"github.com/stretchr/testify/require\"\n 30→\tappsv1 \"k8s.io/api/apps/v1\"\n 31→\tcorev1 \"k8s.io/api/core/v1\"\n 32→\t\"k8s.io/apimachinery/pkg/api/resource\"\n 33→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 34→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 35→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 36→\n 37→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 38→)\n 39→\n 40→// TestE2EMultiZoneDistribution tests that nodes are distributed across multiple zones\n 41→// when using PlacementStrategy with balanced zone distribution\n 42→func TestE2EMultiZoneDistribution(t *testing.T) {\n 43→\tsuite := SetupE2ETestSuite(t)\n 44→\ttestName := fmt.Sprintf(\"multizone-distribution-%d\", time.Now().Unix())\n 45→\tt.Logf(\"Starting multi-zone distribution test: %s\", testName)\n 46→\n 47→\t// Skip test if multi-zone infrastructure not available\n 48→\tif os.Getenv(\"E2E_SKIP_MULTIZONE\") == \"true\" {\n 49→\t\tt.Skip(\"Skipping multi-zone test: E2E_SKIP_MULTIZONE is set\")\n 50→\t}\n 51→\n 52→\t// Create NodeClass with placement strategy (no specific zone)\n 53→\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n 54→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 55→\n 56→\t// Create NodePool that allows multiple zones\n 57→\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n 58→\n 59→\t// Create deployment with multiple replicas to force multiple nodes\n 60→\tdeployment := suite.createMultiReplicaDeployment(t, testName, nodePool.Name, 6)\n 61→\n 62→\t// Wait for pods to be scheduled\n 63→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 64→\n 65→\t// Verify multi-zone distribution\n 66→\tsuite.verifyMultiZoneDistribution(t, testName, 2) // Expect at least 2 zones\n 67→\n 68→\t// Verify all pods are running\n 69→\tsuite.verifyPodsScheduledOnCorrectNodes(t, deployment.Name, \"default\", nodePool.Name)\n 70→\n 71→\t// Cleanup\n 72→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 73→\tsuite.cleanupTestResources(t, testName)\n 74→\tt.Logf(\"Multi-zone distribution test completed: %s\", testName)\n 75→}\n 76→\n 77→// TestE2EZoneAntiAffinity tests that pod anti-affinity works correctly with zone constraints\n 78→func TestE2EZoneAntiAffinity(t *testing.T) {\n 79→\tsuite := 
SetupE2ETestSuite(t)\n 80→\tctx := context.Background()\n 81→\ttestName := fmt.Sprintf(\"zone-anti-affinity-%d\", time.Now().Unix())\n 82→\tt.Logf(\"Starting zone anti-affinity test: %s\", testName)\n 83→\n 84→\t// Create infrastructure\n 85→\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n 86→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 87→\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n 88→\n 89→\t// Create deployment with zone anti-affinity\n 90→\tdeployment := &appsv1.Deployment{\n 91→\t\tObjectMeta: metav1.ObjectMeta{\n 92→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 93→\t\t\tNamespace: \"default\",\n 94→\t\t\tLabels: map[string]string{\n 95→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 96→\t\t\t\t\"test\": \"e2e\",\n 97→\t\t\t\t\"test-name\": testName,\n 98→\t\t\t},\n 99→\t\t},\n 100→\t\tSpec: appsv1.DeploymentSpec{\n 101→\t\t\tReplicas: &[]int32{3}[0], // 3 replicas to force multiple zones\n 102→\t\t\tSelector: &metav1.LabelSelector{\n 103→\t\t\t\tMatchLabels: map[string]string{\n 104→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 105→\t\t\t\t},\n 106→\t\t\t},\n 107→\t\t\tTemplate: corev1.PodTemplateSpec{\n 108→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 109→\t\t\t\t\tLabels: map[string]string{\n 110→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 111→\t\t\t\t\t\t\"test\": \"e2e\",\n 112→\t\t\t\t\t\t\"test-name\": testName,\n 113→\t\t\t\t\t},\n 114→\t\t\t\t},\n 115→\t\t\t\tSpec: corev1.PodSpec{\n 116→\t\t\t\t\tNodeSelector: map[string]string{\n 117→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 118→\t\t\t\t\t},\n 119→\t\t\t\t\tContainers: []corev1.Container{\n 120→\t\t\t\t\t\t{\n 121→\t\t\t\t\t\t\tName: \"test-container\",\n 122→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 123→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 124→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 125→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n 126→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n 127→\t\t\t\t\t\t\t\t},\n 128→\t\t\t\t\t\t\t},\n 129→\t\t\t\t\t\t},\n 130→\t\t\t\t\t},\n 131→\t\t\t\t\t// Zone anti-affinity - prefer different zones\n 132→\t\t\t\t\tAffinity: &corev1.Affinity{\n 133→\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n 134→\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n 135→\t\t\t\t\t\t\t\t{\n 136→\t\t\t\t\t\t\t\t\tWeight: 100,\n 137→\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n 138→\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n 139→\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n 140→\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 141→\t\t\t\t\t\t\t\t\t\t\t},\n 142→\t\t\t\t\t\t\t\t\t\t},\n 143→\t\t\t\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n 144→\t\t\t\t\t\t\t\t\t},\n 145→\t\t\t\t\t\t\t\t},\n 146→\t\t\t\t\t\t\t},\n 147→\t\t\t\t\t\t},\n 148→\t\t\t\t\t},\n 149→\t\t\t\t},\n 150→\t\t\t},\n 151→\t\t},\n 152→\t}\n 153→\n 154→\terr := suite.kubeClient.Create(ctx, deployment)\n 155→\trequire.NoError(t, err)\n 156→\tt.Logf(\"Created deployment with zone anti-affinity: %s\", deployment.Name)\n 157→\n 158→\t// Wait for pods to be scheduled\n 159→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 160→\n 161→\t// Verify that pods are distributed across zones\n 162→\tpodZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n 
163→\trequire.Greater(t, len(podZones), 1, \"Pods should be distributed across multiple zones\")\n 164→\tt.Logf(\"Verified pods are distributed across %d zones\", len(podZones))\n 165→\n 166→\t// List zones for debugging\n 167→\tfor zone, count := range podZones {\n 168→\t\tt.Logf(\"Zone %s: %d pods\", zone, count)\n 169→\t}\n 170→\n 171→\t// Cleanup\n 172→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 173→\tsuite.cleanupTestResources(t, testName)\n 174→\tt.Logf(\"Zone anti-affinity test completed: %s\", testName)\n 175→}\n 176→\n 177→// TestE2ETopologySpreadConstraints tests topology spread constraints across zones\n 178→func TestE2ETopologySpreadConstraints(t *testing.T) {\n 179→\tsuite := SetupE2ETestSuite(t)\n 180→\tctx := context.Background()\n 181→\ttestName := fmt.Sprintf(\"topology-spread-%d\", time.Now().Unix())\n 182→\tt.Logf(\"Starting topology spread constraints test: %s\", testName)\n 183→\n 184→\t// Create infrastructure\n 185→\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n 186→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 187→\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n 188→\n 189→\t// Create deployment with topology spread constraints\n 190→\tdeployment := &appsv1.Deployment{\n 191→\t\tObjectMeta: metav1.ObjectMeta{\n 192→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 193→\t\t\tNamespace: \"default\",\n 194→\t\t\tLabels: map[string]string{\n 195→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 196→\t\t\t\t\"test\": \"e2e\",\n 197→\t\t\t\t\"test-name\": testName,\n 198→\t\t\t},\n 199→\t\t},\n 200→\t\tSpec: appsv1.DeploymentSpec{\n 201→\t\t\tReplicas: &[]int32{6}[0], // 6 replicas to test spread across zones\n 202→\t\t\tSelector: &metav1.LabelSelector{\n 203→\t\t\t\tMatchLabels: map[string]string{\n 204→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 205→\t\t\t\t},\n 206→\t\t\t},\n 207→\t\t\tTemplate: corev1.PodTemplateSpec{\n 208→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 209→\t\t\t\t\tLabels: map[string]string{\n 210→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 211→\t\t\t\t\t\t\"test\": \"e2e\",\n 212→\t\t\t\t\t\t\"test-name\": testName,\n 213→\t\t\t\t\t},\n 214→\t\t\t\t},\n 215→\t\t\t\tSpec: corev1.PodSpec{\n 216→\t\t\t\t\tNodeSelector: map[string]string{\n 217→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n 218→\t\t\t\t\t},\n 219→\t\t\t\t\tContainers: []corev1.Container{\n 220→\t\t\t\t\t\t{\n 221→\t\t\t\t\t\t\tName: \"test-container\",\n 222→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 223→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 224→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 225→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n 226→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n 227→\t\t\t\t\t\t\t\t},\n 228→\t\t\t\t\t\t\t},\n 229→\t\t\t\t\t\t},\n 230→\t\t\t\t\t},\n 231→\t\t\t\t\t// Topology spread constraints for even distribution\n 232→\t\t\t\t\tTopologySpreadConstraints: []corev1.TopologySpreadConstraint{\n 233→\t\t\t\t\t\t{\n 234→\t\t\t\t\t\t\tMaxSkew: 1,\n 235→\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n 236→\t\t\t\t\t\t\tWhenUnsatisfiable: corev1.DoNotSchedule,\n 237→\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n 238→\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n 239→\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 240→\t\t\t\t\t\t\t\t},\n 241→\t\t\t\t\t\t\t},\n 242→\t\t\t\t\t\t},\n 243→\t\t\t\t\t},\n 244→\t\t\t\t},\n 
245→\t\t\t},\n 246→\t\t},\n 247→\t}\n 248→\n 249→\terr := suite.kubeClient.Create(ctx, deployment)\n 250→\trequire.NoError(t, err)\n 251→\tt.Logf(\"Created deployment with topology spread constraints: %s\", deployment.Name)\n 252→\n 253→\t// Wait for pods to be scheduled\n 254→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 255→\n 256→\t// Verify topology spread\n 257→\tpodZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n 258→\trequire.Greater(t, len(podZones), 1, \"Pods should be distributed across multiple zones\")\n 259→\n 260→\t// Check that distribution is relatively even (within maxSkew of 1)\n 261→\tvar counts []int\n 262→\tfor zone, count := range podZones {\n 263→\t\tcounts = append(counts, count)\n 264→\t\tt.Logf(\"Zone %s: %d pods\", zone, count)\n 265→\t}\n 266→\n 267→\t// Verify maxSkew constraint (difference between max and min should be <= 1)\n 268→\tif len(counts) > 1 {\n 269→\t\tmin, max := counts[0], counts[0]\n 270→\t\tfor _, count := range counts {\n 271→\t\t\tif count < min {\n 272→\t\t\t\tmin = count\n 273→\t\t\t}\n 274→\t\t\tif count > max {\n 275→\t\t\t\tmax = count\n 276→\t\t\t}\n 277→\t\t}\n 278→\t\tskew := max - min\n 279→\t\trequire.LessOrEqual(t, skew, 1, \"Pod distribution should respect maxSkew constraint of 1\")\n 280→\t\tt.Logf(\"Verified topology spread with skew of %d (max: %d, min: %d)\", skew, max, min)\n 281→\t}\n 282→\n 283→\t// Cleanup\n 284→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 285→\tsuite.cleanupTestResources(t, testName)\n 286→\tt.Logf(\"Topology spread constraints test completed: %s\", testName)\n 287→}\n 288→\n 289→// TestE2EPlacementStrategyValidation tests PlacementStrategy validation and behavior\n 290→func TestE2EPlacementStrategyValidation(t *testing.T) {\n 291→\tsuite := SetupE2ETestSuite(t)\n 292→\tctx := context.Background()\n 293→\ttestName := fmt.Sprintf(\"placement-strategy-validation-%d\", time.Now().Unix())\n 294→\tt.Logf(\"Starting placement strategy validation test: %s\", testName)\n 295→\n 296→\t// Test 1: NodeClass without zone/subnet but with placement strategy should be accepted\n 297→\tt.Run(\"ValidPlacementStrategy\", func(t *testing.T) {\n 298→\t\tnodeClass := suite.createMultiZoneNodeClass(t, testName+\"-valid\")\n 299→\t\tdefer suite.cleanupTestResources(t, testName+\"-valid\")\n 300→\n 301→\t\t// Wait for NodeClass to be ready (should not fail validation)\n 302→\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 303→\t\tt.Logf(\"NodeClass with placement strategy validated successfully\")\n 304→\t})\n 305→\n 306→\t// Test 2: NodeClass without zone/subnet AND without placement strategy should fail\n 307→\t// Note: This test may be skipped if the current implementation doesn't enforce this validation\n 308→\tt.Run(\"MissingPlacementStrategy\", func(t *testing.T) {\n 309→\t\tinvalidNodeClass := &v1alpha1.IBMNodeClass{\n 310→\t\t\tObjectMeta: metav1.ObjectMeta{\n 311→\t\t\t\tName: fmt.Sprintf(\"%s-invalid-nodeclass\", testName),\n 312→\t\t\t\tLabels: map[string]string{\n 313→\t\t\t\t\t\"test\": \"e2e\",\n 314→\t\t\t\t\t\"test-name\": testName + \"-invalid\",\n 315→\t\t\t\t},\n 316→\t\t\t},\n 317→\t\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 318→\t\t\t\tRegion: suite.testRegion,\n 319→\t\t\t\t// No Zone specified\n 320→\t\t\t\tVPC: suite.testVPC,\n 321→\t\t\t\tImage: suite.testImage,\n 322→\t\t\t\t// No Subnet specified\n 323→\t\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 324→\t\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 
325→\t\t\t\tResourceGroup: suite.testResourceGroup,\n 326→\t\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 327→\t\t\t\tInstanceProfile: \"bx2-2x8\",\n 328→\t\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n 329→\t\t\t\t// No PlacementStrategy specified - should cause validation error\n 330→\t\t\t\tTags: map[string]string{\n 331→\t\t\t\t\t\"test\": \"e2e\",\n 332→\t\t\t\t\t\"test-name\": testName + \"-invalid\",\n 333→\t\t\t\t},\n 334→\t\t\t},\n 335→\t\t}\n 336→\n 337→\t\terr := suite.kubeClient.Create(ctx, invalidNodeClass)\n 338→\t\tif err != nil {\n 339→\t\t\t// Expected: validation should fail at creation\n 340→\t\t\tt.Logf(\"NodeClass creation failed as expected due to missing placement strategy: %v\", err)\n 341→\t\t} else {\n 342→\t\t\t// If creation succeeds, wait and check if controller marks it as invalid\n 343→\t\t\tt.Logf(\"NodeClass created, checking if validation fails...\")\n 344→\t\t\ttime.Sleep(5 * time.Second)\n 345→\n 346→\t\t\tvar updatedNodeClass v1alpha1.IBMNodeClass\n 347→\t\t\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(invalidNodeClass), &updatedNodeClass)\n 348→\t\t\tif err != nil {\n 349→\t\t\t\tt.Logf(\"NodeClass was deleted or became inaccessible (expected)\")\n 350→\t\t\t} else {\n 351→\t\t\t\t// Check status conditions for validation failure\n 352→\t\t\t\thasValidationError := false\n 353→\t\t\t\tfor _, condition := range updatedNodeClass.Status.Conditions {\n 354→\t\t\t\t\tif condition.Type == \"Ready\" && condition.Status == \"False\" {\n 355→\t\t\t\t\t\thasValidationError = true\n 356→\t\t\t\t\t\tt.Logf(\"NodeClass marked as not ready due to validation: %s\", condition.Message)\n 357→\t\t\t\t\t\tbreak\n 358→\t\t\t\t\t}\n 359→\t\t\t\t}\n 360→\n 361→\t\t\t\tif !hasValidationError {\n 362→\t\t\t\t\tt.Logf(\"Warning: NodeClass validation should have failed but didn't - this may indicate missing validation logic\")\n 363→\t\t\t\t}\n 364→\t\t\t}\n 365→\n 366→\t\t\t// Clean up\n 367→\t\t\t_ = suite.kubeClient.Delete(ctx, invalidNodeClass)\n 368→\t\t}\n 369→\t})\n 370→\n 371→\tt.Logf(\"Placement strategy validation test completed: %s\", testName)\n 372→}\n 373→\n 374→// TestE2EZoneFailover tests zone failover behavior when one zone becomes unavailable\n 375→func TestE2EZoneFailover(t *testing.T) {\n 376→\tsuite := SetupE2ETestSuite(t)\n 377→\ttestName := fmt.Sprintf(\"zone-failover-%d\", time.Now().Unix())\n 378→\tt.Logf(\"Starting zone failover test: %s\", testName)\n 379→\n 380→\t// Note: This test is more conceptual as we can't actually make zones unavailable in E2E\n 381→\t// Instead, we test that workloads can be distributed and rescheduled properly\n 382→\n 383→\t// Create infrastructure\n 384→\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n 385→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 386→\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n 387→\n 388→\t// Create deployment with zone spread preferences\n 389→\tdeployment := suite.createZoneSpreadDeployment(t, testName, nodePool.Name, 4)\n 390→\n 391→\t// Wait for initial placement\n 392→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 393→\tinitialZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n 394→\n 395→\tt.Logf(\"Initial zone distribution:\")\n 396→\tfor zone, count := range initialZones {\n 397→\t\tt.Logf(\" Zone %s: %d pods\", zone, count)\n 398→\t}\n 399→\n 400→\t// Scale up to test additional zone utilization\n 401→\tdeployment.Spec.Replicas = &[]int32{8}[0]\n 402→\terr := 
suite.kubeClient.Update(context.Background(), deployment)\n 403→\trequire.NoError(t, err)\n 404→\n 405→\t// Wait for scale-up (use existing waiting function)\n 406→\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n 407→\n 408→\tfinalZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n 409→\tt.Logf(\"Final zone distribution after scale-up:\")\n 410→\tfor zone, count := range finalZones {\n 411→\t\tt.Logf(\" Zone %s: %d pods\", zone, count)\n 412→\t}\n 413→\n 414→\t// Verify we're still using multiple zones\n 415→\trequire.Greater(t, len(finalZones), 1, \"Should maintain multi-zone distribution after scaling\")\n 416→\n 417→\t// Cleanup\n 418→\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n 419→\tsuite.cleanupTestResources(t, testName)\n 420→\tt.Logf(\"Zone failover test completed: %s\", testName)\n 421→}\n 422→\n 423→// Helper function to create a multi-zone NodeClass with placement strategy\n 424→func (s *E2ETestSuite) createMultiZoneNodeClass(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n 425→\tnodeClass := &v1alpha1.IBMNodeClass{\n 426→\t\tObjectMeta: metav1.ObjectMeta{\n 427→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 428→\t\t\tLabels: map[string]string{\n 429→\t\t\t\t\"test\": \"e2e\",\n 430→\t\t\t\t\"test-name\": testName,\n 431→\t\t\t},\n 432→\t\t},\n 433→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 434→\t\t\tRegion: s.testRegion,\n 435→\t\t\t// Note: No Zone or Subnet specified - using placement strategy\n 436→\t\t\tVPC: s.testVPC,\n 437→\t\t\tImage: s.testImage,\n 438→\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n 439→\t\t\tSSHKeys: []string{s.testSshKeyId},\n 440→\t\t\tResourceGroup: s.testResourceGroup,\n 441→\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n 442→\t\t\tInstanceProfile: \"bx2-4x16\",\n 443→\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n 444→\t\t\tPlacementStrategy: &v1alpha1.PlacementStrategy{\n 445→\t\t\t\tZoneBalance: \"Balanced\", // Request balanced zone distribution\n 446→\t\t\t},\n 447→\t\t\tTags: map[string]string{\n 448→\t\t\t\t\"test\": \"e2e\",\n 449→\t\t\t\t\"test-name\": testName,\n 450→\t\t\t\t\"created-by\": \"karpenter-e2e\",\n 451→\t\t\t\t\"purpose\": \"multi-zone-test\",\n 452→\t\t\t},\n 453→\t\t},\n 454→\t}\n 455→\n 456→\terr := s.kubeClient.Create(context.Background(), nodeClass)\n 457→\trequire.NoError(t, err, \"Failed to create multi-zone NodeClass\")\n 458→\tt.Logf(\"Created multi-zone NodeClass: %s\", nodeClass.Name)\n 459→\n 460→\treturn nodeClass\n 461→}\n 462→\n 463→// Helper function to create a NodePool that allows multiple zones\n 464→func (s *E2ETestSuite) createMultiZoneNodePool(t *testing.T, testName, nodeClassName string) *karpv1.NodePool {\n 465→\tnodePool := &karpv1.NodePool{\n 466→\t\tObjectMeta: metav1.ObjectMeta{\n 467→\t\t\tName: fmt.Sprintf(\"%s-nodepool\", testName),\n 468→\t\t\tLabels: map[string]string{\n 469→\t\t\t\t\"test\": \"e2e\",\n 470→\t\t\t\t\"test-name\": testName,\n 471→\t\t\t},\n 472→\t\t},\n 473→\t\tSpec: karpv1.NodePoolSpec{\n 474→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 475→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 476→\t\t\t\t\tLabels: map[string]string{\n 477→\t\t\t\t\t\t\"test\": \"e2e\",\n 478→\t\t\t\t\t\t\"test-name\": testName,\n 479→\t\t\t\t\t},\n 480→\t\t\t\t},\n 481→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 482→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 483→\t\t\t\t\t\t// Allow multiple instance types for flexibility\n 484→\t\t\t\t\t\t{\n 
485→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 486→\t\t\t\t\t\t\t\tKey: corev1.LabelInstanceTypeStable,\n 487→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 488→\t\t\t\t\t\t\t\tValues: []string{\"bx2-4x16\", \"bx2-2x8\"},\n 489→\t\t\t\t\t\t\t},\n 490→\t\t\t\t\t\t},\n 491→\t\t\t\t\t\t// Allow any zone in the region (multi-zone)\n 492→\t\t\t\t\t\t{\n 493→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 494→\t\t\t\t\t\t\t\tKey: \"topology.kubernetes.io/zone\",\n 495→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 496→\t\t\t\t\t\t\t\tValues: []string{s.testRegion + \"-1\", s.testRegion + \"-2\", s.testRegion + \"-3\"},\n 497→\t\t\t\t\t\t\t},\n 498→\t\t\t\t\t\t},\n 499→\t\t\t\t\t},\n 500→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 501→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 502→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 503→\t\t\t\t\t\tName: nodeClassName,\n 504→\t\t\t\t\t},\n 505→\t\t\t\t},\n 506→\t\t\t},\n 507→\t\t},\n 508→\t}\n 509→\n 510→\terr := s.kubeClient.Create(context.Background(), nodePool)\n 511→\trequire.NoError(t, err, \"Failed to create multi-zone NodePool\")\n 512→\tt.Logf(\"Created multi-zone NodePool: %s\", nodePool.Name)\n 513→\n 514→\treturn nodePool\n 515→}\n 516→\n 517→// Helper function to create a deployment with multiple replicas\n 518→func (s *E2ETestSuite) createMultiReplicaDeployment(t *testing.T, testName, nodePoolName string, replicas int32) *appsv1.Deployment {\n 519→\tdeployment := &appsv1.Deployment{\n 520→\t\tObjectMeta: metav1.ObjectMeta{\n 521→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 522→\t\t\tNamespace: \"default\",\n 523→\t\t\tLabels: map[string]string{\n 524→\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n 525→\t\t\t\t\"test\": \"e2e\",\n 526→\t\t\t\t\"test-name\": testName,\n 527→\t\t\t},\n 528→\t\t},\n 529→\t\tSpec: appsv1.DeploymentSpec{\n 530→\t\t\tReplicas: &replicas,\n 531→\t\t\tSelector: &metav1.LabelSelector{\n 532→\t\t\t\tMatchLabels: map[string]string{\n 533→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n 534→\t\t\t\t},\n 535→\t\t\t},\n 536→\t\t\tTemplate: corev1.PodTemplateSpec{\n 537→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 538→\t\t\t\t\tLabels: map[string]string{\n 539→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n 540→\t\t\t\t\t\t\"test\": \"e2e\",\n 541→\t\t\t\t\t\t\"test-name\": testName,\n 542→\t\t\t\t\t},\n 543→\t\t\t\t},\n 544→\t\t\t\tSpec: corev1.PodSpec{\n 545→\t\t\t\t\tNodeSelector: map[string]string{\n 546→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePoolName,\n 547→\t\t\t\t\t},\n 548→\t\t\t\t\tContainers: []corev1.Container{\n 549→\t\t\t\t\t\t{\n 550→\t\t\t\t\t\t\tName: \"workload\",\n 551→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 552→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 553→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 554→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"), // Force separate nodes\n 555→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n 556→\t\t\t\t\t\t\t\t},\n 557→\t\t\t\t\t\t\t},\n 558→\t\t\t\t\t\t},\n 559→\t\t\t\t\t},\n 560→\t\t\t\t},\n 561→\t\t\t},\n 562→\t\t},\n 563→\t}\n 564→\n 565→\terr := s.kubeClient.Create(context.Background(), deployment)\n 566→\trequire.NoError(t, err, \"Failed to create multi-replica deployment\")\n 567→\tt.Logf(\"Created deployment with %d replicas: %s\", replicas, deployment.Name)\n 568→\n 569→\treturn deployment\n 570→}\n 571→\n 572→// Helper function to create a 
deployment with zone spread preferences\n 573→func (s *E2ETestSuite) createZoneSpreadDeployment(t *testing.T, testName, nodePoolName string, replicas int32) *appsv1.Deployment {\n 574→\tdeployment := &appsv1.Deployment{\n 575→\t\tObjectMeta: metav1.ObjectMeta{\n 576→\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n 577→\t\t\tNamespace: \"default\",\n 578→\t\t\tLabels: map[string]string{\n 579→\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 580→\t\t\t\t\"test\": \"e2e\",\n 581→\t\t\t\t\"test-name\": testName,\n 582→\t\t\t},\n 583→\t\t},\n 584→\t\tSpec: appsv1.DeploymentSpec{\n 585→\t\t\tReplicas: &replicas,\n 586→\t\t\tSelector: &metav1.LabelSelector{\n 587→\t\t\t\tMatchLabels: map[string]string{\n 588→\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 589→\t\t\t\t},\n 590→\t\t\t},\n 591→\t\t\tTemplate: corev1.PodTemplateSpec{\n 592→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 593→\t\t\t\t\tLabels: map[string]string{\n 594→\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 595→\t\t\t\t\t\t\"test\": \"e2e\",\n 596→\t\t\t\t\t\t\"test-name\": testName,\n 597→\t\t\t\t\t},\n 598→\t\t\t\t},\n 599→\t\t\t\tSpec: corev1.PodSpec{\n 600→\t\t\t\t\tNodeSelector: map[string]string{\n 601→\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePoolName,\n 602→\t\t\t\t\t},\n 603→\t\t\t\t\tContainers: []corev1.Container{\n 604→\t\t\t\t\t\t{\n 605→\t\t\t\t\t\t\tName: \"test-container\",\n 606→\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n 607→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 608→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 609→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n 610→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n 611→\t\t\t\t\t\t\t\t},\n 612→\t\t\t\t\t\t\t},\n 613→\t\t\t\t\t\t},\n 614→\t\t\t\t\t},\n 615→\t\t\t\t\t// Prefer different zones but don't require it\n 616→\t\t\t\t\tAffinity: &corev1.Affinity{\n 617→\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n 618→\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n 619→\t\t\t\t\t\t\t\t{\n 620→\t\t\t\t\t\t\t\t\tWeight: 50, // Medium preference for zone spread\n 621→\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n 622→\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n 623→\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n 624→\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n 625→\t\t\t\t\t\t\t\t\t\t\t},\n 626→\t\t\t\t\t\t\t\t\t\t},\n 627→\t\t\t\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n 628→\t\t\t\t\t\t\t\t\t},\n 629→\t\t\t\t\t\t\t\t},\n 630→\t\t\t\t\t\t\t},\n 631→\t\t\t\t\t\t},\n 632→\t\t\t\t\t},\n 633→\t\t\t\t},\n 634→\t\t\t},\n 635→\t\t},\n 636→\t}\n 637→\n 638→\terr := s.kubeClient.Create(context.Background(), deployment)\n 639→\trequire.NoError(t, err)\n 640→\tt.Logf(\"Created zone spread deployment: %s\", deployment.Name)\n 641→\n 642→\treturn deployment\n 643→}\n 644→\n 645→// Helper function to verify multi-zone distribution\n 646→func (s *E2ETestSuite) verifyMultiZoneDistribution(t *testing.T, testName string, minZones int) {\n 647→\tctx := context.Background()\n 648→\n 649→\t// Get all Karpenter nodes for this test\n 650→\tvar nodeList corev1.NodeList\n 651→\terr := s.kubeClient.List(ctx, &nodeList, client.MatchingLabels{\n 652→\t\t\"test-name\": testName,\n 653→\t})\n 654→\trequire.NoError(t, err, \"Failed to get nodes for multi-zone verification\")\n 655→\n 656→\t// Count nodes per zone\n 657→\tzoneDistribution := 
make(map[string]int)\n 658→\tkarpenterNodes := 0\n 659→\n 660→\tfor _, node := range nodeList.Items {\n 661→\t\tif _, isKarpenterNode := node.Labels[\"karpenter.sh/nodepool\"]; isKarpenterNode {\n 662→\t\t\tkarpenterNodes++\n 663→\t\t\tzone := node.Labels[\"topology.kubernetes.io/zone\"]\n 664→\t\t\trequire.NotEmpty(t, zone, \"Karpenter node should have zone label\")\n 665→\t\t\tzoneDistribution[zone]++\n 666→\t\t}\n 667→\t}\n 668→\n 669→\trequire.Greater(t, karpenterNodes, 0, \"Should have at least one Karpenter node\")\n 670→\trequire.GreaterOrEqual(t, len(zoneDistribution), minZones,\n 671→\t\tfmt.Sprintf(\"Should have nodes in at least %d zones\", minZones))\n 672→\n 673→\tt.Logf(\"Multi-zone distribution verified:\")\n 674→\tfor zone, count := range zoneDistribution {\n 675→\t\tt.Logf(\" Zone %s: %d nodes\", zone, count)\n 676→\t\t// Verify zone is in expected region\n 677→\t\trequire.True(t, strings.HasPrefix(zone, s.testRegion+\"-\"),\n 678→\t\t\tfmt.Sprintf(\"Zone %s should be in region %s\", zone, s.testRegion))\n 679→\t}\n 680→\n 681→\tt.Logf(\"Verified %d Karpenter nodes distributed across %d zones\",\n 682→\t\tkarpenterNodes, len(zoneDistribution))\n 683→}\n 684→\n 685→// Helper function to get pod zone distribution\n 686→func (s *E2ETestSuite) getPodZoneDistribution(t *testing.T, appLabel, namespace string) map[string]int {\n 687→\tctx := context.Background()\n 688→\tzoneDistribution := make(map[string]int)\n 689→\n 690→\t// Get pods with the app label\n 691→\tvar podList corev1.PodList\n 692→\terr := s.kubeClient.List(ctx, &podList,\n 693→\t\tclient.InNamespace(namespace),\n 694→\t\tclient.MatchingLabels{\"app\": appLabel})\n 695→\trequire.NoError(t, err, \"Failed to get pods for zone distribution\")\n 696→\n 697→\tfor _, pod := range podList.Items {\n 698→\t\tif pod.Spec.NodeName == \"\" {\n 699→\t\t\tcontinue // Skip unscheduled pods\n 700→\t\t}\n 701→\n 702→\t\t// Get the node and its zone\n 703→\t\tvar node corev1.Node\n 704→\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: pod.Spec.NodeName}, &node)\n 705→\t\trequire.NoError(t, err, fmt.Sprintf(\"Failed to get node %s\", pod.Spec.NodeName))\n 706→\n 707→\t\tzone := node.Labels[\"topology.kubernetes.io/zone\"]\n 708→\t\tif zone != \"\" {\n 709→\t\t\tzoneDistribution[zone]++\n 710→\t\t}\n 711→\t}\n 712→\n 713→\treturn zoneDistribution\n 714→}\n 715→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"2df09914-94b5-46cf-a3fc-066253e9ddae","timestamp":"2026-01-10T10:44:56.625Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/multizone_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/api/resource\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// TestE2EMultiZoneDistribution tests that nodes are distributed across multiple zones\n// when using PlacementStrategy with balanced zone distribution\nfunc TestE2EMultiZoneDistribution(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"multizone-distribution-%d\", time.Now().Unix())\n\tt.Logf(\"Starting multi-zone distribution test: %s\", testName)\n\n\t// Skip test if multi-zone infrastructure not available\n\tif os.Getenv(\"E2E_SKIP_MULTIZONE\") == \"true\" {\n\t\tt.Skip(\"Skipping multi-zone test: E2E_SKIP_MULTIZONE is set\")\n\t}\n\n\t// Create NodeClass with placement strategy (no specific zone)\n\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\n\t// Create NodePool that allows multiple zones\n\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n\n\t// Create deployment with multiple replicas to force multiple nodes\n\tdeployment := suite.createMultiReplicaDeployment(t, testName, nodePool.Name, 6)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Verify multi-zone distribution\n\tsuite.verifyMultiZoneDistribution(t, testName, 2) // Expect at least 2 zones\n\n\t// Verify all pods are running\n\tsuite.verifyPodsScheduledOnCorrectNodes(t, deployment.Name, \"default\", nodePool.Name)\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Multi-zone distribution test completed: %s\", testName)\n}\n\n// TestE2EZoneAntiAffinity tests that pod anti-affinity works correctly with zone constraints\nfunc TestE2EZoneAntiAffinity(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\ttestName := fmt.Sprintf(\"zone-anti-affinity-%d\", time.Now().Unix())\n\tt.Logf(\"Starting zone anti-affinity test: %s\", testName)\n\n\t// Create infrastructure\n\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createMultiZoneNodePool(t, testName, 
nodeClass.Name)\n\n\t// Create deployment with zone anti-affinity\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &[]int32{3}[0], // 3 replicas to force multiple zones\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Zone anti-affinity - prefer different zones\n\t\t\t\t\tAffinity: &corev1.Affinity{\n\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tWeight: 100,\n\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created deployment with zone anti-affinity: %s\", deployment.Name)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Verify that pods are distributed across zones\n\tpodZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n\trequire.Greater(t, len(podZones), 1, \"Pods should be distributed across multiple zones\")\n\tt.Logf(\"Verified pods are distributed across %d zones\", len(podZones))\n\n\t// List zones for debugging\n\tfor zone, count := range podZones {\n\t\tt.Logf(\"Zone %s: %d pods\", zone, count)\n\t}\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Zone anti-affinity test completed: %s\", testName)\n}\n\n// TestE2ETopologySpreadConstraints tests topology spread constraints across zones\nfunc TestE2ETopologySpreadConstraints(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\ttestName := fmt.Sprintf(\"topology-spread-%d\", time.Now().Unix())\n\tt.Logf(\"Starting topology spread 
constraints test: %s\", testName)\n\n\t// Create infrastructure\n\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n\n\t// Create deployment with topology spread constraints\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &[]int32{6}[0], // 6 replicas to test spread across zones\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePool.Name,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Topology spread constraints for even distribution\n\t\t\t\t\tTopologySpreadConstraints: []corev1.TopologySpreadConstraint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tMaxSkew: 1,\n\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n\t\t\t\t\t\t\tWhenUnsatisfiable: corev1.DoNotSchedule,\n\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created deployment with topology spread constraints: %s\", deployment.Name)\n\n\t// Wait for pods to be scheduled\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\t// Verify topology spread\n\tpodZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n\trequire.Greater(t, len(podZones), 1, \"Pods should be distributed across multiple zones\")\n\n\t// Check that distribution is relatively even (within maxSkew of 1)\n\tvar counts []int\n\tfor zone, count := range podZones {\n\t\tcounts = append(counts, count)\n\t\tt.Logf(\"Zone %s: %d pods\", zone, count)\n\t}\n\n\t// Verify maxSkew constraint (difference between max and min should be <= 1)\n\tif len(counts) > 1 {\n\t\tmin, max := counts[0], counts[0]\n\t\tfor _, count := range counts {\n\t\t\tif count < min {\n\t\t\t\tmin = count\n\t\t\t}\n\t\t\tif count > max {\n\t\t\t\tmax = count\n\t\t\t}\n\t\t}\n\t\tskew := max - min\n\t\trequire.LessOrEqual(t, skew, 1, \"Pod distribution should respect maxSkew constraint of 1\")\n\t\tt.Logf(\"Verified topology spread with skew of %d 
(max: %d, min: %d)\", skew, max, min)\n\t}\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Topology spread constraints test completed: %s\", testName)\n}\n\n// TestE2EPlacementStrategyValidation tests PlacementStrategy validation and behavior\nfunc TestE2EPlacementStrategyValidation(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\ttestName := fmt.Sprintf(\"placement-strategy-validation-%d\", time.Now().Unix())\n\tt.Logf(\"Starting placement strategy validation test: %s\", testName)\n\n\t// Test 1: NodeClass without zone/subnet but with placement strategy should be accepted\n\tt.Run(\"ValidPlacementStrategy\", func(t *testing.T) {\n\t\tnodeClass := suite.createMultiZoneNodeClass(t, testName+\"-valid\")\n\t\tdefer suite.cleanupTestResources(t, testName+\"-valid\")\n\n\t\t// Wait for NodeClass to be ready (should not fail validation)\n\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\t\tt.Logf(\"NodeClass with placement strategy validated successfully\")\n\t})\n\n\t// Test 2: NodeClass without zone/subnet AND without placement strategy should fail\n\t// Note: This test may be skipped if the current implementation doesn't enforce this validation\n\tt.Run(\"MissingPlacementStrategy\", func(t *testing.T) {\n\t\tinvalidNodeClass := &v1alpha1.IBMNodeClass{\n\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\tName: fmt.Sprintf(\"%s-invalid-nodeclass\", testName),\n\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\"test-name\": testName + \"-invalid\",\n\t\t\t\t},\n\t\t\t},\n\t\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\t\tRegion: suite.testRegion,\n\t\t\t\t// No Zone specified\n\t\t\t\tVPC: suite.testVPC,\n\t\t\t\tImage: suite.testImage,\n\t\t\t\t// No Subnet specified\n\t\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\t\tInstanceProfile: \"bx2-2x8\",\n\t\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n\t\t\t\t// No PlacementStrategy specified - should cause validation error\n\t\t\t\tTags: map[string]string{\n\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\"test-name\": testName + \"-invalid\",\n\t\t\t\t},\n\t\t\t},\n\t\t}\n\n\t\terr := suite.kubeClient.Create(ctx, invalidNodeClass)\n\t\tif err != nil {\n\t\t\t// Expected: validation should fail at creation\n\t\t\tt.Logf(\"NodeClass creation failed as expected due to missing placement strategy: %v\", err)\n\t\t} else {\n\t\t\t// If creation succeeds, wait and check if controller marks it as invalid\n\t\t\tt.Logf(\"NodeClass created, checking if validation fails...\")\n\t\t\ttime.Sleep(5 * time.Second)\n\n\t\t\tvar updatedNodeClass v1alpha1.IBMNodeClass\n\t\t\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(invalidNodeClass), &updatedNodeClass)\n\t\t\tif err != nil {\n\t\t\t\tt.Logf(\"NodeClass was deleted or became inaccessible (expected)\")\n\t\t\t} else {\n\t\t\t\t// Check status conditions for validation failure\n\t\t\t\thasValidationError := false\n\t\t\t\tfor _, condition := range updatedNodeClass.Status.Conditions {\n\t\t\t\t\tif condition.Type == \"Ready\" && condition.Status == \"False\" {\n\t\t\t\t\t\thasValidationError = true\n\t\t\t\t\t\tt.Logf(\"NodeClass marked as not ready due to validation: %s\", condition.Message)\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tif !hasValidationError {\n\t\t\t\t\tt.Logf(\"Warning: 
NodeClass validation should have failed but didn't - this may indicate missing validation logic\")\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Clean up\n\t\t\t_ = suite.kubeClient.Delete(ctx, invalidNodeClass)\n\t\t}\n\t})\n\n\tt.Logf(\"Placement strategy validation test completed: %s\", testName)\n}\n\n// TestE2EZoneFailover tests zone failover behavior when one zone becomes unavailable\nfunc TestE2EZoneFailover(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"zone-failover-%d\", time.Now().Unix())\n\tt.Logf(\"Starting zone failover test: %s\", testName)\n\n\t// Note: This test is more conceptual as we can't actually make zones unavailable in E2E\n\t// Instead, we test that workloads can be distributed and rescheduled properly\n\n\t// Create infrastructure\n\tnodeClass := suite.createMultiZoneNodeClass(t, testName)\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\tnodePool := suite.createMultiZoneNodePool(t, testName, nodeClass.Name)\n\n\t// Create deployment with zone spread preferences\n\tdeployment := suite.createZoneSpreadDeployment(t, testName, nodePool.Name, 4)\n\n\t// Wait for initial placement\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\tinitialZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n\n\tt.Logf(\"Initial zone distribution:\")\n\tfor zone, count := range initialZones {\n\t\tt.Logf(\" Zone %s: %d pods\", zone, count)\n\t}\n\n\t// Scale up to test additional zone utilization\n\tdeployment.Spec.Replicas = &[]int32{8}[0]\n\terr := suite.kubeClient.Update(context.Background(), deployment)\n\trequire.NoError(t, err)\n\n\t// Wait for scale-up (use existing waiting function)\n\tsuite.waitForPodsToBeScheduled(t, deployment.Name, \"default\")\n\n\tfinalZones := suite.getPodZoneDistribution(t, fmt.Sprintf(\"%s-app\", testName), \"default\")\n\tt.Logf(\"Final zone distribution after scale-up:\")\n\tfor zone, count := range finalZones {\n\t\tt.Logf(\" Zone %s: %d pods\", zone, count)\n\t}\n\n\t// Verify we're still using multiple zones\n\trequire.Greater(t, len(finalZones), 1, \"Should maintain multi-zone distribution after scaling\")\n\n\t// Cleanup\n\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n\tsuite.cleanupTestResources(t, testName)\n\tt.Logf(\"Zone failover test completed: %s\", testName)\n}\n\n// Helper function to create a multi-zone NodeClass with placement strategy\nfunc (s *E2ETestSuite) createMultiZoneNodeClass(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: s.testRegion,\n\t\t\t// Note: No Zone or Subnet specified - using placement strategy\n\t\t\tVPC: s.testVPC,\n\t\t\tImage: s.testImage,\n\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n\t\t\tSSHKeys: []string{s.testSshKeyId},\n\t\t\tResourceGroup: s.testResourceGroup,\n\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n\t\t\tInstanceProfile: \"bx2-4x16\",\n\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n\t\t\tPlacementStrategy: &v1alpha1.PlacementStrategy{\n\t\t\t\tZoneBalance: \"Balanced\", // Request balanced zone distribution\n\t\t\t},\n\t\t\tTags: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t\t\"created-by\": \"karpenter-e2e\",\n\t\t\t\t\"purpose\": 
\"multi-zone-test\",\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), nodeClass)\n\trequire.NoError(t, err, \"Failed to create multi-zone NodeClass\")\n\tt.Logf(\"Created multi-zone NodeClass: %s\", nodeClass.Name)\n\n\treturn nodeClass\n}\n\n// Helper function to create a NodePool that allows multiple zones\nfunc (s *E2ETestSuite) createMultiZoneNodePool(t *testing.T, testName, nodeClassName string) *karpv1.NodePool {\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodepool\", testName),\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t// Allow multiple instance types for flexibility\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: corev1.LabelInstanceTypeStable,\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: []string{\"bx2-4x16\", \"bx2-2x8\"},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t\t// Allow any zone in the region (multi-zone)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"topology.kubernetes.io/zone\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: []string{s.testRegion + \"-1\", s.testRegion + \"-2\", s.testRegion + \"-3\"},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), nodePool)\n\trequire.NoError(t, err, \"Failed to create multi-zone NodePool\")\n\tt.Logf(\"Created multi-zone NodePool: %s\", nodePool.Name)\n\n\treturn nodePool\n}\n\n// Helper function to create a deployment with multiple replicas\nfunc (s *E2ETestSuite) createMultiReplicaDeployment(t *testing.T, testName, nodePoolName string, replicas int32) *appsv1.Deployment {\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &replicas,\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-workload\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePoolName,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"workload\",\n\t\t\t\t\t\t\tImage: 
\"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"), // Force separate nodes\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"1Gi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), deployment)\n\trequire.NoError(t, err, \"Failed to create multi-replica deployment\")\n\tt.Logf(\"Created deployment with %d replicas: %s\", replicas, deployment.Name)\n\n\treturn deployment\n}\n\n// Helper function to create a deployment with zone spread preferences\nfunc (s *E2ETestSuite) createZoneSpreadDeployment(t *testing.T, testName, nodePoolName string, replicas int32) *appsv1.Deployment {\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-deployment\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: &replicas,\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\t\t\"test-name\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\t\t\"karpenter.sh/nodepool\": nodePoolName,\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"test-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/nginx/nginx-unprivileged:1.29.1-alpine\",\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"500m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"512Mi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\t// Prefer different zones but don't require it\n\t\t\t\t\tAffinity: &corev1.Affinity{\n\t\t\t\t\t\tPodAntiAffinity: &corev1.PodAntiAffinity{\n\t\t\t\t\t\t\tPreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tWeight: 50, // Medium preference for zone spread\n\t\t\t\t\t\t\t\t\tPodAffinityTerm: corev1.PodAffinityTerm{\n\t\t\t\t\t\t\t\t\t\tLabelSelector: &metav1.LabelSelector{\n\t\t\t\t\t\t\t\t\t\t\tMatchLabels: map[string]string{\n\t\t\t\t\t\t\t\t\t\t\t\t\"app\": fmt.Sprintf(\"%s-app\", testName),\n\t\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t\tTopologyKey: \"topology.kubernetes.io/zone\",\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), deployment)\n\trequire.NoError(t, err)\n\tt.Logf(\"Created zone spread deployment: %s\", deployment.Name)\n\n\treturn deployment\n}\n\n// Helper function to verify multi-zone distribution\nfunc (s *E2ETestSuite) verifyMultiZoneDistribution(t *testing.T, testName string, minZones int) {\n\tctx := context.Background()\n\n\t// Get all Karpenter nodes for 
this test\n\tvar nodeList corev1.NodeList\n\terr := s.kubeClient.List(ctx, &nodeList, client.MatchingLabels{\n\t\t\"test-name\": testName,\n\t})\n\trequire.NoError(t, err, \"Failed to get nodes for multi-zone verification\")\n\n\t// Count nodes per zone\n\tzoneDistribution := make(map[string]int)\n\tkarpenterNodes := 0\n\n\tfor _, node := range nodeList.Items {\n\t\tif _, isKarpenterNode := node.Labels[\"karpenter.sh/nodepool\"]; isKarpenterNode {\n\t\t\tkarpenterNodes++\n\t\t\tzone := node.Labels[\"topology.kubernetes.io/zone\"]\n\t\t\trequire.NotEmpty(t, zone, \"Karpenter node should have zone label\")\n\t\t\tzoneDistribution[zone]++\n\t\t}\n\t}\n\n\trequire.Greater(t, karpenterNodes, 0, \"Should have at least one Karpenter node\")\n\trequire.GreaterOrEqual(t, len(zoneDistribution), minZones,\n\t\tfmt.Sprintf(\"Should have nodes in at least %d zones\", minZones))\n\n\tt.Logf(\"Multi-zone distribution verified:\")\n\tfor zone, count := range zoneDistribution {\n\t\tt.Logf(\" Zone %s: %d nodes\", zone, count)\n\t\t// Verify zone is in expected region\n\t\trequire.True(t, strings.HasPrefix(zone, s.testRegion+\"-\"),\n\t\t\tfmt.Sprintf(\"Zone %s should be in region %s\", zone, s.testRegion))\n\t}\n\n\tt.Logf(\"Verified %d Karpenter nodes distributed across %d zones\",\n\t\tkarpenterNodes, len(zoneDistribution))\n}\n\n// Helper function to get pod zone distribution\nfunc (s *E2ETestSuite) getPodZoneDistribution(t *testing.T, appLabel, namespace string) map[string]int {\n\tctx := context.Background()\n\tzoneDistribution := make(map[string]int)\n\n\t// Get pods with the app label\n\tvar podList corev1.PodList\n\terr := s.kubeClient.List(ctx, &podList,\n\t\tclient.InNamespace(namespace),\n\t\tclient.MatchingLabels{\"app\": appLabel})\n\trequire.NoError(t, err, \"Failed to get pods for zone distribution\")\n\n\tfor _, pod := range podList.Items {\n\t\tif pod.Spec.NodeName == \"\" {\n\t\t\tcontinue // Skip unscheduled pods\n\t\t}\n\n\t\t// Get the node and its zone\n\t\tvar node corev1.Node\n\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: pod.Spec.NodeName}, &node)\n\t\trequire.NoError(t, err, fmt.Sprintf(\"Failed to get node %s\", pod.Spec.NodeName))\n\n\t\tzone := node.Labels[\"topology.kubernetes.io/zone\"]\n\t\tif zone != \"\" {\n\t\t\tzoneDistribution[zone]++\n\t\t}\n\t}\n\n\treturn zoneDistribution\n}\n","numLines":715,"startLine":1,"totalLines":715}},"sourceToolAssistantUUID":"2f107d82-3382-44fa-bdc4-0227e129c31e"}
{"parentUuid":"2df09914-94b5-46cf-a3fc-066253e9ddae","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VpQLXzAnUuHVj7JCRnkXdu","type":"message","role":"assistant","content":[{"type":"text","text":"Now let me read the remaining test files:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":26448,"cache_read_input_tokens":21780,"cache_creation":{"ephemeral_5m_input_tokens":26448,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWwWKXMyefv5Wxnh9v","type":"assistant","uuid":"9f014417-6421-44e3-b745-33fe53107948","timestamp":"2026-01-10T10:44:59.891Z"}
{"parentUuid":"9f014417-6421-44e3-b745-33fe53107948","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VpQLXzAnUuHVj7JCRnkXdu","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_0138qa7bQRrZLBPbgT1L3UzX","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/validation_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":26448,"cache_read_input_tokens":21780,"cache_creation":{"ephemeral_5m_input_tokens":26448,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWwWKXMyefv5Wxnh9v","type":"assistant","uuid":"e0a64557-a9bc-4b77-8592-3daee9446c20","timestamp":"2026-01-10T10:45:00.278Z"}
{"parentUuid":"e0a64557-a9bc-4b77-8592-3daee9446c20","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VpQLXzAnUuHVj7JCRnkXdu","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01RoyMpvPkArY13d6sNGn22a","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/block_device_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":26448,"cache_read_input_tokens":21780,"cache_creation":{"ephemeral_5m_input_tokens":26448,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWwWKXMyefv5Wxnh9v","type":"assistant","uuid":"5e384966-661c-4bf3-ada0-5a1d85096273","timestamp":"2026-01-10T10:45:00.493Z"}
{"parentUuid":"5e384966-661c-4bf3-ada0-5a1d85096273","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VpQLXzAnUuHVj7JCRnkXdu","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_019x5hasnjtorcGLEnNe8ocA","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/image_selector_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":26448,"cache_read_input_tokens":21780,"cache_creation":{"ephemeral_5m_input_tokens":26448,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZWwWKXMyefv5Wxnh9v","type":"assistant","uuid":"5828a063-efeb-49a0-b059-b6e01d87ab97","timestamp":"2026-01-10T10:45:00.779Z"}
{"parentUuid":"5828a063-efeb-49a0-b059-b6e01d87ab97","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01VpQLXzAnUuHVj7JCRnkXdu","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01MtnAvYSEREaw5teFJQbwEg","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/e2e_taints_test.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":26448,"cache_read_input_tokens":21780,"cache_creation":{"ephemeral_5m_input_tokens":26448,"ephemeral_1h_input_tokens":0},"output_tokens":280,"service_tier":"standard"}},"requestId":"req_011CWyZWwWKXMyefv5Wxnh9v","type":"assistant","uuid":"0f7c4d7e-686d-47c2-b0a9-04c360b7720d","timestamp":"2026-01-10T10:45:01.050Z"}
{"parentUuid":"e0a64557-a9bc-4b77-8592-3daee9446c20","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"type":"tool_result","content":"File does not exist.","is_error":true,"tool_use_id":"toolu_0138qa7bQRrZLBPbgT1L3UzX"}]},"uuid":"4b7ec186-09db-4e6d-8640-b649e471492f","timestamp":"2026-01-10T10:45:01.248Z","toolUseResult":"Error: File does not exist.","sourceToolAssistantUUID":"e0a64557-a9bc-4b77-8592-3daee9446c20"}
{"parentUuid":"5e384966-661c-4bf3-ada0-5a1d85096273","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01RoyMpvPkArY13d6sNGn22a","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/stretchr/testify/require\"\n 28→\tcorev1 \"k8s.io/api/core/v1\"\n 29→\t\"k8s.io/apimachinery/pkg/api/resource\"\n 30→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 31→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 32→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 33→\n 34→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 35→)\n 36→\n 37→// Helper functions for pointer conversion\n 38→func stringPtr(s string) *string {\n 39→\treturn &s\n 40→}\n 41→\n 42→func int64Ptr(i int64) *int64 {\n 43→\treturn &i\n 44→}\n 45→\n 46→// TestE2EBlockDeviceMapping tests node provisioning with custom block device mappings\n 47→func TestE2EBlockDeviceMapping(t *testing.T) {\n 48→\tsuite := SetupE2ETestSuite(t)\n 49→\tctx := context.Background()\n 50→\ttestName := fmt.Sprintf(\"block-device-test-%d\", time.Now().Unix())\n 51→\n 52→\t// Add test label for proper cleanup\n 53→\ttestLabels := map[string]string{\n 54→\t\t\"test\": \"e2e\",\n 55→\t\t\"test-name\": testName,\n 56→\t}\n 57→\n 58→\t// Create NodeClass with custom block device mapping\n 59→\tnodeClass := &v1alpha1.IBMNodeClass{\n 60→\t\tObjectMeta: metav1.ObjectMeta{\n 61→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 62→\t\t\tLabels: testLabels,\n 63→\t\t},\n 64→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 65→\t\t\tRegion: suite.testRegion,\n 66→\t\t\tZone: suite.testZone,\n 67→\t\t\tVPC: suite.testVPC,\n 68→\t\t\tImage: suite.testImage,\n 69→\t\t\tSubnet: suite.testSubnet,\n 70→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 71→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 72→\t\t\tResourceGroup: suite.testResourceGroup,\n 73→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 74→\t\t\tInstanceProfile: \"bx2-4x16\", // Use larger instance for testing\n 75→\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n 76→\t\t\tBlockDeviceMappings: []v1alpha1.BlockDeviceMapping{\n 77→\t\t\t\t{\n 78→\t\t\t\t\tDeviceName: stringPtr(\"/dev/vda\"), // Root device\n 79→\t\t\t\t\tRootVolume: true,\n 80→\t\t\t\t\tVolumeSpec: &v1alpha1.VolumeSpec{\n 81→\t\t\t\t\t\tCapacity: int64Ptr(50), // 50GB root volume\n 82→\t\t\t\t\t\tProfile: stringPtr(\"general-purpose\"),\n 83→\t\t\t\t\t\tDeleteOnTermination: &[]bool{true}[0],\n 84→\t\t\t\t\t\tTags: []string{\"test:root-volume\", \"environment:e2e-test\"},\n 85→\t\t\t\t\t},\n 
86→\t\t\t\t},\n 87→\t\t\t\t{\n 88→\t\t\t\t\tDeviceName: stringPtr(\"/dev/vdb\"), // Additional data volume\n 89→\t\t\t\t\tRootVolume: false,\n 90→\t\t\t\t\tVolumeSpec: &v1alpha1.VolumeSpec{\n 91→\t\t\t\t\t\tCapacity: int64Ptr(100), // 100GB data volume\n 92→\t\t\t\t\t\tProfile: stringPtr(\"5iops-tier\"),\n 93→\t\t\t\t\t\tIOPS: int64Ptr(1000),\n 94→\t\t\t\t\t\tBandwidth: int64Ptr(500),\n 95→\t\t\t\t\t\tDeleteOnTermination: &[]bool{true}[0],\n 96→\t\t\t\t\t\tTags: []string{\"test:data-volume\", \"environment:e2e-test\"},\n 97→\t\t\t\t\t},\n 98→\t\t\t\t},\n 99→\t\t\t},\n 100→\t\t\tTags: map[string]string{\n 101→\t\t\t\t\"test\": \"e2e\",\n 102→\t\t\t\t\"test-name\": testName,\n 103→\t\t\t\t\"created-by\": \"karpenter-e2e\",\n 104→\t\t\t\t\"purpose\": \"block-device-test\",\n 105→\t\t\t},\n 106→\t\t},\n 107→\t}\n 108→\n 109→\tt.Logf(\"Creating NodeClass with block device mappings: %s\", nodeClass.Name)\n 110→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 111→\trequire.NoError(t, err)\n 112→\n 113→\t// Wait for NodeClass to be ready\n 114→\tt.Logf(\"Waiting for NodeClass to be ready...\")\n 115→\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 116→\n 117→\t// Create NodePool\n 118→\tnodePool := &karpv1.NodePool{\n 119→\t\tObjectMeta: metav1.ObjectMeta{\n 120→\t\t\tName: fmt.Sprintf(\"%s-nodepool\", testName),\n 121→\t\t\tLabels: testLabels,\n 122→\t\t},\n 123→\t\tSpec: karpv1.NodePoolSpec{\n 124→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 125→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 126→\t\t\t\t\tLabels: testLabels,\n 127→\t\t\t\t},\n 128→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 129→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 130→\t\t\t\t\t\t{\n 131→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 132→\t\t\t\t\t\t\t\tKey: corev1.LabelInstanceTypeStable,\n 133→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 134→\t\t\t\t\t\t\t\tValues: []string{\"bx2-4x16\"},\n 135→\t\t\t\t\t\t\t},\n 136→\t\t\t\t\t\t},\n 137→\t\t\t\t\t},\n 138→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 139→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 140→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 141→\t\t\t\t\t\tName: nodeClass.Name,\n 142→\t\t\t\t\t},\n 143→\t\t\t\t},\n 144→\t\t\t},\n 145→\t\t},\n 146→\t}\n 147→\n 148→\tt.Logf(\"Creating NodePool: %s\", nodePool.Name)\n 149→\terr = suite.kubeClient.Create(ctx, nodePool)\n 150→\trequire.NoError(t, err)\n 151→\n 152→\t// Create a simple test pod manually\n 153→\ttestPod := &corev1.Pod{\n 154→\t\tObjectMeta: metav1.ObjectMeta{\n 155→\t\t\tName: fmt.Sprintf(\"%s-test-pod\", testName),\n 156→\t\t\tNamespace: \"default\",\n 157→\t\t\tLabels: testLabels,\n 158→\t\t},\n 159→\t\tSpec: corev1.PodSpec{\n 160→\t\t\tRestartPolicy: corev1.RestartPolicyNever,\n 161→\t\t\tContainers: []corev1.Container{\n 162→\t\t\t\t{\n 163→\t\t\t\t\tName: \"test\",\n 164→\t\t\t\t\tImage: \"ubuntu:22.04\",\n 165→\t\t\t\t\tCommand: []string{\n 166→\t\t\t\t\t\t\"/bin/bash\",\n 167→\t\t\t\t\t\t\"-c\",\n 168→\t\t\t\t\t\t`set -e\n 169→\t\t\techo \"=== Block Device Inspector Starting ===\"\n 170→\t\t\techo \"Timestamp: $(date)\"\n 171→\t\t\techo \"Hostname: $(hostname)\"\n 172→\t\t\techo \"\"\n 173→\n 174→\t\t\t# Install required tools\n 175→\t\t\techo \"Installing required tools...\"\n 176→\t\t\tapt-get update -qq && apt-get install -y -qq util-linux lsblk fdisk > /dev/null 2>&1\n 177→\n 178→\t\t\techo \"=== System Information ===\"\n 179→\t\t\techo \"Kernel: $(uname -r)\"\n 180→\t\t\techo \"Architecture: $(uname -m)\"\n 181→\t\t\techo \"\"\n 
182→\n 183→\t\t\techo \"=== Block Device Analysis ===\"\n 184→\t\t\techo \"All block devices (lsblk):\"\n 185→\t\t\tlsblk\n 186→\t\t\techo \"\"\n 187→\n 188→\t\t\techo \"All block devices with filesystem info (lsblk -f):\"\n 189→\t\t\tlsblk -f\n 190→\t\t\techo \"\"\n 191→\n 192→\t\t\techo \"Disk partitions (fdisk -l):\"\n 193→\t\t\tfdisk -l 2>/dev/null | grep -E \"(^Disk /dev/|^Device|sectors|bytes)\" || echo \"No disk info available\"\n 194→\t\t\techo \"\"\n 195→\n 196→\t\t\techo \"=== Storage Analysis ===\"\n 197→\t\t\techo \"Root filesystem usage:\"\n 198→\t\t\tdf -h /\n 199→\t\t\techo \"\"\n 200→\n 201→\t\t\techo \"All mounted filesystems:\"\n 202→\t\t\tdf -h\n 203→\t\t\techo \"\"\n 204→\n 205→\t\t\techo \"Block device sizes (in bytes):\"\n 206→\t\t\tlsblk -b -d -o NAME,SIZE,TYPE | grep -v loop\n 207→\t\t\techo \"\"\n 208→\n 209→\t\t\techo \"=== Expected Configuration Verification ===\"\n 210→\t\t\techo \"Expected from IBMNodeClass:\"\n 211→\t\t\techo \" - Root volume (/dev/vda): ~50GB, general-purpose profile\"\n 212→\t\t\techo \" - Data volume (/dev/vdb): ~100GB, 5iops-tier profile\"\n 213→\t\t\techo \"\"\n 214→\n 215→\t\t\t# Check root volume size\n 216→\t\t\tif [ -b \"/dev/vda\" ]; then\n 217→\t\t\t\tROOT_SIZE_BYTES=$(lsblk -b -d -n -o SIZE /dev/vda 2>/dev/null || echo \"0\")\n 218→\t\t\t\tROOT_SIZE_GB=$((ROOT_SIZE_BYTES / 1024 / 1024 / 1024))\n 219→\t\t\t\techo \"Root volume (/dev/vda) size: ${ROOT_SIZE_GB}GB\"\n 220→\n 221→\t\t\t\tif [ \"$ROOT_SIZE_GB\" -ge 45 ] && [ \"$ROOT_SIZE_GB\" -le 55 ]; then\n 222→\t\t\t\t\techo \"Root volume size is within expected range (45-55GB)\"\n 223→\t\t\t\telse\n 224→\t\t\t\t\techo \"Root volume size is outside expected range (45-55GB)\"\n 225→\t\t\t\t\texit 1\n 226→\t\t\t\tfi\n 227→\t\t\telse\n 228→\t\t\t\techo \"Root volume (/dev/vda) not found\"\n 229→\t\t\t\texit 1\n 230→\t\t\tfi\n 231→\t\t\techo \"\"\n 232→\n 233→\t\t\t# Check for data volume\n 234→\t\t\tif [ -b \"/dev/vdb\" ]; then\n 235→\t\t\t\tDATA_SIZE_BYTES=$(lsblk -b -d -n -o SIZE /dev/vdb 2>/dev/null || echo \"0\")\n 236→\t\t\t\tDATA_SIZE_GB=$((DATA_SIZE_BYTES / 1024 / 1024 / 1024))\n 237→\t\t\t\techo \"Data volume (/dev/vdb) size: ${DATA_SIZE_GB}GB\"\n 238→\n 239→\t\t\t\tif [ \"$DATA_SIZE_GB\" -ge 95 ] && [ \"$DATA_SIZE_GB\" -le 105 ]; then\n 240→\t\t\t\t\techo \"Data volume size is within expected range (95-105GB)\"\n 241→\t\t\t\telse\n 242→\t\t\t\t\techo \"Data volume size is outside expected range (95-105GB)\"\n 243→\t\t\t\t\texit 1\n 244→\t\t\t\tfi\n 245→\t\t\telse\n 246→\t\t\t\techo \"Data volume (/dev/vdb) not found\"\n 247→\t\t\t\texit 1\n 248→\t\t\tfi\n 249→\t\t\techo \"\"\n 250→\n 251→\t\t\techo \"=== IBM Cloud Volume Information ===\"\n 252→\t\t\techo \"Note: Volume profiles and tags are configured at IBM Cloud level:\"\n 253→\t\t\techo \"- Root volume configured with 'general-purpose' profile\"\n 254→\t\t\techo \"- Data volume configured with '5iops-tier' profile with 1000 IOPS and 500 Mbps bandwidth\"\n 255→\t\t\techo \"- Both volumes set to delete on instance termination\"\n 256→\t\t\techo \"- Volumes tagged with 'test:root-volume' and 'test:data-volume'\"\n 257→\t\t\techo \"\"\n 258→\n 259→\t\t\techo \"=== Test Summary ===\"\n 260→\t\t\techo \"Block device inspection completed successfully!\"\n 261→\t\t\techo \"Root volume is ~${ROOT_SIZE_GB}GB (configured as general-purpose)\"\n 262→\t\t\techo \"Data volume is ~${DATA_SIZE_GB}GB (configured as 5iops-tier)\"\n 263→\t\t\techo \"Both volumes match the IBMNodeClass block device mapping specification\"\n 264→\t\t\techo 
\"\"\n 265→\t\t\techo \"=== Block Device Inspector Completed ===\"\n 266→\n 267→\t\t\t# Success - short sleep for log collection\n 268→\t\t\techo \"Test completed successfully. Sleeping briefly for log collection...\"\n 269→\t\t\tsleep 60`,\n 270→\t\t\t\t\t},\n 271→\t\t\t\t\tResources: corev1.ResourceRequirements{\n 272→\t\t\t\t\t\tRequests: corev1.ResourceList{\n 273→\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"100m\"),\n 274→\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"256Mi\"),\n 275→\t\t\t\t\t\t},\n 276→\t\t\t\t\t},\n 277→\t\t\t\t},\n 278→\t\t\t},\n 279→\t\t\tNodeSelector: map[string]string{\n 280→\t\t\t\t\"test-name\": testName,\n 281→\t\t\t},\n 282→\t\t\tTolerations: []corev1.Toleration{\n 283→\t\t\t\t{\n 284→\t\t\t\t\tKey: \"karpenter-ibm.sh/unschedulable\",\n 285→\t\t\t\t\tOperator: corev1.TolerationOpExists,\n 286→\t\t\t\t},\n 287→\t\t\t},\n 288→\t\t},\n 289→\t}\n 290→\n 291→\tt.Logf(\"Creating test pod: %s\", testPod.Name)\n 292→\terr = suite.kubeClient.Create(ctx, testPod)\n 293→\trequire.NoError(t, err)\n 294→\n 295→\t// Wait for node to be provisioned and pod to be scheduled\n 296→\tt.Logf(\"Waiting for node provisioning and pod scheduling...\")\n 297→\tsuite.waitForPodReady(t, testPod.Name, testPod.Namespace)\n 298→\n 299→\t// Get the node that the pod was scheduled on\n 300→\tvar updatedPod corev1.Pod\n 301→\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(testPod), &updatedPod)\n 302→\trequire.NoError(t, err)\n 303→\trequire.NotEmpty(t, updatedPod.Spec.NodeName, \"Pod should be scheduled to a node\")\n 304→\n 305→\tnodeName := updatedPod.Spec.NodeName\n 306→\tt.Logf(\"Pod scheduled on node: %s\", nodeName)\n 307→\n 308→\t// Verify the node was created with the expected configuration\n 309→\tvar node corev1.Node\n 310→\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: nodeName}, &node)\n 311→\trequire.NoError(t, err)\n 312→\n 313→\t// Check node was provisioned with correct instance type (from NodePool requirement)\n 314→\trequire.Contains(t, node.Labels, corev1.LabelInstanceTypeStable)\n 315→\trequire.Equal(t, \"bx2-4x16\", node.Labels[corev1.LabelInstanceTypeStable])\n 316→\n 317→\t// Verify NodeClaim was created (Karpenter may create multiple nodes)\n 318→\tnodeClaims := &karpv1.NodeClaimList{}\n 319→\terr = suite.kubeClient.List(ctx, nodeClaims, client.MatchingLabels{\"test-name\": testName})\n 320→\trequire.NoError(t, err)\n 321→\trequire.GreaterOrEqual(t, len(nodeClaims.Items), 1, \"Should have at least one NodeClaim\")\n 322→\tt.Logf(\"Found %d NodeClaim(s) for test\", len(nodeClaims.Items))\n 323→\n 324→\t// Use the first NodeClaim for verification\n 325→\tnodeClaim := nodeClaims.Items[0]\n 326→\tt.Logf(\"NodeClaim created: %s\", nodeClaim.Name)\n 327→\n 328→\t// Verify NodeClaim references our NodeClass\n 329→\trequire.NotNil(t, nodeClaim.Spec.NodeClassRef)\n 330→\trequire.Equal(t, nodeClass.Name, nodeClaim.Spec.NodeClassRef.Name)\n 331→\n 332→\t// Wait a bit for the pod to execute and gather block device info\n 333→\tt.Logf(\"Waiting for pod to complete block device checks...\")\n 334→\ttime.Sleep(30 * time.Second)\n 335→\n 336→\t// Verify pod completed successfully by checking its exit status\n 337→\tt.Logf(\"Waiting for pod to complete...\")\n 338→\tsuite.waitForPodCompletion(t, testPod.Name, testPod.Namespace)\n 339→\n 340→\t// Get pod status to verify successful completion\n 341→\tvar completedPod corev1.Pod\n 342→\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(testPod), &completedPod)\n 
343→\trequire.NoError(t, err)\n 344→\n 345→\t// Check that pod completed successfully (exit code 0)\n 346→\tif completedPod.Status.Phase == corev1.PodSucceeded {\n 347→\t\tt.Logf(\"Pod completed successfully - block device test passed\")\n 348→\t} else if completedPod.Status.Phase == corev1.PodFailed {\n 349→\t\t// Get container status for failure details\n 350→\t\tfor _, containerStatus := range completedPod.Status.ContainerStatuses {\n 351→\t\t\tif containerStatus.State.Terminated != nil {\n 352→\t\t\t\tt.Errorf(\"Pod failed with exit code %d: %s\",\n 353→\t\t\t\t\tcontainerStatus.State.Terminated.ExitCode,\n 354→\t\t\t\t\tcontainerStatus.State.Terminated.Reason)\n 355→\t\t\t}\n 356→\t\t}\n 357→\t\trequire.Fail(t, \"Pod failed - block device configuration verification failed\")\n 358→\t}\n 359→\n 360→\t// Try to get pod logs for additional verification (best effort)\n 361→\tpodLogs, err := suite.getPodLogs(ctx, testPod.Name, testPod.Namespace)\n 362→\tif err == nil && podLogs != \"Pod logs not available in this test setup\" {\n 363→\t\tt.Logf(\"Pod logs:\\n%s\", podLogs)\n 364→\n 365→\t\t// Verify key indicators that the test ran and passed\n 366→\t\trequire.Contains(t, podLogs, \"Block Device Inspector Starting\", \"Pod should have started block device inspection\")\n 367→\t\trequire.Contains(t, podLogs, \"Root volume size is within expected range\", \"Root volume should be correct size\")\n 368→\t\trequire.Contains(t, podLogs, \"Data volume size is within expected range\", \"Data volume should be correct size\")\n 369→\t\trequire.Contains(t, podLogs, \"Block device inspection completed successfully\", \"Test should complete successfully\")\n 370→\t} else {\n 371→\t\tt.Logf(\"Pod logs not available for detailed verification, but pod exit status indicates success\")\n 372→\t}\n 373→\n 374→\t// Clean up test resources\n 375→\tt.Logf(\"Cleaning up test resources...\")\n 376→\n 377→\t// Delete test pod\n 378→\terr = suite.kubeClient.Delete(ctx, testPod)\n 379→\tif err != nil {\n 380→\t\tt.Logf(\"Warning: Failed to delete test pod: %v\", err)\n 381→\t}\n 382→\n 383→\t// Wait for node to be cleaned up by Karpenter (due to pod deletion)\n 384→\tt.Logf(\"Waiting for node cleanup...\")\n 385→\tsuite.waitForNodeCleanup(t, nodeName, 5*time.Minute)\n 386→\n 387→\t// Delete NodePool\n 388→\terr = suite.kubeClient.Delete(ctx, nodePool)\n 389→\tif err != nil {\n 390→\t\tt.Logf(\"Warning: Failed to delete NodePool: %v\", err)\n 391→\t}\n 392→\n 393→\t// Delete NodeClass\n 394→\terr = suite.kubeClient.Delete(ctx, nodeClass)\n 395→\tif err != nil {\n 396→\t\tt.Logf(\"Warning: Failed to delete NodeClass: %v\", err)\n 397→\t}\n 398→\n 399→\tt.Logf(\"Block device mapping test completed successfully\")\n 400→}\n 401→\n 402→// getPodLogs retrieves logs from a pod (simplified version)\n 403→func (s *E2ETestSuite) getPodLogs(ctx context.Context, podName, namespace string) (string, error) {\n 404→\tif namespace == \"\" {\n 405→\t\tnamespace = \"default\"\n 406→\t}\n 407→\n 408→\t// For now, return a placeholder since getting logs requires additional setup\n 409→\t// In a real implementation, this would use kubectl or a clientset\n 410→\treturn \"Pod logs not available in this test setup\", nil\n 411→}\n 412→\n 413→// waitForPodCompletion waits for a pod to complete (either succeed or fail)\n 414→func (s *E2ETestSuite) waitForPodCompletion(t *testing.T, podName, namespace string) {\n 415→\tif namespace == \"\" {\n 416→\t\tnamespace = \"default\"\n 417→\t}\n 418→\n 419→\tctx := context.Background()\n 420→\n 
421→\tfor i := 0; i < int(testTimeout/pollInterval); i++ {\n 422→\t\tvar pod corev1.Pod\n 423→\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: podName, Namespace: namespace}, &pod)\n 424→\t\tif err != nil {\n 425→\t\t\ttime.Sleep(pollInterval)\n 426→\t\t\tcontinue\n 427→\t\t}\n 428→\n 429→\t\tif pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {\n 430→\t\t\tt.Logf(\"Pod %s completed with phase: %s\", podName, pod.Status.Phase)\n 431→\t\t\treturn\n 432→\t\t}\n 433→\n 434→\t\ttime.Sleep(pollInterval)\n 435→\t}\n 436→\n 437→\tt.Fatalf(\"Pod %s did not complete within timeout\", podName)\n 438→}\n 439→\n 440→// waitForPodReady waits for a pod to be ready\n 441→func (s *E2ETestSuite) waitForPodReady(t *testing.T, podName, namespace string) {\n 442→\tif namespace == \"\" {\n 443→\t\tnamespace = \"default\"\n 444→\t}\n 445→\n 446→\tctx := context.Background()\n 447→\n 448→\tfor i := 0; i < int(testTimeout/pollInterval); i++ {\n 449→\t\tvar pod corev1.Pod\n 450→\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: podName, Namespace: namespace}, &pod)\n 451→\t\tif err != nil {\n 452→\t\t\ttime.Sleep(pollInterval)\n 453→\t\t\tcontinue\n 454→\t\t}\n 455→\n 456→\t\tif pod.Status.Phase == corev1.PodRunning {\n 457→\t\t\t// Check if all containers are ready\n 458→\t\t\tallReady := true\n 459→\t\t\tfor _, condition := range pod.Status.Conditions {\n 460→\t\t\t\tif condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {\n 461→\t\t\t\t\tt.Logf(\"Pod %s is ready\", podName)\n 462→\t\t\t\t\treturn\n 463→\t\t\t\t}\n 464→\t\t\t}\n 465→\t\t\tif !allReady {\n 466→\t\t\t\ttime.Sleep(pollInterval)\n 467→\t\t\t\tcontinue\n 468→\t\t\t}\n 469→\t\t}\n 470→\n 471→\t\ttime.Sleep(pollInterval)\n 472→\t}\n 473→\n 474→\tt.Fatalf(\"Pod %s did not become ready within timeout\", podName)\n 475→}\n 476→\n 477→// waitForNodeCleanup waits for a node to be deleted\n 478→func (s *E2ETestSuite) waitForNodeCleanup(t *testing.T, nodeName string, timeout time.Duration) {\n 479→\tctx := context.Background()\n 480→\n 481→\tfor i := 0; i < int(timeout/pollInterval); i++ {\n 482→\t\tvar node corev1.Node\n 483→\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: nodeName}, &node)\n 484→\t\tif err != nil {\n 485→\t\t\t// Node not found, cleanup successful\n 486→\t\t\tt.Logf(\"Node %s has been cleaned up\", nodeName)\n 487→\t\t\treturn\n 488→\t\t}\n 489→\n 490→\t\ttime.Sleep(pollInterval)\n 491→\t}\n 492→\n 493→\tt.Logf(\"Warning: Node %s was not cleaned up within timeout\", nodeName)\n 494→}\n 495→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"2b616b9e-3d78-42cf-8af9-93cc5efbffc7","timestamp":"2026-01-10T10:45:01.437Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/block_device_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/api/resource\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// Helper functions for pointer conversion\nfunc stringPtr(s string) *string {\n\treturn &s\n}\n\nfunc int64Ptr(i int64) *int64 {\n\treturn &i\n}\n\n// TestE2EBlockDeviceMapping tests node provisioning with custom block device mappings\nfunc TestE2EBlockDeviceMapping(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\ttestName := fmt.Sprintf(\"block-device-test-%d\", time.Now().Unix())\n\n\t// Add test label for proper cleanup\n\ttestLabels := map[string]string{\n\t\t\"test\": \"e2e\",\n\t\t\"test-name\": testName,\n\t}\n\n\t// Create NodeClass with custom block device mapping\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t\tLabels: testLabels,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tVPC: suite.testVPC,\n\t\t\tImage: suite.testImage,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tInstanceProfile: \"bx2-4x16\", // Use larger instance for testing\n\t\t\tBootstrapMode: stringPtr(\"cloud-init\"),\n\t\t\tBlockDeviceMappings: []v1alpha1.BlockDeviceMapping{\n\t\t\t\t{\n\t\t\t\t\tDeviceName: stringPtr(\"/dev/vda\"), // Root device\n\t\t\t\t\tRootVolume: true,\n\t\t\t\t\tVolumeSpec: &v1alpha1.VolumeSpec{\n\t\t\t\t\t\tCapacity: int64Ptr(50), // 50GB root volume\n\t\t\t\t\t\tProfile: stringPtr(\"general-purpose\"),\n\t\t\t\t\t\tDeleteOnTermination: &[]bool{true}[0],\n\t\t\t\t\t\tTags: []string{\"test:root-volume\", \"environment:e2e-test\"},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tDeviceName: stringPtr(\"/dev/vdb\"), // Additional data volume\n\t\t\t\t\tRootVolume: false,\n\t\t\t\t\tVolumeSpec: &v1alpha1.VolumeSpec{\n\t\t\t\t\t\tCapacity: int64Ptr(100), // 100GB data volume\n\t\t\t\t\t\tProfile: stringPtr(\"5iops-tier\"),\n\t\t\t\t\t\tIOPS: int64Ptr(1000),\n\t\t\t\t\t\tBandwidth: int64Ptr(500),\n\t\t\t\t\t\tDeleteOnTermination: &[]bool{true}[0],\n\t\t\t\t\t\tTags: 
[]string{\"test:data-volume\", \"environment:e2e-test\"},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t\tTags: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t\t\"created-by\": \"karpenter-e2e\",\n\t\t\t\t\"purpose\": \"block-device-test\",\n\t\t\t},\n\t\t},\n\t}\n\n\tt.Logf(\"Creating NodeClass with block device mappings: %s\", nodeClass.Name)\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\n\t// Wait for NodeClass to be ready\n\tt.Logf(\"Waiting for NodeClass to be ready...\")\n\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\n\t// Create NodePool\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodepool\", testName),\n\t\t\tLabels: testLabels,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: testLabels,\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: corev1.LabelInstanceTypeStable,\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: []string{\"bx2-4x16\"},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClass.Name,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\tt.Logf(\"Creating NodePool: %s\", nodePool.Name)\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\n\t// Create a simple test pod manually\n\ttestPod := &corev1.Pod{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-test-pod\", testName),\n\t\t\tNamespace: \"default\",\n\t\t\tLabels: testLabels,\n\t\t},\n\t\tSpec: corev1.PodSpec{\n\t\t\tRestartPolicy: corev1.RestartPolicyNever,\n\t\t\tContainers: []corev1.Container{\n\t\t\t\t{\n\t\t\t\t\tName: \"test\",\n\t\t\t\t\tImage: \"ubuntu:22.04\",\n\t\t\t\t\tCommand: []string{\n\t\t\t\t\t\t\"/bin/bash\",\n\t\t\t\t\t\t\"-c\",\n\t\t\t\t\t\t`set -e\n\t\t\techo \"=== Block Device Inspector Starting ===\"\n\t\t\techo \"Timestamp: $(date)\"\n\t\t\techo \"Hostname: $(hostname)\"\n\t\t\techo \"\"\n\n\t\t\t# Install required tools\n\t\t\techo \"Installing required tools...\"\n\t\t\tapt-get update -qq && apt-get install -y -qq util-linux lsblk fdisk > /dev/null 2>&1\n\n\t\t\techo \"=== System Information ===\"\n\t\t\techo \"Kernel: $(uname -r)\"\n\t\t\techo \"Architecture: $(uname -m)\"\n\t\t\techo \"\"\n\n\t\t\techo \"=== Block Device Analysis ===\"\n\t\t\techo \"All block devices (lsblk):\"\n\t\t\tlsblk\n\t\t\techo \"\"\n\n\t\t\techo \"All block devices with filesystem info (lsblk -f):\"\n\t\t\tlsblk -f\n\t\t\techo \"\"\n\n\t\t\techo \"Disk partitions (fdisk -l):\"\n\t\t\tfdisk -l 2>/dev/null | grep -E \"(^Disk /dev/|^Device|sectors|bytes)\" || echo \"No disk info available\"\n\t\t\techo \"\"\n\n\t\t\techo \"=== Storage Analysis ===\"\n\t\t\techo \"Root filesystem usage:\"\n\t\t\tdf -h /\n\t\t\techo \"\"\n\n\t\t\techo \"All mounted filesystems:\"\n\t\t\tdf -h\n\t\t\techo \"\"\n\n\t\t\techo \"Block device sizes (in bytes):\"\n\t\t\tlsblk -b -d -o NAME,SIZE,TYPE | grep -v loop\n\t\t\techo \"\"\n\n\t\t\techo \"=== Expected Configuration Verification ===\"\n\t\t\techo \"Expected from IBMNodeClass:\"\n\t\t\techo \" - Root volume (/dev/vda): ~50GB, general-purpose 
profile\"\n\t\t\techo \" - Data volume (/dev/vdb): ~100GB, 5iops-tier profile\"\n\t\t\techo \"\"\n\n\t\t\t# Check root volume size\n\t\t\tif [ -b \"/dev/vda\" ]; then\n\t\t\t\tROOT_SIZE_BYTES=$(lsblk -b -d -n -o SIZE /dev/vda 2>/dev/null || echo \"0\")\n\t\t\t\tROOT_SIZE_GB=$((ROOT_SIZE_BYTES / 1024 / 1024 / 1024))\n\t\t\t\techo \"Root volume (/dev/vda) size: ${ROOT_SIZE_GB}GB\"\n\n\t\t\t\tif [ \"$ROOT_SIZE_GB\" -ge 45 ] && [ \"$ROOT_SIZE_GB\" -le 55 ]; then\n\t\t\t\t\techo \"Root volume size is within expected range (45-55GB)\"\n\t\t\t\telse\n\t\t\t\t\techo \"Root volume size is outside expected range (45-55GB)\"\n\t\t\t\t\texit 1\n\t\t\t\tfi\n\t\t\telse\n\t\t\t\techo \"Root volume (/dev/vda) not found\"\n\t\t\t\texit 1\n\t\t\tfi\n\t\t\techo \"\"\n\n\t\t\t# Check for data volume\n\t\t\tif [ -b \"/dev/vdb\" ]; then\n\t\t\t\tDATA_SIZE_BYTES=$(lsblk -b -d -n -o SIZE /dev/vdb 2>/dev/null || echo \"0\")\n\t\t\t\tDATA_SIZE_GB=$((DATA_SIZE_BYTES / 1024 / 1024 / 1024))\n\t\t\t\techo \"Data volume (/dev/vdb) size: ${DATA_SIZE_GB}GB\"\n\n\t\t\t\tif [ \"$DATA_SIZE_GB\" -ge 95 ] && [ \"$DATA_SIZE_GB\" -le 105 ]; then\n\t\t\t\t\techo \"Data volume size is within expected range (95-105GB)\"\n\t\t\t\telse\n\t\t\t\t\techo \"Data volume size is outside expected range (95-105GB)\"\n\t\t\t\t\texit 1\n\t\t\t\tfi\n\t\t\telse\n\t\t\t\techo \"Data volume (/dev/vdb) not found\"\n\t\t\t\texit 1\n\t\t\tfi\n\t\t\techo \"\"\n\n\t\t\techo \"=== IBM Cloud Volume Information ===\"\n\t\t\techo \"Note: Volume profiles and tags are configured at IBM Cloud level:\"\n\t\t\techo \"- Root volume configured with 'general-purpose' profile\"\n\t\t\techo \"- Data volume configured with '5iops-tier' profile with 1000 IOPS and 500 Mbps bandwidth\"\n\t\t\techo \"- Both volumes set to delete on instance termination\"\n\t\t\techo \"- Volumes tagged with 'test:root-volume' and 'test:data-volume'\"\n\t\t\techo \"\"\n\n\t\t\techo \"=== Test Summary ===\"\n\t\t\techo \"Block device inspection completed successfully!\"\n\t\t\techo \"Root volume is ~${ROOT_SIZE_GB}GB (configured as general-purpose)\"\n\t\t\techo \"Data volume is ~${DATA_SIZE_GB}GB (configured as 5iops-tier)\"\n\t\t\techo \"Both volumes match the IBMNodeClass block device mapping specification\"\n\t\t\techo \"\"\n\t\t\techo \"=== Block Device Inspector Completed ===\"\n\n\t\t\t# Success - short sleep for log collection\n\t\t\techo \"Test completed successfully. 
Sleeping briefly for log collection...\"\n\t\t\tsleep 60`,\n\t\t\t\t\t},\n\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"100m\"),\n\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"256Mi\"),\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t\tNodeSelector: map[string]string{\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t\tTolerations: []corev1.Toleration{\n\t\t\t\t{\n\t\t\t\t\tKey: \"karpenter-ibm.sh/unschedulable\",\n\t\t\t\t\tOperator: corev1.TolerationOpExists,\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\tt.Logf(\"Creating test pod: %s\", testPod.Name)\n\terr = suite.kubeClient.Create(ctx, testPod)\n\trequire.NoError(t, err)\n\n\t// Wait for node to be provisioned and pod to be scheduled\n\tt.Logf(\"Waiting for node provisioning and pod scheduling...\")\n\tsuite.waitForPodReady(t, testPod.Name, testPod.Namespace)\n\n\t// Get the node that the pod was scheduled on\n\tvar updatedPod corev1.Pod\n\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(testPod), &updatedPod)\n\trequire.NoError(t, err)\n\trequire.NotEmpty(t, updatedPod.Spec.NodeName, \"Pod should be scheduled to a node\")\n\n\tnodeName := updatedPod.Spec.NodeName\n\tt.Logf(\"Pod scheduled on node: %s\", nodeName)\n\n\t// Verify the node was created with the expected configuration\n\tvar node corev1.Node\n\terr = suite.kubeClient.Get(ctx, client.ObjectKey{Name: nodeName}, &node)\n\trequire.NoError(t, err)\n\n\t// Check node was provisioned with correct instance type (from NodePool requirement)\n\trequire.Contains(t, node.Labels, corev1.LabelInstanceTypeStable)\n\trequire.Equal(t, \"bx2-4x16\", node.Labels[corev1.LabelInstanceTypeStable])\n\n\t// Verify NodeClaim was created (Karpenter may create multiple nodes)\n\tnodeClaims := &karpv1.NodeClaimList{}\n\terr = suite.kubeClient.List(ctx, nodeClaims, client.MatchingLabels{\"test-name\": testName})\n\trequire.NoError(t, err)\n\trequire.GreaterOrEqual(t, len(nodeClaims.Items), 1, \"Should have at least one NodeClaim\")\n\tt.Logf(\"Found %d NodeClaim(s) for test\", len(nodeClaims.Items))\n\n\t// Use the first NodeClaim for verification\n\tnodeClaim := nodeClaims.Items[0]\n\tt.Logf(\"NodeClaim created: %s\", nodeClaim.Name)\n\n\t// Verify NodeClaim references our NodeClass\n\trequire.NotNil(t, nodeClaim.Spec.NodeClassRef)\n\trequire.Equal(t, nodeClass.Name, nodeClaim.Spec.NodeClassRef.Name)\n\n\t// Wait a bit for the pod to execute and gather block device info\n\tt.Logf(\"Waiting for pod to complete block device checks...\")\n\ttime.Sleep(30 * time.Second)\n\n\t// Verify pod completed successfully by checking its exit status\n\tt.Logf(\"Waiting for pod to complete...\")\n\tsuite.waitForPodCompletion(t, testPod.Name, testPod.Namespace)\n\n\t// Get pod status to verify successful completion\n\tvar completedPod corev1.Pod\n\terr = suite.kubeClient.Get(ctx, client.ObjectKeyFromObject(testPod), &completedPod)\n\trequire.NoError(t, err)\n\n\t// Check that pod completed successfully (exit code 0)\n\tif completedPod.Status.Phase == corev1.PodSucceeded {\n\t\tt.Logf(\"Pod completed successfully - block device test passed\")\n\t} else if completedPod.Status.Phase == corev1.PodFailed {\n\t\t// Get container status for failure details\n\t\tfor _, containerStatus := range completedPod.Status.ContainerStatuses {\n\t\t\tif containerStatus.State.Terminated != nil {\n\t\t\t\tt.Errorf(\"Pod failed with exit code %d: 
%s\",\n\t\t\t\t\tcontainerStatus.State.Terminated.ExitCode,\n\t\t\t\t\tcontainerStatus.State.Terminated.Reason)\n\t\t\t}\n\t\t}\n\t\trequire.Fail(t, \"Pod failed - block device configuration verification failed\")\n\t}\n\n\t// Try to get pod logs for additional verification (best effort)\n\tpodLogs, err := suite.getPodLogs(ctx, testPod.Name, testPod.Namespace)\n\tif err == nil && podLogs != \"Pod logs not available in this test setup\" {\n\t\tt.Logf(\"Pod logs:\\n%s\", podLogs)\n\n\t\t// Verify key indicators that the test ran and passed\n\t\trequire.Contains(t, podLogs, \"Block Device Inspector Starting\", \"Pod should have started block device inspection\")\n\t\trequire.Contains(t, podLogs, \"Root volume size is within expected range\", \"Root volume should be correct size\")\n\t\trequire.Contains(t, podLogs, \"Data volume size is within expected range\", \"Data volume should be correct size\")\n\t\trequire.Contains(t, podLogs, \"Block device inspection completed successfully\", \"Test should complete successfully\")\n\t} else {\n\t\tt.Logf(\"Pod logs not available for detailed verification, but pod exit status indicates success\")\n\t}\n\n\t// Clean up test resources\n\tt.Logf(\"Cleaning up test resources...\")\n\n\t// Delete test pod\n\terr = suite.kubeClient.Delete(ctx, testPod)\n\tif err != nil {\n\t\tt.Logf(\"Warning: Failed to delete test pod: %v\", err)\n\t}\n\n\t// Wait for node to be cleaned up by Karpenter (due to pod deletion)\n\tt.Logf(\"Waiting for node cleanup...\")\n\tsuite.waitForNodeCleanup(t, nodeName, 5*time.Minute)\n\n\t// Delete NodePool\n\terr = suite.kubeClient.Delete(ctx, nodePool)\n\tif err != nil {\n\t\tt.Logf(\"Warning: Failed to delete NodePool: %v\", err)\n\t}\n\n\t// Delete NodeClass\n\terr = suite.kubeClient.Delete(ctx, nodeClass)\n\tif err != nil {\n\t\tt.Logf(\"Warning: Failed to delete NodeClass: %v\", err)\n\t}\n\n\tt.Logf(\"Block device mapping test completed successfully\")\n}\n\n// getPodLogs retrieves logs from a pod (simplified version)\nfunc (s *E2ETestSuite) getPodLogs(ctx context.Context, podName, namespace string) (string, error) {\n\tif namespace == \"\" {\n\t\tnamespace = \"default\"\n\t}\n\n\t// For now, return a placeholder since getting logs requires additional setup\n\t// In a real implementation, this would use kubectl or a clientset\n\treturn \"Pod logs not available in this test setup\", nil\n}\n\n// waitForPodCompletion waits for a pod to complete (either succeed or fail)\nfunc (s *E2ETestSuite) waitForPodCompletion(t *testing.T, podName, namespace string) {\n\tif namespace == \"\" {\n\t\tnamespace = \"default\"\n\t}\n\n\tctx := context.Background()\n\n\tfor i := 0; i < int(testTimeout/pollInterval); i++ {\n\t\tvar pod corev1.Pod\n\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: podName, Namespace: namespace}, &pod)\n\t\tif err != nil {\n\t\t\ttime.Sleep(pollInterval)\n\t\t\tcontinue\n\t\t}\n\n\t\tif pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {\n\t\t\tt.Logf(\"Pod %s completed with phase: %s\", podName, pod.Status.Phase)\n\t\t\treturn\n\t\t}\n\n\t\ttime.Sleep(pollInterval)\n\t}\n\n\tt.Fatalf(\"Pod %s did not complete within timeout\", podName)\n}\n\n// waitForPodReady waits for a pod to be ready\nfunc (s *E2ETestSuite) waitForPodReady(t *testing.T, podName, namespace string) {\n\tif namespace == \"\" {\n\t\tnamespace = \"default\"\n\t}\n\n\tctx := context.Background()\n\n\tfor i := 0; i < int(testTimeout/pollInterval); i++ {\n\t\tvar pod corev1.Pod\n\t\terr := s.kubeClient.Get(ctx, 
client.ObjectKey{Name: podName, Namespace: namespace}, &pod)\n\t\tif err != nil {\n\t\t\ttime.Sleep(pollInterval)\n\t\t\tcontinue\n\t\t}\n\n\t\tif pod.Status.Phase == corev1.PodRunning {\n\t\t\t// Check if all containers are ready\n\t\t\tallReady := true\n\t\t\tfor _, condition := range pod.Status.Conditions {\n\t\t\t\tif condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {\n\t\t\t\t\tt.Logf(\"Pod %s is ready\", podName)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\t\t\tif !allReady {\n\t\t\t\ttime.Sleep(pollInterval)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t}\n\n\t\ttime.Sleep(pollInterval)\n\t}\n\n\tt.Fatalf(\"Pod %s did not become ready within timeout\", podName)\n}\n\n// waitForNodeCleanup waits for a node to be deleted\nfunc (s *E2ETestSuite) waitForNodeCleanup(t *testing.T, nodeName string, timeout time.Duration) {\n\tctx := context.Background()\n\n\tfor i := 0; i < int(timeout/pollInterval); i++ {\n\t\tvar node corev1.Node\n\t\terr := s.kubeClient.Get(ctx, client.ObjectKey{Name: nodeName}, &node)\n\t\tif err != nil {\n\t\t\t// Node not found, cleanup successful\n\t\t\tt.Logf(\"Node %s has been cleaned up\", nodeName)\n\t\t\treturn\n\t\t}\n\n\t\ttime.Sleep(pollInterval)\n\t}\n\n\tt.Logf(\"Warning: Node %s was not cleaned up within timeout\", nodeName)\n}\n","numLines":495,"startLine":1,"totalLines":495}},"sourceToolAssistantUUID":"5e384966-661c-4bf3-ada0-5a1d85096273"}
{"parentUuid":"5828a063-efeb-49a0-b059-b6e01d87ab97","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_019x5hasnjtorcGLEnNe8ocA","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"strings\"\n 25→\t\"testing\"\n 26→\t\"time\"\n 27→\n 28→\t\"github.com/stretchr/testify/require\"\n 29→\tcorev1 \"k8s.io/api/core/v1\"\n 30→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 31→\t\"k8s.io/apimachinery/pkg/types\"\n 32→\t\"k8s.io/apimachinery/pkg/util/wait\"\n 33→\n 34→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 35→)\n 36→\n 37→// TestE2EImageSelector tests the imageSelector functionality with Ubuntu images\n 38→// Note: We only test Ubuntu since our cloud-init bootstrap scripts use apt package manager\n 39→func TestE2EImageSelector(t *testing.T) {\n 40→\tsuite := SetupE2ETestSuite(t)\n 41→\ttestName := fmt.Sprintf(\"image-selector-test-%d\", time.Now().Unix())\n 42→\tt.Logf(\"Starting ImageSelector E2E test: %s\", testName)\n 43→\n 44→\t// Test Case 1: Ubuntu 24.04 imageSelector (latest LTS)\n 45→\tt.Run(\"Ubuntu_24_04_ImageSelector\", func(t *testing.T) {\n 46→\t\tnodeClass := suite.createImageSelectorNodeClass(t, testName+\"-ubuntu24\", &v1alpha1.ImageSelector{\n 47→\t\t\tOS: \"ubuntu\",\n 48→\t\t\tMajorVersion: \"24\",\n 49→\t\t\tMinorVersion: \"04\",\n 50→\t\t\tArchitecture: \"amd64\",\n 51→\t\t\tVariant: \"minimal\",\n 52→\t\t})\n 53→\n 54→\t\t// Wait for NodeClass to be ready\n 55→\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 56→\t\tt.Logf(\"NodeClass with Ubuntu 24.04 imageSelector is ready: %s\", nodeClass.Name)\n 57→\n 58→\t\t// Create NodePool\n 59→\t\tnodePool := suite.createTestNodePool(t, testName+\"-ubuntu24\", nodeClass.Name)\n 60→\t\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n 61→\n 62→\t\t// Create workload\n 63→\t\tdeployment := suite.createTestWorkload(t, testName+\"-ubuntu24\")\n 64→\t\tt.Logf(\"Created test workload: %s\", deployment.Name)\n 65→\n 66→\t\t// Wait for pods to be scheduled and nodes to be provisioned\n 67→\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n 68→\t\tt.Logf(\"Pods scheduled successfully\")\n 69→\n 70→\t\t// Verify that new nodes were created by Karpenter\n 71→\t\tsuite.verifyKarpenterNodesExist(t)\n 72→\t\tt.Logf(\"Verified Karpenter nodes exist\")\n 73→\n 74→\t\t// Verify the deployed image through node verification\n 75→\t\tsuite.verifyImageSelectorResult(t, \"ubuntu\", \"24\", \"04\")\n 76→\n 77→\t\t// Cleanup\n 78→\t\tsuite.cleanupTestResources(t, testName+\"-ubuntu24\")\n 79→\t})\n 80→\n 81→\t// 
Test Case 2: ImageSelector with placement strategy (no zone/subnet specified)\n 82→\tt.Run(\"ImageSelector_with_PlacementStrategy\", func(t *testing.T) {\n 83→\t\tnodeClass := suite.createImageSelectorNodeClassWithPlacementStrategy(t, testName+\"-placement\", &v1alpha1.ImageSelector{\n 84→\t\t\tOS: \"ubuntu\",\n 85→\t\t\tMajorVersion: \"22\",\n 86→\t\t\tMinorVersion: \"04\",\n 87→\t\t\tArchitecture: \"amd64\",\n 88→\t\t\tVariant: \"minimal\",\n 89→\t\t})\n 90→\n 91→\t\t// Wait for NodeClass to be ready\n 92→\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n 93→\t\tt.Logf(\"NodeClass with imageSelector and placement strategy is ready: %s\", nodeClass.Name)\n 94→\n 95→\t\t// Verify that selectedSubnets are populated in the status\n 96→\t\tsuite.waitForSubnetSelection(t, nodeClass.Name)\n 97→\n 98→\t\t// Create NodePool\n 99→\t\tnodePool := suite.createTestNodePool(t, testName+\"-placement\", nodeClass.Name)\n 100→\t\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n 101→\n 102→\t\t// Create workload\n 103→\t\tdeployment := suite.createTestWorkload(t, testName+\"-placement\")\n 104→\t\tt.Logf(\"Created test workload: %s\", deployment.Name)\n 105→\n 106→\t\t// Wait for pods to be scheduled and nodes to be provisioned\n 107→\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n 108→\t\tt.Logf(\"Pods scheduled successfully with placement strategy\")\n 109→\n 110→\t\t// Verify that new nodes were created by Karpenter\n 111→\t\tsuite.verifyKarpenterNodesExist(t)\n 112→\t\tt.Logf(\"Verified Karpenter nodes exist\")\n 113→\n 114→\t\t// Verify the deployed image and placement through node verification\n 115→\t\tsuite.verifyImageSelectorResult(t, \"ubuntu\", \"22\", \"04\")\n 116→\t\tsuite.verifyNodePlacementStrategy(t)\n 117→\n 118→\t\t// Cleanup\n 119→\t\tsuite.cleanupTestResources(t, testName+\"-placement\")\n 120→\t})\n 121→}\n 122→\n 123→// createImageSelectorNodeClass creates a NodeClass with imageSelector configuration\n 124→func (s *E2ETestSuite) createImageSelectorNodeClass(t *testing.T, testName string, imageSelector *v1alpha1.ImageSelector) *v1alpha1.IBMNodeClass {\n 125→\tbootstrapMode := \"cloud-init\"\n 126→\n 127→\tnodeClass := &v1alpha1.IBMNodeClass{\n 128→\t\tTypeMeta: metav1.TypeMeta{\n 129→\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n 130→\t\t\tKind: \"IBMNodeClass\",\n 131→\t\t},\n 132→\t\tObjectMeta: metav1.ObjectMeta{\n 133→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 134→\t\t\tLabels: map[string]string{\n 135→\t\t\t\t\"test-name\": testName,\n 136→\t\t\t},\n 137→\t\t},\n 138→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 139→\t\t\tRegion: s.testRegion,\n 140→\t\t\tZone: s.testZone,\n 141→\t\t\t// No InstanceProfile - let Karpenter choose based on NodePool requirements\n 142→\t\t\tImageSelector: imageSelector, // Use imageSelector instead of image\n 143→\t\t\tVPC: s.testVPC,\n 144→\t\t\tSubnet: s.testSubnet,\n 145→\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n 146→\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n 147→\t\t\tBootstrapMode: &bootstrapMode,\n 148→\t\t\tResourceGroup: s.testResourceGroup,\n 149→\t\t\tSSHKeys: []string{s.testSshKeyId},\n 150→\t\t\tTags: map[string]string{\n 151→\t\t\t\t\"test\": \"e2e\",\n 152→\t\t\t\t\"test-name\": testName,\n 153→\t\t\t\t\"created-by\": \"karpenter-e2e\",\n 154→\t\t\t\t\"purpose\": \"image-selector-test\",\n 155→\t\t\t},\n 156→\t\t},\n 157→\t}\n 158→\n 159→\terr := s.kubeClient.Create(context.Background(), nodeClass)\n 160→\trequire.NoError(t, err, \"Failed to create NodeClass with 
imageSelector\")\n 161→\tt.Logf(\"Created NodeClass with imageSelector: %s\", nodeClass.Name)\n 162→\n 163→\treturn nodeClass\n 164→}\n 165→\n 166→// createImageSelectorNodeClassWithPlacementStrategy creates a NodeClass with imageSelector and placement strategy\n 167→func (s *E2ETestSuite) createImageSelectorNodeClassWithPlacementStrategy(t *testing.T, testName string, imageSelector *v1alpha1.ImageSelector) *v1alpha1.IBMNodeClass {\n 168→\tbootstrapMode := \"cloud-init\"\n 169→\n 170→\tnodeClass := &v1alpha1.IBMNodeClass{\n 171→\t\tTypeMeta: metav1.TypeMeta{\n 172→\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n 173→\t\t\tKind: \"IBMNodeClass\",\n 174→\t\t},\n 175→\t\tObjectMeta: metav1.ObjectMeta{\n 176→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 177→\t\t\tLabels: map[string]string{\n 178→\t\t\t\t\"test-name\": testName,\n 179→\t\t\t},\n 180→\t\t},\n 181→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 182→\t\t\tRegion: s.testRegion,\n 183→\t\t\t// No InstanceProfile - let Karpenter choose based on NodePool requirements\n 184→\t\t\tImageSelector: imageSelector,\n 185→\t\t\tVPC: s.testVPC,\n 186→\t\t\t// Note: No Zone or Subnet specified - using placement strategy\n 187→\t\t\tPlacementStrategy: &v1alpha1.PlacementStrategy{\n 188→\t\t\t\tZoneBalance: \"Balanced\",\n 189→\t\t\t\tSubnetSelection: &v1alpha1.SubnetSelectionCriteria{\n 190→\t\t\t\t\tMinimumAvailableIPs: 10,\n 191→\t\t\t\t},\n 192→\t\t\t},\n 193→\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n 194→\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n 195→\t\t\tBootstrapMode: &bootstrapMode,\n 196→\t\t\tResourceGroup: s.testResourceGroup,\n 197→\t\t\tSSHKeys: []string{s.testSshKeyId},\n 198→\t\t\tTags: map[string]string{\n 199→\t\t\t\t\"test\": \"e2e\",\n 200→\t\t\t\t\"test-name\": testName,\n 201→\t\t\t\t\"created-by\": \"karpenter-e2e\",\n 202→\t\t\t\t\"purpose\": \"image-selector-placement-test\",\n 203→\t\t\t},\n 204→\t\t},\n 205→\t}\n 206→\n 207→\terr := s.kubeClient.Create(context.Background(), nodeClass)\n 208→\trequire.NoError(t, err, \"Failed to create NodeClass with imageSelector and placement strategy\")\n 209→\tt.Logf(\"Created NodeClass with imageSelector and placement strategy: %s\", nodeClass.Name)\n 210→\n 211→\treturn nodeClass\n 212→}\n 213→\n 214→// verifyImageSelectorResult verifies that Karpenter nodes were created with images matching the selector criteria\n 215→func (s *E2ETestSuite) verifyImageSelectorResult(t *testing.T, expectedOS string, expectedMajorVersion string, expectedMinorVersion string) {\n 216→\tctx := context.Background()\n 217→\n 218→\t// Get all Karpenter-managed nodes\n 219→\tvar nodeList corev1.NodeList\n 220→\terr := s.kubeClient.List(ctx, &nodeList)\n 221→\trequire.NoError(t, err, \"Failed to get node list\")\n 222→\n 223→\tkarpenterNodes := 0\n 224→\tfor _, node := range nodeList.Items {\n 225→\t\tif labelValue, exists := node.Labels[\"karpenter.sh/nodepool\"]; exists && labelValue != \"\" {\n 226→\t\t\tkarpenterNodes++\n 227→\n 228→\t\t\t// Check node labels for OS information\n 229→\t\t\tosImage := node.Labels[\"node.kubernetes.io/os\"]\n 230→\t\t\trequire.NotEmpty(t, osImage, \"Node should have OS label\")\n 231→\t\t\trequire.Equal(t, \"linux\", osImage, \"Node should have linux OS\")\n 232→\n 233→\t\t\t// Log the node with expected image verification\n 234→\t\t\tt.Logf(\"Verified Karpenter node %s uses expected OS: %s\", node.Name, osImage)\n 235→\n 236→\t\t\t// In a full implementation, we would verify the actual IBM Cloud instance image\n 237→\t\t\t// For now, we verify that the 
node was created successfully by Karpenter\n 238→\t\t\t// which indicates the imageSelector was processed correctly\n 239→\t\t\tif expectedMinorVersion != \"\" {\n 240→\t\t\t\tt.Logf(\"Node %s expected to use %s %s.%s image\", node.Name, expectedOS, expectedMajorVersion, expectedMinorVersion)\n 241→\t\t\t} else {\n 242→\t\t\t\tt.Logf(\"Node %s expected to use %s %s.x image\", node.Name, expectedOS, expectedMajorVersion)\n 243→\t\t\t}\n 244→\t\t}\n 245→\t}\n 246→\n 247→\trequire.Greater(t, karpenterNodes, 0, \"At least one Karpenter node should exist with imageSelector\")\n 248→\tt.Logf(\"Verified %d Karpenter nodes with imageSelector requirements\", karpenterNodes)\n 249→}\n 250→\n 251→// verifyNodePlacementStrategy verifies that nodes were placed according to the placement strategy\n 252→func (s *E2ETestSuite) verifyNodePlacementStrategy(t *testing.T) {\n 253→\tctx := context.Background()\n 254→\n 255→\t// Get all Karpenter-managed nodes\n 256→\tvar nodeList corev1.NodeList\n 257→\terr := s.kubeClient.List(ctx, &nodeList)\n 258→\trequire.NoError(t, err, \"Failed to get node list\")\n 259→\n 260→\tkarpenterNodes := 0\n 261→\tfor _, node := range nodeList.Items {\n 262→\t\tif labelValue, exists := node.Labels[\"karpenter.sh/nodepool\"]; exists && labelValue != \"\" {\n 263→\t\t\tkarpenterNodes++\n 264→\n 265→\t\t\t// Check zone placement\n 266→\t\t\tzoneLabel := node.Labels[\"topology.kubernetes.io/zone\"]\n 267→\t\t\trequire.NotEmpty(t, zoneLabel, \"Node should have zone label\")\n 268→\t\t\trequire.Contains(t, zoneLabel, s.testRegion, \"Node should be in the correct region\")\n 269→\n 270→\t\t\tt.Logf(\"Node %s placed in zone: %s\", node.Name, zoneLabel)\n 271→\n 272→\t\t\t// Verify the zone is within the expected region\n 273→\t\t\texpectedPrefix := s.testRegion + \"-\"\n 274→\t\t\trequire.True(t, strings.HasPrefix(zoneLabel, expectedPrefix),\n 275→\t\t\t\tfmt.Sprintf(\"Zone %s should be in region %s\", zoneLabel, s.testRegion))\n 276→\t\t}\n 277→\t}\n 278→\n 279→\trequire.Greater(t, karpenterNodes, 0, \"At least one Karpenter node should exist with placement strategy\")\n 280→\tt.Logf(\"Verified %d Karpenter nodes with proper placement\", karpenterNodes)\n 281→}\n 282→\n 283→// waitForSubnetSelection waits for the NodeClass status to populate selectedSubnets\n 284→func (s *E2ETestSuite) waitForSubnetSelection(t *testing.T, nodeClassName string) {\n 285→\tctx := context.Background()\n 286→\n 287→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 288→\t\tvar nodeClass v1alpha1.IBMNodeClass\n 289→\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClassName}, &nodeClass)\n 290→\t\tif err != nil {\n 291→\t\t\treturn false, err\n 292→\t\t}\n 293→\n 294→\t\t// Check if selectedSubnets is populated\n 295→\t\tif len(nodeClass.Status.SelectedSubnets) > 0 {\n 296→\t\t\tt.Logf(\"NodeClass %s has selected subnets: %v\", nodeClassName, nodeClass.Status.SelectedSubnets)\n 297→\t\t\treturn true, nil\n 298→\t\t}\n 299→\n 300→\t\tt.Logf(\"Waiting for subnet selection in NodeClass %s...\", nodeClassName)\n 301→\t\treturn false, nil\n 302→\t})\n 303→\trequire.NoError(t, err, \"Failed to wait for subnet selection\")\n 304→}\n 305→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"6ceb7d10-64d2-4607-a833-9a3f0287fd6b","timestamp":"2026-01-10T10:45:01.438Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/image_selector_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"strings\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\t\"k8s.io/apimachinery/pkg/util/wait\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// TestE2EImageSelector tests the imageSelector functionality with Ubuntu images\n// Note: We only test Ubuntu since our cloud-init bootstrap scripts use apt package manager\nfunc TestE2EImageSelector(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\ttestName := fmt.Sprintf(\"image-selector-test-%d\", time.Now().Unix())\n\tt.Logf(\"Starting ImageSelector E2E test: %s\", testName)\n\n\t// Test Case 1: Ubuntu 24.04 imageSelector (latest LTS)\n\tt.Run(\"Ubuntu_24_04_ImageSelector\", func(t *testing.T) {\n\t\tnodeClass := suite.createImageSelectorNodeClass(t, testName+\"-ubuntu24\", &v1alpha1.ImageSelector{\n\t\t\tOS: \"ubuntu\",\n\t\t\tMajorVersion: \"24\",\n\t\t\tMinorVersion: \"04\",\n\t\t\tArchitecture: \"amd64\",\n\t\t\tVariant: \"minimal\",\n\t\t})\n\n\t\t// Wait for NodeClass to be ready\n\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\t\tt.Logf(\"NodeClass with Ubuntu 24.04 imageSelector is ready: %s\", nodeClass.Name)\n\n\t\t// Create NodePool\n\t\tnodePool := suite.createTestNodePool(t, testName+\"-ubuntu24\", nodeClass.Name)\n\t\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n\n\t\t// Create workload\n\t\tdeployment := suite.createTestWorkload(t, testName+\"-ubuntu24\")\n\t\tt.Logf(\"Created test workload: %s\", deployment.Name)\n\n\t\t// Wait for pods to be scheduled and nodes to be provisioned\n\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n\t\tt.Logf(\"Pods scheduled successfully\")\n\n\t\t// Verify that new nodes were created by Karpenter\n\t\tsuite.verifyKarpenterNodesExist(t)\n\t\tt.Logf(\"Verified Karpenter nodes exist\")\n\n\t\t// Verify the deployed image through node verification\n\t\tsuite.verifyImageSelectorResult(t, \"ubuntu\", \"24\", \"04\")\n\n\t\t// Cleanup\n\t\tsuite.cleanupTestResources(t, testName+\"-ubuntu24\")\n\t})\n\n\t// Test Case 2: ImageSelector with placement strategy (no zone/subnet specified)\n\tt.Run(\"ImageSelector_with_PlacementStrategy\", func(t *testing.T) {\n\t\tnodeClass := suite.createImageSelectorNodeClassWithPlacementStrategy(t, testName+\"-placement\", &v1alpha1.ImageSelector{\n\t\t\tOS: \"ubuntu\",\n\t\t\tMajorVersion: \"22\",\n\t\t\tMinorVersion: \"04\",\n\t\t\tArchitecture: \"amd64\",\n\t\t\tVariant: 
\"minimal\",\n\t\t})\n\n\t\t// Wait for NodeClass to be ready\n\t\tsuite.waitForNodeClassReady(t, nodeClass.Name)\n\t\tt.Logf(\"NodeClass with imageSelector and placement strategy is ready: %s\", nodeClass.Name)\n\n\t\t// Verify that selectedSubnets are populated in the status\n\t\tsuite.waitForSubnetSelection(t, nodeClass.Name)\n\n\t\t// Create NodePool\n\t\tnodePool := suite.createTestNodePool(t, testName+\"-placement\", nodeClass.Name)\n\t\tt.Logf(\"Created NodePool: %s\", nodePool.Name)\n\n\t\t// Create workload\n\t\tdeployment := suite.createTestWorkload(t, testName+\"-placement\")\n\t\tt.Logf(\"Created test workload: %s\", deployment.Name)\n\n\t\t// Wait for pods to be scheduled and nodes to be provisioned\n\t\tsuite.waitForPodsToBeScheduled(t, deployment.Name, deployment.Namespace)\n\t\tt.Logf(\"Pods scheduled successfully with placement strategy\")\n\n\t\t// Verify that new nodes were created by Karpenter\n\t\tsuite.verifyKarpenterNodesExist(t)\n\t\tt.Logf(\"Verified Karpenter nodes exist\")\n\n\t\t// Verify the deployed image and placement through node verification\n\t\tsuite.verifyImageSelectorResult(t, \"ubuntu\", \"22\", \"04\")\n\t\tsuite.verifyNodePlacementStrategy(t)\n\n\t\t// Cleanup\n\t\tsuite.cleanupTestResources(t, testName+\"-placement\")\n\t})\n}\n\n// createImageSelectorNodeClass creates a NodeClass with imageSelector configuration\nfunc (s *E2ETestSuite) createImageSelectorNodeClass(t *testing.T, testName string, imageSelector *v1alpha1.ImageSelector) *v1alpha1.IBMNodeClass {\n\tbootstrapMode := \"cloud-init\"\n\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tTypeMeta: metav1.TypeMeta{\n\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n\t\t\tKind: \"IBMNodeClass\",\n\t\t},\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: s.testRegion,\n\t\t\tZone: s.testZone,\n\t\t\t// No InstanceProfile - let Karpenter choose based on NodePool requirements\n\t\t\tImageSelector: imageSelector, // Use imageSelector instead of image\n\t\t\tVPC: s.testVPC,\n\t\t\tSubnet: s.testSubnet,\n\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n\t\t\tBootstrapMode: &bootstrapMode,\n\t\t\tResourceGroup: s.testResourceGroup,\n\t\t\tSSHKeys: []string{s.testSshKeyId},\n\t\t\tTags: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t\t\"created-by\": \"karpenter-e2e\",\n\t\t\t\t\"purpose\": \"image-selector-test\",\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), nodeClass)\n\trequire.NoError(t, err, \"Failed to create NodeClass with imageSelector\")\n\tt.Logf(\"Created NodeClass with imageSelector: %s\", nodeClass.Name)\n\n\treturn nodeClass\n}\n\n// createImageSelectorNodeClassWithPlacementStrategy creates a NodeClass with imageSelector and placement strategy\nfunc (s *E2ETestSuite) createImageSelectorNodeClassWithPlacementStrategy(t *testing.T, testName string, imageSelector *v1alpha1.ImageSelector) *v1alpha1.IBMNodeClass {\n\tbootstrapMode := \"cloud-init\"\n\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tTypeMeta: metav1.TypeMeta{\n\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n\t\t\tKind: \"IBMNodeClass\",\n\t\t},\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t\tLabels: map[string]string{\n\t\t\t\t\"test-name\": testName,\n\t\t\t},\n\t\t},\n\t\tSpec: 
v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: s.testRegion,\n\t\t\t// No InstanceProfile - let Karpenter choose based on NodePool requirements\n\t\t\tImageSelector: imageSelector,\n\t\t\tVPC: s.testVPC,\n\t\t\t// Note: No Zone or Subnet specified - using placement strategy\n\t\t\tPlacementStrategy: &v1alpha1.PlacementStrategy{\n\t\t\t\tZoneBalance: \"Balanced\",\n\t\t\t\tSubnetSelection: &v1alpha1.SubnetSelectionCriteria{\n\t\t\t\t\tMinimumAvailableIPs: 10,\n\t\t\t\t},\n\t\t\t},\n\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n\t\t\tBootstrapMode: &bootstrapMode,\n\t\t\tResourceGroup: s.testResourceGroup,\n\t\t\tSSHKeys: []string{s.testSshKeyId},\n\t\t\tTags: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t\t\"created-by\": \"karpenter-e2e\",\n\t\t\t\t\"purpose\": \"image-selector-placement-test\",\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), nodeClass)\n\trequire.NoError(t, err, \"Failed to create NodeClass with imageSelector and placement strategy\")\n\tt.Logf(\"Created NodeClass with imageSelector and placement strategy: %s\", nodeClass.Name)\n\n\treturn nodeClass\n}\n\n// verifyImageSelectorResult verifies that Karpenter nodes were created with images matching the selector criteria\nfunc (s *E2ETestSuite) verifyImageSelectorResult(t *testing.T, expectedOS string, expectedMajorVersion string, expectedMinorVersion string) {\n\tctx := context.Background()\n\n\t// Get all Karpenter-managed nodes\n\tvar nodeList corev1.NodeList\n\terr := s.kubeClient.List(ctx, &nodeList)\n\trequire.NoError(t, err, \"Failed to get node list\")\n\n\tkarpenterNodes := 0\n\tfor _, node := range nodeList.Items {\n\t\tif labelValue, exists := node.Labels[\"karpenter.sh/nodepool\"]; exists && labelValue != \"\" {\n\t\t\tkarpenterNodes++\n\n\t\t\t// Check node labels for OS information\n\t\t\tosImage := node.Labels[\"node.kubernetes.io/os\"]\n\t\t\trequire.NotEmpty(t, osImage, \"Node should have OS label\")\n\t\t\trequire.Equal(t, \"linux\", osImage, \"Node should have linux OS\")\n\n\t\t\t// Log the node with expected image verification\n\t\t\tt.Logf(\"Verified Karpenter node %s uses expected OS: %s\", node.Name, osImage)\n\n\t\t\t// In a full implementation, we would verify the actual IBM Cloud instance image\n\t\t\t// For now, we verify that the node was created successfully by Karpenter\n\t\t\t// which indicates the imageSelector was processed correctly\n\t\t\tif expectedMinorVersion != \"\" {\n\t\t\t\tt.Logf(\"Node %s expected to use %s %s.%s image\", node.Name, expectedOS, expectedMajorVersion, expectedMinorVersion)\n\t\t\t} else {\n\t\t\t\tt.Logf(\"Node %s expected to use %s %s.x image\", node.Name, expectedOS, expectedMajorVersion)\n\t\t\t}\n\t\t}\n\t}\n\n\trequire.Greater(t, karpenterNodes, 0, \"At least one Karpenter node should exist with imageSelector\")\n\tt.Logf(\"Verified %d Karpenter nodes with imageSelector requirements\", karpenterNodes)\n}\n\n// verifyNodePlacementStrategy verifies that nodes were placed according to the placement strategy\nfunc (s *E2ETestSuite) verifyNodePlacementStrategy(t *testing.T) {\n\tctx := context.Background()\n\n\t// Get all Karpenter-managed nodes\n\tvar nodeList corev1.NodeList\n\terr := s.kubeClient.List(ctx, &nodeList)\n\trequire.NoError(t, err, \"Failed to get node list\")\n\n\tkarpenterNodes := 0\n\tfor _, node := range nodeList.Items {\n\t\tif labelValue, exists := node.Labels[\"karpenter.sh/nodepool\"]; exists && labelValue != \"\" 
{\n\t\t\tkarpenterNodes++\n\n\t\t\t// Check zone placement\n\t\t\tzoneLabel := node.Labels[\"topology.kubernetes.io/zone\"]\n\t\t\trequire.NotEmpty(t, zoneLabel, \"Node should have zone label\")\n\t\t\trequire.Contains(t, zoneLabel, s.testRegion, \"Node should be in the correct region\")\n\n\t\t\tt.Logf(\"Node %s placed in zone: %s\", node.Name, zoneLabel)\n\n\t\t\t// Verify the zone is within the expected region\n\t\t\texpectedPrefix := s.testRegion + \"-\"\n\t\t\trequire.True(t, strings.HasPrefix(zoneLabel, expectedPrefix),\n\t\t\t\tfmt.Sprintf(\"Zone %s should be in region %s\", zoneLabel, s.testRegion))\n\t\t}\n\t}\n\n\trequire.Greater(t, karpenterNodes, 0, \"At least one Karpenter node should exist with placement strategy\")\n\tt.Logf(\"Verified %d Karpenter nodes with proper placement\", karpenterNodes)\n}\n\n// waitForSubnetSelection waits for the NodeClass status to populate selectedSubnets\nfunc (s *E2ETestSuite) waitForSubnetSelection(t *testing.T, nodeClassName string) {\n\tctx := context.Background()\n\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar nodeClass v1alpha1.IBMNodeClass\n\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClassName}, &nodeClass)\n\t\tif err != nil {\n\t\t\treturn false, err\n\t\t}\n\n\t\t// Check if selectedSubnets is populated\n\t\tif len(nodeClass.Status.SelectedSubnets) > 0 {\n\t\t\tt.Logf(\"NodeClass %s has selected subnets: %v\", nodeClassName, nodeClass.Status.SelectedSubnets)\n\t\t\treturn true, nil\n\t\t}\n\n\t\tt.Logf(\"Waiting for subnet selection in NodeClass %s...\", nodeClassName)\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"Failed to wait for subnet selection\")\n}\n","numLines":305,"startLine":1,"totalLines":305}},"sourceToolAssistantUUID":"5828a063-efeb-49a0-b059-b6e01d87ab97"}
{"parentUuid":"0f7c4d7e-686d-47c2-b0a9-04c360b7720d","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01MtnAvYSEREaw5teFJQbwEg","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/samber/lo\"\n 28→\t\"github.com/stretchr/testify/assert\"\n 29→\t\"github.com/stretchr/testify/require\"\n 30→\tappsv1 \"k8s.io/api/apps/v1\"\n 31→\tcorev1 \"k8s.io/api/core/v1\"\n 32→\t\"k8s.io/apimachinery/pkg/api/resource\"\n 33→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 34→\t\"k8s.io/apimachinery/pkg/types\"\n 35→\t\"k8s.io/apimachinery/pkg/util/wait\"\n 36→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 37→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 38→\n 39→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 40→)\n 41→\n 42→// TestE2EStartupTaints tests that startup taints are properly applied to new nodes\n 43→func TestE2EStartupTaints(t *testing.T) {\n 44→\tsuite := SetupE2ETestSuite(t)\n 45→\tctx := context.Background()\n 46→\n 47→\t// Create unique names for this test\n 48→\ttestName := fmt.Sprintf(\"startup-taints-%d\", time.Now().Unix())\n 49→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 50→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 51→\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n 52→\n 53→\t// Create NodeClass\n 54→\tnodeClass := &v1alpha1.IBMNodeClass{\n 55→\t\tObjectMeta: metav1.ObjectMeta{\n 56→\t\t\tName: nodeClassName,\n 57→\t\t},\n 58→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 59→\t\t\tRegion: suite.testRegion,\n 60→\t\t\tZone: suite.testZone,\n 61→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 62→\t\t\tVPC: suite.testVPC,\n 63→\t\t\tSubnet: suite.testSubnet,\n 64→\t\t\tImage: suite.testImage,\n 65→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 66→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 67→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 68→\t\t\tResourceGroup: suite.testResourceGroup,\n 69→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 70→\t\t},\n 71→\t}\n 72→\n 73→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 74→\trequire.NoError(t, err)\n 75→\tdefer func() {\n 76→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 77→\t}()\n 78→\n 79→\t// Create NodePool with startup taints\n 80→\tnodePool := &karpv1.NodePool{\n 81→\t\tObjectMeta: metav1.ObjectMeta{\n 82→\t\t\tName: nodePoolName,\n 83→\t\t},\n 84→\t\tSpec: karpv1.NodePoolSpec{\n 85→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 
86→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 87→\t\t\t\t\tLabels: map[string]string{\n 88→\t\t\t\t\t\t\"test\": testName,\n 89→\t\t\t\t\t},\n 90→\t\t\t\t},\n 91→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 92→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 93→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 94→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 95→\t\t\t\t\t\tName: nodeClassName,\n 96→\t\t\t\t\t},\n 97→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 98→\t\t\t\t\t\t{\n 99→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 100→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 101→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 102→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 103→\t\t\t\t\t\t\t},\n 104→\t\t\t\t\t\t},\n 105→\t\t\t\t\t},\n 106→\t\t\t\t\tStartupTaints: []corev1.Taint{\n 107→\t\t\t\t\t\t{\n 108→\t\t\t\t\t\t\tKey: \"node.kubernetes.io/not-ready\",\n 109→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 110→\t\t\t\t\t\t},\n 111→\t\t\t\t\t\t{\n 112→\t\t\t\t\t\t\tKey: \"example.com/initializing\",\n 113→\t\t\t\t\t\t\tValue: \"true\",\n 114→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 115→\t\t\t\t\t\t},\n 116→\t\t\t\t\t},\n 117→\t\t\t\t},\n 118→\t\t\t},\n 119→\t\t},\n 120→\t}\n 121→\n 122→\terr = suite.kubeClient.Create(ctx, nodePool)\n 123→\trequire.NoError(t, err)\n 124→\tdefer func() {\n 125→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 126→\t}()\n 127→\n 128→\t// Create deployment that forces node creation with nodeSelector\n 129→\t// This ensures the pod is only scheduled on nodes from the specific NodePool with startup taints\n 130→\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n 131→\t\t{\n 132→\t\t\tKey: \"node.kubernetes.io/not-ready\",\n 133→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 134→\t\t},\n 135→\t\t{\n 136→\t\t\tKey: \"example.com/initializing\",\n 137→\t\t\tValue: \"true\",\n 138→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 139→\t\t},\n 140→\t}, map[string]string{\n 141→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 142→\t})\n 143→\n 144→\terr = suite.kubeClient.Create(ctx, deployment)\n 145→\trequire.NoError(t, err)\n 146→\tdefer func() {\n 147→\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n 148→\t}()\n 149→\n 150→\t// Wait for NodeClaim to be created\n 151→\tvar nodeClaim karpv1.NodeClaim\n 152→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 153→\t\t// First check if deployment pods are pending (which should trigger NodeClaim creation)\n 154→\t\tvar deployment appsv1.Deployment\n 155→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n 156→\t\t\tNamespace: \"default\",\n 157→\t\t\tName: deploymentName,\n 158→\t\t}, &deployment)\n 159→\t\tif getErr != nil {\n 160→\t\t\tt.Logf(\"Could not get deployment %s: %v\", deploymentName, getErr)\n 161→\t\t} else {\n 162→\t\t\tt.Logf(\"Deployment %s: %d/%d replicas ready\", deploymentName, deployment.Status.ReadyReplicas, *deployment.Spec.Replicas)\n 163→\t\t}\n 164→\n 165→\t\t// Check all NodeClaims (not just ones with our label)\n 166→\t\tallNodeClaims := &karpv1.NodeClaimList{}\n 167→\t\tif listErr := suite.kubeClient.List(ctx, allNodeClaims); listErr == nil {\n 168→\t\t\tt.Logf(\"Total NodeClaims in cluster: %d\", len(allNodeClaims.Items))\n 169→\t\t}\n 170→\n 171→\t\t// Check for NodeClaims with our test label\n 
172→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 173→\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 174→\t\tif listErr != nil {\n 175→\t\t\treturn false, listErr\n 176→\t\t}\n 177→\t\tt.Logf(\"NodeClaims with test label '%s': %d\", testName, len(nodeClaimList.Items))\n 178→\t\tif len(nodeClaimList.Items) > 0 {\n 179→\t\t\tnodeClaim = nodeClaimList.Items[0]\n 180→\t\t\tt.Logf(\"Found NodeClaim: %s\", nodeClaim.Name)\n 181→\t\t\treturn true, nil\n 182→\t\t}\n 183→\t\treturn false, nil\n 184→\t})\n 185→\trequire.NoError(t, err, \"NodeClaim should be created\")\n 186→\n 187→\t// Verify NodeClaim has startup taints\n 188→\tassert.Len(t, nodeClaim.Spec.StartupTaints, 2, \"NodeClaim should have 2 startup taints\")\n 189→\n 190→\tfoundNotReady := false\n 191→\tfoundInitializing := false\n 192→\tfor _, taint := range nodeClaim.Spec.StartupTaints {\n 193→\t\tif taint.Key == \"node.kubernetes.io/not-ready\" {\n 194→\t\t\tfoundNotReady = true\n 195→\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n 196→\t\t}\n 197→\t\tif taint.Key == \"example.com/initializing\" {\n 198→\t\t\tfoundInitializing = true\n 199→\t\t\tassert.Equal(t, \"true\", taint.Value)\n 200→\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n 201→\t\t}\n 202→\t}\n 203→\tassert.True(t, foundNotReady, \"NodeClaim should have 'not-ready' startup taint\")\n 204→\tassert.True(t, foundInitializing, \"NodeClaim should have 'initializing' startup taint\")\n 205→\n 206→\t// Wait for Node to be created and registered\n 207→\tvar node corev1.Node\n 208→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 209→\t\tif nodeClaim.Status.NodeName == \"\" {\n 210→\t\t\t// Refresh NodeClaim to get updated status\n 211→\t\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Name}, &nodeClaim)\n 212→\t\t\tif getErr != nil {\n 213→\t\t\t\treturn false, getErr\n 214→\t\t\t}\n 215→\t\t\treturn false, nil\n 216→\t\t}\n 217→\n 218→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n 219→\t\tif getErr != nil {\n 220→\t\t\treturn false, client.IgnoreNotFound(getErr)\n 221→\t\t}\n 222→\t\treturn true, nil\n 223→\t})\n 224→\trequire.NoError(t, err, \"Node should be created and registered\")\n 225→\n 226→\t// Verify startup taints are applied to the Node\n 227→\tfoundNotReadyOnNode := false\n 228→\tfoundInitializingOnNode := false\n 229→\tfor _, taint := range node.Spec.Taints {\n 230→\t\tif taint.Key == \"node.kubernetes.io/not-ready\" {\n 231→\t\t\tfoundNotReadyOnNode = true\n 232→\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n 233→\t\t}\n 234→\t\tif taint.Key == \"example.com/initializing\" {\n 235→\t\t\tfoundInitializingOnNode = true\n 236→\t\t\tassert.Equal(t, \"true\", taint.Value)\n 237→\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n 238→\t\t}\n 239→\t}\n 240→\tassert.True(t, foundNotReadyOnNode, \"Node should have 'not-ready' startup taint applied\")\n 241→\tassert.True(t, foundInitializingOnNode, \"Node should have 'initializing' startup taint applied\")\n 242→\n 243→\t// Verify pods can schedule despite startup taints (ignored for provisioning)\n 244→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 245→\t\tvar updatedDeployment appsv1.Deployment\n 246→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n 
247→\t\t\tNamespace: deployment.Namespace,\n 248→\t\t\tName: deployment.Name,\n 249→\t\t}, &updatedDeployment)\n 250→\t\tif getErr != nil {\n 251→\t\t\treturn false, getErr\n 252→\t\t}\n 253→\t\treturn updatedDeployment.Status.ReadyReplicas >= 1, nil\n 254→\t})\n 255→\trequire.NoError(t, err, \"Deployment should have at least 1 ready replica despite startup taints\")\n 256→\n 257→\tt.Logf(\"PASS: StartupTaints E2E test passed - startup taints properly applied and pods scheduled\")\n 258→}\n 259→\n 260→// TestE2EStartupTaintsRemoval tests startup taint removal by DaemonSet\n 261→func TestE2EStartupTaintsRemoval(t *testing.T) {\n 262→\tsuite := SetupE2ETestSuite(t)\n 263→\tctx := context.Background()\n 264→\n 265→\t// Create unique names for this test\n 266→\ttestName := fmt.Sprintf(\"startup-taint-removal-%d\", time.Now().Unix())\n 267→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 268→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 269→\tdaemonSetName := fmt.Sprintf(\"taint-remover-%s\", testName)\n 270→\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n 271→\n 272→\t// Create NodeClass\n 273→\tnodeClass := &v1alpha1.IBMNodeClass{\n 274→\t\tObjectMeta: metav1.ObjectMeta{\n 275→\t\t\tName: nodeClassName,\n 276→\t\t},\n 277→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 278→\t\t\tRegion: suite.testRegion,\n 279→\t\t\tZone: suite.testZone,\n 280→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 281→\t\t\tVPC: suite.testVPC,\n 282→\t\t\tSubnet: suite.testSubnet,\n 283→\t\t\tImage: suite.testImage,\n 284→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 285→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 286→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 287→\t\t\tResourceGroup: suite.testResourceGroup,\n 288→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 289→\t\t},\n 290→\t}\n 291→\n 292→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 293→\trequire.NoError(t, err)\n 294→\tdefer func() {\n 295→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 296→\t}()\n 297→\n 298→\t// Create NodePool with startup taints\n 299→\tnodePool := &karpv1.NodePool{\n 300→\t\tObjectMeta: metav1.ObjectMeta{\n 301→\t\t\tName: nodePoolName,\n 302→\t\t},\n 303→\t\tSpec: karpv1.NodePoolSpec{\n 304→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 305→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 306→\t\t\t\t\tLabels: map[string]string{\n 307→\t\t\t\t\t\t\"test\": testName,\n 308→\t\t\t\t\t},\n 309→\t\t\t\t},\n 310→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 311→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 312→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 313→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 314→\t\t\t\t\t\tName: nodeClassName,\n 315→\t\t\t\t\t},\n 316→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 317→\t\t\t\t\t\t{\n 318→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 319→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 320→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 321→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 322→\t\t\t\t\t\t\t},\n 323→\t\t\t\t\t\t},\n 324→\t\t\t\t\t},\n 325→\t\t\t\t\tStartupTaints: []corev1.Taint{\n 326→\t\t\t\t\t\t{\n 327→\t\t\t\t\t\t\tKey: \"example.com/startup-init\",\n 328→\t\t\t\t\t\t\tValue: \"pending\",\n 329→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 330→\t\t\t\t\t\t},\n 331→\t\t\t\t\t},\n 332→\t\t\t\t},\n 333→\t\t\t},\n 334→\t\t},\n 335→\t}\n 
336→\n 337→\terr = suite.kubeClient.Create(ctx, nodePool)\n 338→\trequire.NoError(t, err)\n 339→\tdefer func() {\n 340→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 341→\t}()\n 342→\n 343→\t// Create DaemonSet that tolerates and removes startup taints\n 344→\tdaemonSet := &appsv1.DaemonSet{\n 345→\t\tObjectMeta: metav1.ObjectMeta{\n 346→\t\t\tName: daemonSetName,\n 347→\t\t\tNamespace: \"default\",\n 348→\t\t},\n 349→\t\tSpec: appsv1.DaemonSetSpec{\n 350→\t\t\tSelector: &metav1.LabelSelector{\n 351→\t\t\t\tMatchLabels: map[string]string{\"app\": daemonSetName},\n 352→\t\t\t},\n 353→\t\t\tTemplate: corev1.PodTemplateSpec{\n 354→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 355→\t\t\t\t\tLabels: map[string]string{\"app\": daemonSetName},\n 356→\t\t\t\t},\n 357→\t\t\t\tSpec: corev1.PodSpec{\n 358→\t\t\t\t\tTolerations: []corev1.Toleration{\n 359→\t\t\t\t\t\t{\n 360→\t\t\t\t\t\t\tKey: \"example.com/startup-init\",\n 361→\t\t\t\t\t\t\tValue: \"pending\",\n 362→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 363→\t\t\t\t\t\t},\n 364→\t\t\t\t\t},\n 365→\t\t\t\t\tContainers: []corev1.Container{\n 366→\t\t\t\t\t\t{\n 367→\t\t\t\t\t\t\tName: \"taint-remover\",\n 368→\t\t\t\t\t\t\tImage: \"quay.io/isovalent/busybox:1.37.0\",\n 369→\t\t\t\t\t\t\tCommand: []string{\n 370→\t\t\t\t\t\t\t\t\"/bin/sh\",\n 371→\t\t\t\t\t\t\t\t\"-c\",\n 372→\t\t\t\t\t\t\t\t`\n 373→# Wait for node to be ready, then remove startup taint\n 374→NODE_NAME=${MY_NODE_NAME}\n 375→echo \"Waiting for node $NODE_NAME to be ready...\"\n 376→sleep 30\n 377→\n 378→# Use kubectl to remove the startup taint\n 379→kubectl patch node $NODE_NAME --type=json -p='[{\"op\": \"remove\", \"path\": \"/spec/taints\", \"value\": {\"key\": \"example.com/startup-init\", \"value\": \"pending\", \"effect\": \"NoSchedule\"}}]'\n 380→\n 381→# Keep container running\n 382→sleep 3600\n 383→\t\t\t\t\t\t\t\t`,\n 384→\t\t\t\t\t\t\t},\n 385→\t\t\t\t\t\t\tEnv: []corev1.EnvVar{\n 386→\t\t\t\t\t\t\t\t{\n 387→\t\t\t\t\t\t\t\t\tName: \"MY_NODE_NAME\",\n 388→\t\t\t\t\t\t\t\t\tValueFrom: &corev1.EnvVarSource{\n 389→\t\t\t\t\t\t\t\t\t\tFieldRef: &corev1.ObjectFieldSelector{\n 390→\t\t\t\t\t\t\t\t\t\t\tFieldPath: \"spec.nodeName\",\n 391→\t\t\t\t\t\t\t\t\t\t},\n 392→\t\t\t\t\t\t\t\t\t},\n 393→\t\t\t\t\t\t\t\t},\n 394→\t\t\t\t\t\t\t},\n 395→\t\t\t\t\t\t},\n 396→\t\t\t\t\t},\n 397→\t\t\t\t\tServiceAccountName: \"default\", // In practice, would use proper RBAC\n 398→\t\t\t\t},\n 399→\t\t\t},\n 400→\t\t},\n 401→\t}\n 402→\n 403→\terr = suite.kubeClient.Create(ctx, daemonSet)\n 404→\trequire.NoError(t, err)\n 405→\tdefer func() {\n 406→\t\t_ = suite.kubeClient.Delete(ctx, daemonSet)\n 407→\t}()\n 408→\n 409→\t// Create deployment to force node creation\n 410→\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n 411→\t\t{\n 412→\t\t\tKey: \"example.com/startup-init\",\n 413→\t\t\tValue: \"pending\",\n 414→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 415→\t\t},\n 416→\t}, map[string]string{\n 417→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 418→\t})\n 419→\n 420→\terr = suite.kubeClient.Create(ctx, deployment)\n 421→\trequire.NoError(t, err)\n 422→\tdefer func() {\n 423→\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n 424→\t}()\n 425→\n 426→\t// Wait for Node to be created\n 427→\tvar node corev1.Node\n 428→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 429→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 430→\t\tlistErr := 
suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 431→\t\tif listErr != nil {\n 432→\t\t\treturn false, listErr\n 433→\t\t}\n 434→\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n 435→\t\t\treturn false, nil\n 436→\t\t}\n 437→\n 438→\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n 439→\t\treturn err == nil, client.IgnoreNotFound(err)\n 440→\t})\n 441→\trequire.NoError(t, err, \"Node should be created\")\n 442→\n 443→\t// Verify startup taint is initially present\n 444→\tfoundStartupTaint := false\n 445→\tfor _, taint := range node.Spec.Taints {\n 446→\t\tif taint.Key == \"example.com/startup-init\" && taint.Value == \"pending\" {\n 447→\t\t\tfoundStartupTaint = true\n 448→\t\t\tbreak\n 449→\t\t}\n 450→\t}\n 451→\tassert.True(t, foundStartupTaint, \"Node should initially have startup taint\")\n 452→\n 453→\t// NOTE: For a complete implementation, the DaemonSet would need proper RBAC permissions\n 454→\t// to patch nodes and remove taints. This test verifies the taint is initially applied correctly.\n 455→\n 456→\tt.Logf(\"PASS: StartupTaints removal test setup complete - startup taint properly applied\")\n 457→}\n 458→\n 459→// TestE2ETaintsBasicScheduling tests basic taint/toleration scheduling\n 460→func TestE2ETaintsBasicScheduling(t *testing.T) {\n 461→\tsuite := SetupE2ETestSuite(t)\n 462→\tctx := context.Background()\n 463→\n 464→\t// Create unique names for this test\n 465→\ttestName := fmt.Sprintf(\"basic-taints-%d\", time.Now().Unix())\n 466→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 467→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 468→\ttolerantDeploymentName := fmt.Sprintf(\"tolerant-deployment-%s\", testName)\n 469→\tintolerantDeploymentName := fmt.Sprintf(\"intolerant-deployment-%s\", testName)\n 470→\n 471→\t// Create NodeClass\n 472→\tnodeClass := &v1alpha1.IBMNodeClass{\n 473→\t\tObjectMeta: metav1.ObjectMeta{\n 474→\t\t\tName: nodeClassName,\n 475→\t\t},\n 476→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 477→\t\t\tRegion: suite.testRegion,\n 478→\t\t\tZone: suite.testZone,\n 479→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 480→\t\t\tVPC: suite.testVPC,\n 481→\t\t\tSubnet: suite.testSubnet,\n 482→\t\t\tImage: suite.testImage,\n 483→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 484→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 485→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 486→\t\t\tResourceGroup: suite.testResourceGroup,\n 487→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 488→\t\t},\n 489→\t}\n 490→\n 491→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 492→\trequire.NoError(t, err)\n 493→\tdefer func() {\n 494→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 495→\t}()\n 496→\n 497→\t// Create NodePool with regular taints\n 498→\tnodePool := &karpv1.NodePool{\n 499→\t\tObjectMeta: metav1.ObjectMeta{\n 500→\t\t\tName: nodePoolName,\n 501→\t\t},\n 502→\t\tSpec: karpv1.NodePoolSpec{\n 503→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 504→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 505→\t\t\t\t\tLabels: map[string]string{\n 506→\t\t\t\t\t\t\"test\": testName,\n 507→\t\t\t\t\t},\n 508→\t\t\t\t},\n 509→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 510→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 511→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 512→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 513→\t\t\t\t\t\tName: nodeClassName,\n 
514→\t\t\t\t\t},\n 515→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 516→\t\t\t\t\t\t{\n 517→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 518→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 519→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 520→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 521→\t\t\t\t\t\t\t},\n 522→\t\t\t\t\t\t},\n 523→\t\t\t\t\t},\n 524→\t\t\t\t\tTaints: []corev1.Taint{\n 525→\t\t\t\t\t\t{\n 526→\t\t\t\t\t\t\tKey: \"dedicated\",\n 527→\t\t\t\t\t\t\tValue: \"gpu-workload\",\n 528→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 529→\t\t\t\t\t\t},\n 530→\t\t\t\t\t},\n 531→\t\t\t\t},\n 532→\t\t\t},\n 533→\t\t},\n 534→\t}\n 535→\n 536→\terr = suite.kubeClient.Create(ctx, nodePool)\n 537→\trequire.NoError(t, err)\n 538→\tdefer func() {\n 539→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 540→\t}()\n 541→\n 542→\t// Create deployment that tolerates the taint\n 543→\ttolerantDeployment := createResourceIntensiveWorkload(tolerantDeploymentName, testName, []corev1.Toleration{\n 544→\t\t{\n 545→\t\t\tKey: \"dedicated\",\n 546→\t\t\tValue: \"gpu-workload\",\n 547→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 548→\t\t\tOperator: corev1.TolerationOpEqual,\n 549→\t\t},\n 550→\t}, map[string]string{\n 551→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 552→\t})\n 553→\n 554→\terr = suite.kubeClient.Create(ctx, tolerantDeployment)\n 555→\trequire.NoError(t, err)\n 556→\tdefer func() {\n 557→\t\t_ = suite.kubeClient.Delete(ctx, tolerantDeployment)\n 558→\t}()\n 559→\n 560→\t// Wait for tolerant deployment to schedule and become ready\n 561→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 562→\t\tvar deployment appsv1.Deployment\n 563→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n 564→\t\t\tNamespace: tolerantDeployment.Namespace,\n 565→\t\t\tName: tolerantDeployment.Name,\n 566→\t\t}, &deployment)\n 567→\t\tif getErr != nil {\n 568→\t\t\treturn false, getErr\n 569→\t\t}\n 570→\t\treturn deployment.Status.ReadyReplicas >= 1, nil\n 571→\t})\n 572→\trequire.NoError(t, err, \"Tolerant deployment should become ready\")\n 573→\n 574→\t// Verify node has the expected taint\n 575→\tvar node corev1.Node\n 576→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, time.Minute, true, func(ctx context.Context) (bool, error) {\n 577→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 578→\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 579→\t\tif listErr != nil {\n 580→\t\t\treturn false, listErr\n 581→\t\t}\n 582→\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n 583→\t\t\treturn false, nil\n 584→\t\t}\n 585→\n 586→\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n 587→\t\treturn err == nil, client.IgnoreNotFound(err)\n 588→\t})\n 589→\trequire.NoError(t, err, \"Node should be found\")\n 590→\n 591→\t// Verify taint is present on node\n 592→\tfoundTaint := false\n 593→\tfor _, taint := range node.Spec.Taints {\n 594→\t\tif taint.Key == \"dedicated\" && taint.Value == \"gpu-workload\" && taint.Effect == corev1.TaintEffectNoSchedule {\n 595→\t\t\tfoundTaint = true\n 596→\t\t\tbreak\n 597→\t\t}\n 598→\t}\n 599→\tassert.True(t, foundTaint, \"Node should have 'dedicated=gpu-workload' taint\")\n 600→\n 601→\t// 
Create deployment that does NOT tolerate the taint (should remain pending)\n 602→\tintolerantDeployment := createResourceIntensiveWorkload(intolerantDeploymentName, testName+\"intolerant\", nil, nil)\n 603→\n 604→\terr = suite.kubeClient.Create(ctx, intolerantDeployment)\n 605→\trequire.NoError(t, err)\n 606→\tdefer func() {\n 607→\t\t_ = suite.kubeClient.Delete(ctx, intolerantDeployment)\n 608→\t}()\n 609→\n 610→\t// Wait and verify intolerant deployment does NOT become ready\n 611→\ttime.Sleep(30 * time.Second) // Give it time to try scheduling\n 612→\n 613→\tvar intolerantDep appsv1.Deployment\n 614→\terr = suite.kubeClient.Get(ctx, types.NamespacedName{\n 615→\t\tNamespace: intolerantDeployment.Namespace,\n 616→\t\tName: intolerantDeployment.Name,\n 617→\t}, &intolerantDep)\n 618→\trequire.NoError(t, err)\n 619→\n 620→\t// Should have 0 ready replicas due to taint preventing scheduling\n 621→\tassert.Equal(t, int32(0), intolerantDep.Status.ReadyReplicas, \"Intolerant deployment should not have ready replicas due to taint\")\n 622→\n 623→\tt.Logf(\"PASS: Basic taints scheduling E2E test passed - taints properly prevent intolerant pods from scheduling\")\n 624→}\n 625→\n 626→// TestE2ETaintValues tests that taint values are correctly applied and updated\n 627→func TestE2ETaintValues(t *testing.T) {\n 628→\tsuite := SetupE2ETestSuite(t)\n 629→\tctx := context.Background()\n 630→\n 631→\t// Create unique names for this test\n 632→\ttestName := fmt.Sprintf(\"taint-values-%d\", time.Now().Unix())\n 633→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 634→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 635→\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n 636→\n 637→\t// Create NodeClass\n 638→\tnodeClass := &v1alpha1.IBMNodeClass{\n 639→\t\tObjectMeta: metav1.ObjectMeta{\n 640→\t\t\tName: nodeClassName,\n 641→\t\t},\n 642→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 643→\t\t\tRegion: suite.testRegion,\n 644→\t\t\tZone: suite.testZone,\n 645→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 646→\t\t\tVPC: suite.testVPC,\n 647→\t\t\tSubnet: suite.testSubnet,\n 648→\t\t\tImage: suite.testImage,\n 649→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 650→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 651→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 652→\t\t\tResourceGroup: suite.testResourceGroup,\n 653→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 654→\t\t},\n 655→\t}\n 656→\n 657→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 658→\trequire.NoError(t, err)\n 659→\tdefer func() {\n 660→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 661→\t}()\n 662→\n 663→\t// Create NodePool with specific taint values\n 664→\tnodePool := &karpv1.NodePool{\n 665→\t\tObjectMeta: metav1.ObjectMeta{\n 666→\t\t\tName: nodePoolName,\n 667→\t\t},\n 668→\t\tSpec: karpv1.NodePoolSpec{\n 669→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 670→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 671→\t\t\t\t\tLabels: map[string]string{\n 672→\t\t\t\t\t\t\"test\": testName,\n 673→\t\t\t\t\t},\n 674→\t\t\t\t},\n 675→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 676→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 677→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 678→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 679→\t\t\t\t\t\tName: nodeClassName,\n 680→\t\t\t\t\t},\n 681→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 682→\t\t\t\t\t\t{\n 683→\t\t\t\t\t\t\tNodeSelectorRequirement: 
corev1.NodeSelectorRequirement{\n 684→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 685→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 686→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 687→\t\t\t\t\t\t\t},\n 688→\t\t\t\t\t\t},\n 689→\t\t\t\t\t},\n 690→\t\t\t\t\tTaints: []corev1.Taint{\n 691→\t\t\t\t\t\t{\n 692→\t\t\t\t\t\t\tKey: \"workload-type\",\n 693→\t\t\t\t\t\t\tValue: \"batch-processing\",\n 694→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 695→\t\t\t\t\t\t},\n 696→\t\t\t\t\t\t{\n 697→\t\t\t\t\t\t\tKey: \"priority\",\n 698→\t\t\t\t\t\t\tValue: \"high\",\n 699→\t\t\t\t\t\t\tEffect: corev1.TaintEffectPreferNoSchedule,\n 700→\t\t\t\t\t\t},\n 701→\t\t\t\t\t},\n 702→\t\t\t\t},\n 703→\t\t\t},\n 704→\t\t},\n 705→\t}\n 706→\n 707→\terr = suite.kubeClient.Create(ctx, nodePool)\n 708→\trequire.NoError(t, err)\n 709→\tdefer func() {\n 710→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 711→\t}()\n 712→\n 713→\t// Create deployment that tolerates the taints with exact values\n 714→\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n 715→\t\t{\n 716→\t\t\tKey: \"workload-type\",\n 717→\t\t\tValue: \"batch-processing\",\n 718→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 719→\t\t\tOperator: corev1.TolerationOpEqual,\n 720→\t\t},\n 721→\t\t{\n 722→\t\t\tKey: \"priority\",\n 723→\t\t\tValue: \"high\",\n 724→\t\t\tEffect: corev1.TaintEffectPreferNoSchedule,\n 725→\t\t\tOperator: corev1.TolerationOpEqual,\n 726→\t\t},\n 727→\t}, map[string]string{\n 728→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 729→\t})\n 730→\n 731→\terr = suite.kubeClient.Create(ctx, deployment)\n 732→\trequire.NoError(t, err)\n 733→\tdefer func() {\n 734→\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n 735→\t}()\n 736→\n 737→\t// Wait for deployment to become ready\n 738→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 739→\t\tvar dep appsv1.Deployment\n 740→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n 741→\t\t\tNamespace: deployment.Namespace,\n 742→\t\t\tName: deployment.Name,\n 743→\t\t}, &dep)\n 744→\t\tif getErr != nil {\n 745→\t\t\treturn false, getErr\n 746→\t\t}\n 747→\t\treturn dep.Status.ReadyReplicas >= 1, nil\n 748→\t})\n 749→\trequire.NoError(t, err, \"Deployment should become ready\")\n 750→\n 751→\t// Get the created node and verify taint values\n 752→\tvar node corev1.Node\n 753→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, time.Minute, true, func(ctx context.Context) (bool, error) {\n 754→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 755→\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 756→\t\tif listErr != nil {\n 757→\t\t\treturn false, listErr\n 758→\t\t}\n 759→\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n 760→\t\t\treturn false, nil\n 761→\t\t}\n 762→\n 763→\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n 764→\t\treturn err == nil, client.IgnoreNotFound(err)\n 765→\t})\n 766→\trequire.NoError(t, err, \"Node should be found\")\n 767→\n 768→\t// Verify exact taint values are present\n 769→\texpectedTaints := map[string]struct {\n 770→\t\tvalue string\n 771→\t\teffect corev1.TaintEffect\n 772→\t}{\n 773→\t\t\"workload-type\": {\"batch-processing\", corev1.TaintEffectNoSchedule},\n 
774→\t\t\"priority\": {\"high\", corev1.TaintEffectPreferNoSchedule},\n 775→\t}\n 776→\n 777→\tfoundTaints := make(map[string]bool)\n 778→\tfor _, taint := range node.Spec.Taints {\n 779→\t\tif expected, exists := expectedTaints[taint.Key]; exists {\n 780→\t\t\tassert.Equal(t, expected.value, taint.Value, \"Taint '%s' should have correct value\", taint.Key)\n 781→\t\t\tassert.Equal(t, expected.effect, taint.Effect, \"Taint '%s' should have correct effect\", taint.Key)\n 782→\t\t\tfoundTaints[taint.Key] = true\n 783→\t\t}\n 784→\t}\n 785→\n 786→\tassert.True(t, foundTaints[\"workload-type\"], \"Node should have 'workload-type' taint\")\n 787→\tassert.True(t, foundTaints[\"priority\"], \"Node should have 'priority' taint\")\n 788→\n 789→\tt.Logf(\"PASS: Taint values E2E test passed - taint values correctly applied and verified\")\n 790→}\n 791→\n 792→// TestE2ETaintSync tests taint synchronization from NodeClaim to Node\n 793→func TestE2ETaintSync(t *testing.T) {\n 794→\tsuite := SetupE2ETestSuite(t)\n 795→\tctx := context.Background()\n 796→\n 797→\t// Create unique names for this test\n 798→\ttestName := fmt.Sprintf(\"taint-sync-%d\", time.Now().Unix())\n 799→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 800→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 801→\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n 802→\n 803→\t// Create NodeClass\n 804→\tnodeClass := &v1alpha1.IBMNodeClass{\n 805→\t\tObjectMeta: metav1.ObjectMeta{\n 806→\t\t\tName: nodeClassName,\n 807→\t\t},\n 808→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 809→\t\t\tRegion: suite.testRegion,\n 810→\t\t\tZone: suite.testZone,\n 811→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 812→\t\t\tVPC: suite.testVPC,\n 813→\t\t\tSubnet: suite.testSubnet,\n 814→\t\t\tImage: suite.testImage,\n 815→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 816→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 817→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 818→\t\t\tResourceGroup: suite.testResourceGroup,\n 819→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 820→\t\t},\n 821→\t}\n 822→\n 823→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 824→\trequire.NoError(t, err)\n 825→\tdefer func() {\n 826→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 827→\t}()\n 828→\n 829→\t// Create NodePool with both regular and startup taints\n 830→\tnodePool := &karpv1.NodePool{\n 831→\t\tObjectMeta: metav1.ObjectMeta{\n 832→\t\t\tName: nodePoolName,\n 833→\t\t},\n 834→\t\tSpec: karpv1.NodePoolSpec{\n 835→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 836→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 837→\t\t\t\t\tLabels: map[string]string{\n 838→\t\t\t\t\t\t\"test\": testName,\n 839→\t\t\t\t\t},\n 840→\t\t\t\t},\n 841→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 842→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 843→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 844→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 845→\t\t\t\t\t\tName: nodeClassName,\n 846→\t\t\t\t\t},\n 847→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 848→\t\t\t\t\t\t{\n 849→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 850→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 851→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 852→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 853→\t\t\t\t\t\t\t},\n 854→\t\t\t\t\t\t},\n 855→\t\t\t\t\t},\n 856→\t\t\t\t\tTaints: 
[]corev1.Taint{\n 857→\t\t\t\t\t\t{\n 858→\t\t\t\t\t\t\tKey: \"regular-taint\",\n 859→\t\t\t\t\t\t\tValue: \"regular-value\",\n 860→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 861→\t\t\t\t\t\t},\n 862→\t\t\t\t\t},\n 863→\t\t\t\t\tStartupTaints: []corev1.Taint{\n 864→\t\t\t\t\t\t{\n 865→\t\t\t\t\t\t\tKey: \"startup-taint\",\n 866→\t\t\t\t\t\t\tValue: \"startup-value\",\n 867→\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoExecute,\n 868→\t\t\t\t\t\t},\n 869→\t\t\t\t\t},\n 870→\t\t\t\t},\n 871→\t\t\t},\n 872→\t\t},\n 873→\t}\n 874→\n 875→\terr = suite.kubeClient.Create(ctx, nodePool)\n 876→\trequire.NoError(t, err)\n 877→\tdefer func() {\n 878→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 879→\t}()\n 880→\n 881→\t// Create deployment that tolerates all taints\n 882→\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n 883→\t\t{\n 884→\t\t\tKey: \"regular-taint\",\n 885→\t\t\tValue: \"regular-value\",\n 886→\t\t\tEffect: corev1.TaintEffectNoSchedule,\n 887→\t\t\tOperator: corev1.TolerationOpEqual,\n 888→\t\t},\n 889→\t\t{\n 890→\t\t\tKey: \"startup-taint\",\n 891→\t\t\tValue: \"startup-value\",\n 892→\t\t\tEffect: corev1.TaintEffectNoExecute,\n 893→\t\t\tOperator: corev1.TolerationOpEqual,\n 894→\t\t},\n 895→\t}, map[string]string{\n 896→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 897→\t})\n 898→\n 899→\terr = suite.kubeClient.Create(ctx, deployment)\n 900→\trequire.NoError(t, err)\n 901→\tdefer func() {\n 902→\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n 903→\t}()\n 904→\n 905→\t// Wait for NodeClaim to be created\n 906→\tvar nodeClaim karpv1.NodeClaim\n 907→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 908→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 909→\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 910→\t\tif listErr != nil {\n 911→\t\t\treturn false, listErr\n 912→\t\t}\n 913→\t\tif len(nodeClaimList.Items) > 0 {\n 914→\t\t\tnodeClaim = nodeClaimList.Items[0]\n 915→\t\t\treturn true, nil\n 916→\t\t}\n 917→\t\treturn false, nil\n 918→\t})\n 919→\trequire.NoError(t, err, \"NodeClaim should be created\")\n 920→\n 921→\t// Verify NodeClaim has expected taints\n 922→\tassert.Len(t, nodeClaim.Spec.Taints, 1, \"NodeClaim should have 1 regular taint\")\n 923→\tassert.Len(t, nodeClaim.Spec.StartupTaints, 1, \"NodeClaim should have 1 startup taint\")\n 924→\n 925→\t// Wait for Node to be created and registered\n 926→\tvar node corev1.Node\n 927→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 928→\t\tif nodeClaim.Status.NodeName == \"\" {\n 929→\t\t\t// Refresh NodeClaim\n 930→\t\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Name}, &nodeClaim)\n 931→\t\t\tif getErr != nil {\n 932→\t\t\t\treturn false, getErr\n 933→\t\t\t}\n 934→\t\t\treturn false, nil\n 935→\t\t}\n 936→\n 937→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n 938→\t\treturn getErr == nil, client.IgnoreNotFound(getErr)\n 939→\t})\n 940→\trequire.NoError(t, err, \"Node should be created\")\n 941→\n 942→\t// Verify Node has both regular and startup taints synced from NodeClaim\n 943→\tfoundRegularTaint := false\n 944→\tfoundStartupTaint := false\n 945→\n 946→\tfor _, taint := range node.Spec.Taints {\n 947→\t\tif taint.Key == \"regular-taint\" && taint.Value == 
\"regular-value\" {\n 948→\t\t\tfoundRegularTaint = true\n 949→\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n 950→\t\t}\n 951→\t\tif taint.Key == \"startup-taint\" && taint.Value == \"startup-value\" {\n 952→\t\t\tfoundStartupTaint = true\n 953→\t\t\tassert.Equal(t, corev1.TaintEffectNoExecute, taint.Effect)\n 954→\t\t}\n 955→\t}\n 956→\n 957→\tassert.True(t, foundRegularTaint, \"Node should have regular taint synced from NodeClaim\")\n 958→\tassert.True(t, foundStartupTaint, \"Node should have startup taint synced from NodeClaim\")\n 959→\n 960→\tt.Logf(\"PASS: Taint sync E2E test passed - both regular and startup taints properly synced from NodeClaim to Node\")\n 961→}\n 962→\n 963→// TestE2EUnregisteredTaintHandling tests proper unregistered taint lifecycle\n 964→func TestE2EUnregisteredTaintHandling(t *testing.T) {\n 965→\tsuite := SetupE2ETestSuite(t)\n 966→\tctx := context.Background()\n 967→\n 968→\t// Create unique names for this test\n 969→\ttestName := fmt.Sprintf(\"unregistered-taint-%d\", time.Now().Unix())\n 970→\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n 971→\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n 972→\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n 973→\n 974→\t// Create NodeClass\n 975→\tnodeClass := &v1alpha1.IBMNodeClass{\n 976→\t\tObjectMeta: metav1.ObjectMeta{\n 977→\t\t\tName: nodeClassName,\n 978→\t\t},\n 979→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 980→\t\t\tRegion: suite.testRegion,\n 981→\t\t\tZone: suite.testZone,\n 982→\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n 983→\t\t\tVPC: suite.testVPC,\n 984→\t\t\tSubnet: suite.testSubnet,\n 985→\t\t\tImage: suite.testImage,\n 986→\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n 987→\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n 988→\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n 989→\t\t\tResourceGroup: suite.testResourceGroup,\n 990→\t\t\tSSHKeys: []string{suite.testSshKeyId},\n 991→\t\t},\n 992→\t}\n 993→\n 994→\terr := suite.kubeClient.Create(ctx, nodeClass)\n 995→\trequire.NoError(t, err)\n 996→\tdefer func() {\n 997→\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n 998→\t}()\n 999→\n 1000→\t// Create NodePool\n 1001→\tnodePool := &karpv1.NodePool{\n 1002→\t\tObjectMeta: metav1.ObjectMeta{\n 1003→\t\t\tName: nodePoolName,\n 1004→\t\t},\n 1005→\t\tSpec: karpv1.NodePoolSpec{\n 1006→\t\t\tTemplate: karpv1.NodeClaimTemplate{\n 1007→\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n 1008→\t\t\t\t\tLabels: map[string]string{\n 1009→\t\t\t\t\t\t\"test\": testName,\n 1010→\t\t\t\t\t},\n 1011→\t\t\t\t},\n 1012→\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n 1013→\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n 1014→\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n 1015→\t\t\t\t\t\tKind: \"IBMNodeClass\",\n 1016→\t\t\t\t\t\tName: nodeClassName,\n 1017→\t\t\t\t\t},\n 1018→\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n 1019→\t\t\t\t\t\t{\n 1020→\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n 1021→\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n 1022→\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n 1023→\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n 1024→\t\t\t\t\t\t\t},\n 1025→\t\t\t\t\t\t},\n 1026→\t\t\t\t\t},\n 1027→\t\t\t\t},\n 1028→\t\t\t},\n 1029→\t\t},\n 1030→\t}\n 1031→\n 1032→\terr = suite.kubeClient.Create(ctx, nodePool)\n 1033→\trequire.NoError(t, err)\n 
1034→\tdefer func() {\n 1035→\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n 1036→\t}()\n 1037→\n 1038→\t// Create deployment to force node creation\n 1039→\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, nil, map[string]string{\n 1040→\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n 1041→\t})\n 1042→\n 1043→\terr = suite.kubeClient.Create(ctx, deployment)\n 1044→\trequire.NoError(t, err)\n 1045→\tdefer func() {\n 1046→\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n 1047→\t}()\n 1048→\n 1049→\t// Wait for Node to be created and registered\n 1050→\tvar node corev1.Node\n 1051→\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 1052→\t\tnodeClaimList := &karpv1.NodeClaimList{}\n 1053→\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n 1054→\t\tif listErr != nil {\n 1055→\t\t\treturn false, listErr\n 1056→\t\t}\n 1057→\t\tif len(nodeClaimList.Items) == 0 {\n 1058→\t\t\treturn false, nil\n 1059→\t\t}\n 1060→\n 1061→\t\tnodeClaim := nodeClaimList.Items[0]\n 1062→\t\tif nodeClaim.Status.NodeName == \"\" {\n 1063→\t\t\treturn false, nil\n 1064→\t\t}\n 1065→\n 1066→\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n 1067→\t\tif getErr != nil {\n 1068→\t\t\treturn false, client.IgnoreNotFound(getErr)\n 1069→\t\t}\n 1070→\n 1071→\t\t// Check if node is registered (unregistered taint should be removed)\n 1072→\t\treturn nodeClaim.StatusConditions().Get(karpv1.ConditionTypeRegistered).IsTrue(), nil\n 1073→\t})\n 1074→\trequire.NoError(t, err, \"Node should be registered\")\n 1075→\n 1076→\t// Verify unregistered taint has been removed after registration\n 1077→\tfoundUnregisteredTaint := false\n 1078→\tfor _, taint := range node.Spec.Taints {\n 1079→\t\tif taint.Key == karpv1.UnregisteredTaintKey {\n 1080→\t\t\tfoundUnregisteredTaint = true\n 1081→\t\t\tbreak\n 1082→\t\t}\n 1083→\t}\n 1084→\tassert.False(t, foundUnregisteredTaint, \"Unregistered taint should be removed after node registration\")\n 1085→\n 1086→\tt.Logf(\"PASS: Unregistered taint handling E2E test passed - unregistered taint properly removed after registration\")\n 1087→}\n 1088→\n 1089→// Helper function to create resource-intensive workloads that force new node creation\n 1090→func createResourceIntensiveWorkload(name, testLabel string, tolerations []corev1.Toleration, nodeSelector map[string]string) *appsv1.Deployment {\n 1091→\treturn &appsv1.Deployment{\n 1092→\t\tObjectMeta: metav1.ObjectMeta{\n 1093→\t\t\tName: name,\n 1094→\t\t\tNamespace: \"default\",\n 1095→\t\t},\n 1096→\t\tSpec: appsv1.DeploymentSpec{\n 1097→\t\t\tReplicas: lo.ToPtr(int32(1)),\n 1098→\t\t\tSelector: &metav1.LabelSelector{\n 1099→\t\t\t\tMatchLabels: map[string]string{\"app\": name},\n 1100→\t\t\t},\n 1101→\t\t\tTemplate: corev1.PodTemplateSpec{\n 1102→\t\t\t\tObjectMeta: metav1.ObjectMeta{\n 1103→\t\t\t\t\tLabels: map[string]string{\n 1104→\t\t\t\t\t\t\"app\": name,\n 1105→\t\t\t\t\t\t\"test\": testLabel,\n 1106→\t\t\t\t\t},\n 1107→\t\t\t\t},\n 1108→\t\t\t\tSpec: corev1.PodSpec{\n 1109→\t\t\t\t\tTolerations: tolerations,\n 1110→\t\t\t\t\tNodeSelector: nodeSelector,\n 1111→\t\t\t\t\tContainers: []corev1.Container{\n 1112→\t\t\t\t\t\t{\n 1113→\t\t\t\t\t\t\tName: \"resource-intensive-container\",\n 1114→\t\t\t\t\t\t\tImage: \"quay.io/isovalent/busybox:1.37.0\",\n 1115→\t\t\t\t\t\t\tCommand: []string{\n 1116→\t\t\t\t\t\t\t\t\"/bin/sh\",\n 
1117→\t\t\t\t\t\t\t\t\"-c\",\n 1118→\t\t\t\t\t\t\t\t\"sleep 3600\",\n 1119→\t\t\t\t\t\t\t},\n 1120→\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n 1121→\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n 1122→\t\t\t\t\t\t\t\t\t// Request enough resources to force new node creation\n 1123→\t\t\t\t\t\t\t\t\t// Conservative sizing for bx2a-2x8 (2 CPU total, 250m for calico-node)\n 1124→\t\t\t\t\t\t\t\t\t// Using 1000m to ensure sufficient headroom for provisioning\n 1125→\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n 1126→\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"4Gi\"),\n 1127→\t\t\t\t\t\t\t\t},\n 1128→\t\t\t\t\t\t\t},\n 1129→\t\t\t\t\t\t},\n 1130→\t\t\t\t\t},\n 1131→\t\t\t\t},\n 1132→\t\t\t},\n 1133→\t\t},\n 1134→\t}\n 1135→}\n 1136→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"440c424f-d532-4918-a113-323d3fe1fb2d","timestamp":"2026-01-10T10:45:01.681Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/e2e_taints_test.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/samber/lo\"\n\t\"github.com/stretchr/testify/assert\"\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/api/resource\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\t\"k8s.io/apimachinery/pkg/util/wait\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// TestE2EStartupTaints tests that startup taints are properly applied to new nodes\nfunc TestE2EStartupTaints(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"startup-taints-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: 
[]string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool with startup taints\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tStartupTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"node.kubernetes.io/not-ready\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"example.com/initializing\",\n\t\t\t\t\t\t\tValue: \"true\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create deployment that forces node creation with nodeSelector\n\t// This ensures the pod is only scheduled on nodes from the specific NodePool with startup taints\n\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n\t\t{\n\t\t\tKey: \"node.kubernetes.io/not-ready\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t},\n\t\t{\n\t\t\tKey: \"example.com/initializing\",\n\t\t\tValue: \"true\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t},\n\t}, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n\t}()\n\n\t// Wait for NodeClaim to be created\n\tvar nodeClaim karpv1.NodeClaim\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\t// First check if deployment pods are pending (which should trigger NodeClaim creation)\n\t\tvar deployment appsv1.Deployment\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n\t\t\tNamespace: \"default\",\n\t\t\tName: deploymentName,\n\t\t}, &deployment)\n\t\tif getErr != nil {\n\t\t\tt.Logf(\"Could not get deployment %s: %v\", deploymentName, getErr)\n\t\t} else {\n\t\t\tt.Logf(\"Deployment %s: %d/%d replicas ready\", deploymentName, deployment.Status.ReadyReplicas, *deployment.Spec.Replicas)\n\t\t}\n\n\t\t// Check all NodeClaims (not just ones with our label)\n\t\tallNodeClaims := &karpv1.NodeClaimList{}\n\t\tif listErr := 
suite.kubeClient.List(ctx, allNodeClaims); listErr == nil {\n\t\t\tt.Logf(\"Total NodeClaims in cluster: %d\", len(allNodeClaims.Items))\n\t\t}\n\n\t\t// Check for NodeClaims with our test label\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tt.Logf(\"NodeClaims with test label '%s': %d\", testName, len(nodeClaimList.Items))\n\t\tif len(nodeClaimList.Items) > 0 {\n\t\t\tnodeClaim = nodeClaimList.Items[0]\n\t\t\tt.Logf(\"Found NodeClaim: %s\", nodeClaim.Name)\n\t\t\treturn true, nil\n\t\t}\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"NodeClaim should be created\")\n\n\t// Verify NodeClaim has startup taints\n\tassert.Len(t, nodeClaim.Spec.StartupTaints, 2, \"NodeClaim should have 2 startup taints\")\n\n\tfoundNotReady := false\n\tfoundInitializing := false\n\tfor _, taint := range nodeClaim.Spec.StartupTaints {\n\t\tif taint.Key == \"node.kubernetes.io/not-ready\" {\n\t\t\tfoundNotReady = true\n\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n\t\t}\n\t\tif taint.Key == \"example.com/initializing\" {\n\t\t\tfoundInitializing = true\n\t\t\tassert.Equal(t, \"true\", taint.Value)\n\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n\t\t}\n\t}\n\tassert.True(t, foundNotReady, \"NodeClaim should have 'not-ready' startup taint\")\n\tassert.True(t, foundInitializing, \"NodeClaim should have 'initializing' startup taint\")\n\n\t// Wait for Node to be created and registered\n\tvar node corev1.Node\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tif nodeClaim.Status.NodeName == \"\" {\n\t\t\t// Refresh NodeClaim to get updated status\n\t\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Name}, &nodeClaim)\n\t\t\tif getErr != nil {\n\t\t\t\treturn false, getErr\n\t\t\t}\n\t\t\treturn false, nil\n\t\t}\n\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n\t\tif getErr != nil {\n\t\t\treturn false, client.IgnoreNotFound(getErr)\n\t\t}\n\t\treturn true, nil\n\t})\n\trequire.NoError(t, err, \"Node should be created and registered\")\n\n\t// Verify startup taints are applied to the Node\n\tfoundNotReadyOnNode := false\n\tfoundInitializingOnNode := false\n\tfor _, taint := range node.Spec.Taints {\n\t\tif taint.Key == \"node.kubernetes.io/not-ready\" {\n\t\t\tfoundNotReadyOnNode = true\n\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n\t\t}\n\t\tif taint.Key == \"example.com/initializing\" {\n\t\t\tfoundInitializingOnNode = true\n\t\t\tassert.Equal(t, \"true\", taint.Value)\n\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n\t\t}\n\t}\n\tassert.True(t, foundNotReadyOnNode, \"Node should have 'not-ready' startup taint applied\")\n\tassert.True(t, foundInitializingOnNode, \"Node should have 'initializing' startup taint applied\")\n\n\t// Verify pods can schedule despite startup taints (ignored for provisioning)\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar updatedDeployment appsv1.Deployment\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n\t\t\tNamespace: deployment.Namespace,\n\t\t\tName: deployment.Name,\n\t\t}, &updatedDeployment)\n\t\tif getErr != nil {\n\t\t\treturn false, getErr\n\t\t}\n\t\treturn 
updatedDeployment.Status.ReadyReplicas >= 1, nil\n\t})\n\trequire.NoError(t, err, \"Deployment should have at least 1 ready replica despite startup taints\")\n\n\tt.Logf(\"PASS: StartupTaints E2E test passed - startup taints properly applied and pods scheduled\")\n}\n\n// TestE2EStartupTaintsRemoval tests startup taint removal by DaemonSet\nfunc TestE2EStartupTaintsRemoval(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"startup-taint-removal-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\tdaemonSetName := fmt.Sprintf(\"taint-remover-%s\", testName)\n\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool with startup taints\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tStartupTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"example.com/startup-init\",\n\t\t\t\t\t\t\tValue: \"pending\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create DaemonSet that tolerates and removes startup taints\n\tdaemonSet := &appsv1.DaemonSet{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: daemonSetName,\n\t\t\tNamespace: \"default\",\n\t\t},\n\t\tSpec: appsv1.DaemonSetSpec{\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\"app\": daemonSetName},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: 
map[string]string{\"app\": daemonSetName},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tTolerations: []corev1.Toleration{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"example.com/startup-init\",\n\t\t\t\t\t\t\tValue: \"pending\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"taint-remover\",\n\t\t\t\t\t\t\tImage: \"quay.io/isovalent/busybox:1.37.0\",\n\t\t\t\t\t\t\tCommand: []string{\n\t\t\t\t\t\t\t\t\"/bin/sh\",\n\t\t\t\t\t\t\t\t\"-c\",\n\t\t\t\t\t\t\t\t`\n# Wait for node to be ready, then remove startup taint\nNODE_NAME=${MY_NODE_NAME}\necho \"Waiting for node $NODE_NAME to be ready...\"\nsleep 30\n\n# Use kubectl to remove the startup taint\nkubectl patch node $NODE_NAME --type=json -p='[{\"op\": \"remove\", \"path\": \"/spec/taints\", \"value\": {\"key\": \"example.com/startup-init\", \"value\": \"pending\", \"effect\": \"NoSchedule\"}}]'\n\n# Keep container running\nsleep 3600\n\t\t\t\t\t\t\t\t`,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\tEnv: []corev1.EnvVar{\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tName: \"MY_NODE_NAME\",\n\t\t\t\t\t\t\t\t\tValueFrom: &corev1.EnvVarSource{\n\t\t\t\t\t\t\t\t\t\tFieldRef: &corev1.ObjectFieldSelector{\n\t\t\t\t\t\t\t\t\t\t\tFieldPath: \"spec.nodeName\",\n\t\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tServiceAccountName: \"default\", // In practice, would use proper RBAC\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, daemonSet)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, daemonSet)\n\t}()\n\n\t// Create deployment to force node creation\n\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n\t\t{\n\t\t\tKey: \"example.com/startup-init\",\n\t\t\tValue: \"pending\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t},\n\t}, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n\t}()\n\n\t// Wait for Node to be created\n\tvar node corev1.Node\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n\t\t\treturn false, nil\n\t\t}\n\n\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n\t\treturn err == nil, client.IgnoreNotFound(err)\n\t})\n\trequire.NoError(t, err, \"Node should be created\")\n\n\t// Verify startup taint is initially present\n\tfoundStartupTaint := false\n\tfor _, taint := range node.Spec.Taints {\n\t\tif taint.Key == \"example.com/startup-init\" && taint.Value == \"pending\" {\n\t\t\tfoundStartupTaint = true\n\t\t\tbreak\n\t\t}\n\t}\n\tassert.True(t, foundStartupTaint, \"Node should initially have startup taint\")\n\n\t// NOTE: For a complete implementation, the DaemonSet would need proper RBAC permissions\n\t// to patch nodes and remove taints. 
This test verifies the taint is initially applied correctly.\n\n\tt.Logf(\"PASS: StartupTaints removal test setup complete - startup taint properly applied\")\n}\n\n// TestE2ETaintsBasicScheduling tests basic taint/toleration scheduling\nfunc TestE2ETaintsBasicScheduling(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"basic-taints-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\ttolerantDeploymentName := fmt.Sprintf(\"tolerant-deployment-%s\", testName)\n\tintolerantDeploymentName := fmt.Sprintf(\"intolerant-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool with regular taints\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"dedicated\",\n\t\t\t\t\t\t\tValue: \"gpu-workload\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create deployment that tolerates the taint\n\ttolerantDeployment := createResourceIntensiveWorkload(tolerantDeploymentName, testName, []corev1.Toleration{\n\t\t{\n\t\t\tKey: \"dedicated\",\n\t\t\tValue: \"gpu-workload\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\tOperator: corev1.TolerationOpEqual,\n\t\t},\n\t}, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, tolerantDeployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = 
suite.kubeClient.Delete(ctx, tolerantDeployment)\n\t}()\n\n\t// Wait for tolerant deployment to schedule and become ready\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar deployment appsv1.Deployment\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n\t\t\tNamespace: tolerantDeployment.Namespace,\n\t\t\tName: tolerantDeployment.Name,\n\t\t}, &deployment)\n\t\tif getErr != nil {\n\t\t\treturn false, getErr\n\t\t}\n\t\treturn deployment.Status.ReadyReplicas >= 1, nil\n\t})\n\trequire.NoError(t, err, \"Tolerant deployment should become ready\")\n\n\t// Verify node has the expected taint\n\tvar node corev1.Node\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, time.Minute, true, func(ctx context.Context) (bool, error) {\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n\t\t\treturn false, nil\n\t\t}\n\n\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n\t\treturn err == nil, client.IgnoreNotFound(err)\n\t})\n\trequire.NoError(t, err, \"Node should be found\")\n\n\t// Verify taint is present on node\n\tfoundTaint := false\n\tfor _, taint := range node.Spec.Taints {\n\t\tif taint.Key == \"dedicated\" && taint.Value == \"gpu-workload\" && taint.Effect == corev1.TaintEffectNoSchedule {\n\t\t\tfoundTaint = true\n\t\t\tbreak\n\t\t}\n\t}\n\tassert.True(t, foundTaint, \"Node should have 'dedicated=gpu-workload' taint\")\n\n\t// Create deployment that does NOT tolerate the taint (should remain pending)\n\tintolerantDeployment := createResourceIntensiveWorkload(intolerantDeploymentName, testName+\"intolerant\", nil, nil)\n\n\terr = suite.kubeClient.Create(ctx, intolerantDeployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, intolerantDeployment)\n\t}()\n\n\t// Wait and verify intolerant deployment does NOT become ready\n\ttime.Sleep(30 * time.Second) // Give it time to try scheduling\n\n\tvar intolerantDep appsv1.Deployment\n\terr = suite.kubeClient.Get(ctx, types.NamespacedName{\n\t\tNamespace: intolerantDeployment.Namespace,\n\t\tName: intolerantDeployment.Name,\n\t}, &intolerantDep)\n\trequire.NoError(t, err)\n\n\t// Should have 0 ready replicas due to taint preventing scheduling\n\tassert.Equal(t, int32(0), intolerantDep.Status.ReadyReplicas, \"Intolerant deployment should not have ready replicas due to taint\")\n\n\tt.Logf(\"PASS: Basic taints scheduling E2E test passed - taints properly prevent intolerant pods from scheduling\")\n}\n\n// TestE2ETaintValues tests that taint values are correctly applied and updated\nfunc TestE2ETaintValues(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"taint-values-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: 
suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool with specific taint values\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"workload-type\",\n\t\t\t\t\t\t\tValue: \"batch-processing\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"priority\",\n\t\t\t\t\t\t\tValue: \"high\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectPreferNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create deployment that tolerates the taints with exact values\n\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n\t\t{\n\t\t\tKey: \"workload-type\",\n\t\t\tValue: \"batch-processing\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\tOperator: corev1.TolerationOpEqual,\n\t\t},\n\t\t{\n\t\t\tKey: \"priority\",\n\t\t\tValue: \"high\",\n\t\t\tEffect: corev1.TaintEffectPreferNoSchedule,\n\t\t\tOperator: corev1.TolerationOpEqual,\n\t\t},\n\t}, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n\t}()\n\n\t// Wait for deployment to become ready\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar dep appsv1.Deployment\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{\n\t\t\tNamespace: deployment.Namespace,\n\t\t\tName: deployment.Name,\n\t\t}, &dep)\n\t\tif getErr != nil {\n\t\t\treturn false, getErr\n\t\t}\n\t\treturn dep.Status.ReadyReplicas >= 1, nil\n\t})\n\trequire.NoError(t, err, \"Deployment should become ready\")\n\n\t// Get the created node and verify taint values\n\tvar node corev1.Node\n\terr = 
wait.PollUntilContextTimeout(ctx, pollInterval, time.Minute, true, func(ctx context.Context) (bool, error) {\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tif len(nodeClaimList.Items) == 0 || nodeClaimList.Items[0].Status.NodeName == \"\" {\n\t\t\treturn false, nil\n\t\t}\n\n\t\terr = suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimList.Items[0].Status.NodeName}, &node)\n\t\treturn err == nil, client.IgnoreNotFound(err)\n\t})\n\trequire.NoError(t, err, \"Node should be found\")\n\n\t// Verify exact taint values are present\n\texpectedTaints := map[string]struct {\n\t\tvalue string\n\t\teffect corev1.TaintEffect\n\t}{\n\t\t\"workload-type\": {\"batch-processing\", corev1.TaintEffectNoSchedule},\n\t\t\"priority\": {\"high\", corev1.TaintEffectPreferNoSchedule},\n\t}\n\n\tfoundTaints := make(map[string]bool)\n\tfor _, taint := range node.Spec.Taints {\n\t\tif expected, exists := expectedTaints[taint.Key]; exists {\n\t\t\tassert.Equal(t, expected.value, taint.Value, \"Taint '%s' should have correct value\", taint.Key)\n\t\t\tassert.Equal(t, expected.effect, taint.Effect, \"Taint '%s' should have correct effect\", taint.Key)\n\t\t\tfoundTaints[taint.Key] = true\n\t\t}\n\t}\n\n\tassert.True(t, foundTaints[\"workload-type\"], \"Node should have 'workload-type' taint\")\n\tassert.True(t, foundTaints[\"priority\"], \"Node should have 'priority' taint\")\n\n\tt.Logf(\"PASS: Taint values E2E test passed - taint values correctly applied and verified\")\n}\n\n// TestE2ETaintSync tests taint synchronization from NodeClaim to Node\nfunc TestE2ETaintSync(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"taint-sync-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool with both regular and startup taints\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: 
nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"regular-taint\",\n\t\t\t\t\t\t\tValue: \"regular-value\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t\tStartupTaints: []corev1.Taint{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tKey: \"startup-taint\",\n\t\t\t\t\t\t\tValue: \"startup-value\",\n\t\t\t\t\t\t\tEffect: corev1.TaintEffectNoExecute,\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create deployment that tolerates all taints\n\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, []corev1.Toleration{\n\t\t{\n\t\t\tKey: \"regular-taint\",\n\t\t\tValue: \"regular-value\",\n\t\t\tEffect: corev1.TaintEffectNoSchedule,\n\t\t\tOperator: corev1.TolerationOpEqual,\n\t\t},\n\t\t{\n\t\t\tKey: \"startup-taint\",\n\t\t\tValue: \"startup-value\",\n\t\t\tEffect: corev1.TaintEffectNoExecute,\n\t\t\tOperator: corev1.TolerationOpEqual,\n\t\t},\n\t}, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n\t}()\n\n\t// Wait for NodeClaim to be created\n\tvar nodeClaim karpv1.NodeClaim\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tif len(nodeClaimList.Items) > 0 {\n\t\t\tnodeClaim = nodeClaimList.Items[0]\n\t\t\treturn true, nil\n\t\t}\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"NodeClaim should be created\")\n\n\t// Verify NodeClaim has expected taints\n\tassert.Len(t, nodeClaim.Spec.Taints, 1, \"NodeClaim should have 1 regular taint\")\n\tassert.Len(t, nodeClaim.Spec.StartupTaints, 1, \"NodeClaim should have 1 startup taint\")\n\n\t// Wait for Node to be created and registered\n\tvar node corev1.Node\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tif nodeClaim.Status.NodeName == \"\" {\n\t\t\t// Refresh NodeClaim\n\t\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Name}, &nodeClaim)\n\t\t\tif getErr != nil {\n\t\t\t\treturn false, getErr\n\t\t\t}\n\t\t\treturn false, nil\n\t\t}\n\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n\t\treturn getErr == nil, client.IgnoreNotFound(getErr)\n\t})\n\trequire.NoError(t, err, \"Node should be created\")\n\n\t// Verify Node has both regular and startup taints synced from NodeClaim\n\tfoundRegularTaint := false\n\tfoundStartupTaint := false\n\n\tfor _, taint := range node.Spec.Taints {\n\t\tif taint.Key == \"regular-taint\" && taint.Value == 
\"regular-value\" {\n\t\t\tfoundRegularTaint = true\n\t\t\tassert.Equal(t, corev1.TaintEffectNoSchedule, taint.Effect)\n\t\t}\n\t\tif taint.Key == \"startup-taint\" && taint.Value == \"startup-value\" {\n\t\t\tfoundStartupTaint = true\n\t\t\tassert.Equal(t, corev1.TaintEffectNoExecute, taint.Effect)\n\t\t}\n\t}\n\n\tassert.True(t, foundRegularTaint, \"Node should have regular taint synced from NodeClaim\")\n\tassert.True(t, foundStartupTaint, \"Node should have startup taint synced from NodeClaim\")\n\n\tt.Logf(\"PASS: Taint sync E2E test passed - both regular and startup taints properly synced from NodeClaim to Node\")\n}\n\n// TestE2EUnregisteredTaintHandling tests proper unregistered taint lifecycle\nfunc TestE2EUnregisteredTaintHandling(t *testing.T) {\n\tsuite := SetupE2ETestSuite(t)\n\tctx := context.Background()\n\n\t// Create unique names for this test\n\ttestName := fmt.Sprintf(\"unregistered-taint-%d\", time.Now().Unix())\n\tnodePoolName := fmt.Sprintf(\"test-nodepool-%s\", testName)\n\tnodeClassName := fmt.Sprintf(\"test-nodeclass-%s\", testName)\n\tdeploymentName := fmt.Sprintf(\"test-deployment-%s\", testName)\n\n\t// Create NodeClass\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodeClassName,\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: suite.testRegion,\n\t\t\tZone: suite.testZone,\n\t\t\tInstanceProfile: suite.GetAvailableInstanceType(t), // Dynamically detected instance type\n\t\t\tVPC: suite.testVPC,\n\t\t\tSubnet: suite.testSubnet,\n\t\t\tImage: suite.testImage,\n\t\t\tSecurityGroups: []string{suite.testSecurityGroup},\n\t\t\tAPIServerEndpoint: suite.APIServerEndpoint,\n\t\t\tBootstrapMode: lo.ToPtr(\"cloud-init\"),\n\t\t\tResourceGroup: suite.testResourceGroup,\n\t\t\tSSHKeys: []string{suite.testSshKeyId},\n\t\t},\n\t}\n\n\terr := suite.kubeClient.Create(ctx, nodeClass)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodeClass)\n\t}()\n\n\t// Create NodePool\n\tnodePool := &karpv1.NodePool{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: nodePoolName,\n\t\t},\n\t\tSpec: karpv1.NodePoolSpec{\n\t\t\tTemplate: karpv1.NodeClaimTemplate{\n\t\t\t\tObjectMeta: karpv1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"test\": testName,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: karpv1.NodeClaimTemplateSpec{\n\t\t\t\t\tNodeClassRef: &karpv1.NodeClassReference{\n\t\t\t\t\t\tGroup: \"karpenter-ibm.sh\",\n\t\t\t\t\t\tKind: \"IBMNodeClass\",\n\t\t\t\t\t\tName: nodeClassName,\n\t\t\t\t\t},\n\t\t\t\t\tRequirements: []karpv1.NodeSelectorRequirementWithMinValues{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tNodeSelectorRequirement: corev1.NodeSelectorRequirement{\n\t\t\t\t\t\t\t\tKey: \"node.kubernetes.io/instance-type\",\n\t\t\t\t\t\t\t\tOperator: corev1.NodeSelectorOpIn,\n\t\t\t\t\t\t\t\tValues: suite.GetMultipleInstanceTypes(t, 1), // Dynamically detected instance type\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n\n\terr = suite.kubeClient.Create(ctx, nodePool)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, nodePool)\n\t}()\n\n\t// Create deployment to force node creation\n\tdeployment := createResourceIntensiveWorkload(deploymentName, testName, nil, map[string]string{\n\t\t\"test\": testName, // Force scheduling on nodes from our NodePool\n\t})\n\n\terr = suite.kubeClient.Create(ctx, deployment)\n\trequire.NoError(t, err)\n\tdefer func() {\n\t\t_ = suite.kubeClient.Delete(ctx, deployment)\n\t}()\n\n\t// Wait for Node 
to be created and registered\n\tvar node corev1.Node\n\terr = wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tnodeClaimList := &karpv1.NodeClaimList{}\n\t\tlistErr := suite.kubeClient.List(ctx, nodeClaimList, client.MatchingLabels{\"test\": testName})\n\t\tif listErr != nil {\n\t\t\treturn false, listErr\n\t\t}\n\t\tif len(nodeClaimList.Items) == 0 {\n\t\t\treturn false, nil\n\t\t}\n\n\t\tnodeClaim := nodeClaimList.Items[0]\n\t\tif nodeClaim.Status.NodeName == \"\" {\n\t\t\treturn false, nil\n\t\t}\n\n\t\tgetErr := suite.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Status.NodeName}, &node)\n\t\tif getErr != nil {\n\t\t\treturn false, client.IgnoreNotFound(getErr)\n\t\t}\n\n\t\t// Check if node is registered (unregistered taint should be removed)\n\t\treturn nodeClaim.StatusConditions().Get(karpv1.ConditionTypeRegistered).IsTrue(), nil\n\t})\n\trequire.NoError(t, err, \"Node should be registered\")\n\n\t// Verify unregistered taint has been removed after registration\n\tfoundUnregisteredTaint := false\n\tfor _, taint := range node.Spec.Taints {\n\t\tif taint.Key == karpv1.UnregisteredTaintKey {\n\t\t\tfoundUnregisteredTaint = true\n\t\t\tbreak\n\t\t}\n\t}\n\tassert.False(t, foundUnregisteredTaint, \"Unregistered taint should be removed after node registration\")\n\n\tt.Logf(\"PASS: Unregistered taint handling E2E test passed - unregistered taint properly removed after registration\")\n}\n\n// Helper function to create resource-intensive workloads that force new node creation\nfunc createResourceIntensiveWorkload(name, testLabel string, tolerations []corev1.Toleration, nodeSelector map[string]string) *appsv1.Deployment {\n\treturn &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: name,\n\t\t\tNamespace: \"default\",\n\t\t},\n\t\tSpec: appsv1.DeploymentSpec{\n\t\t\tReplicas: lo.ToPtr(int32(1)),\n\t\t\tSelector: &metav1.LabelSelector{\n\t\t\t\tMatchLabels: map[string]string{\"app\": name},\n\t\t\t},\n\t\t\tTemplate: corev1.PodTemplateSpec{\n\t\t\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\t\t\tLabels: map[string]string{\n\t\t\t\t\t\t\"app\": name,\n\t\t\t\t\t\t\"test\": testLabel,\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t\tSpec: corev1.PodSpec{\n\t\t\t\t\tTolerations: tolerations,\n\t\t\t\t\tNodeSelector: nodeSelector,\n\t\t\t\t\tContainers: []corev1.Container{\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tName: \"resource-intensive-container\",\n\t\t\t\t\t\t\tImage: \"quay.io/isovalent/busybox:1.37.0\",\n\t\t\t\t\t\t\tCommand: []string{\n\t\t\t\t\t\t\t\t\"/bin/sh\",\n\t\t\t\t\t\t\t\t\"-c\",\n\t\t\t\t\t\t\t\t\"sleep 3600\",\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\tResources: corev1.ResourceRequirements{\n\t\t\t\t\t\t\t\tRequests: corev1.ResourceList{\n\t\t\t\t\t\t\t\t\t// Request enough resources to force new node creation\n\t\t\t\t\t\t\t\t\t// Conservative sizing for bx2a-2x8 (2 CPU total, 250m for calico-node)\n\t\t\t\t\t\t\t\t\t// Using 1000m to ensure sufficient headroom for provisioning\n\t\t\t\t\t\t\t\t\tcorev1.ResourceCPU: resource.MustParse(\"1000m\"),\n\t\t\t\t\t\t\t\t\tcorev1.ResourceMemory: resource.MustParse(\"4Gi\"),\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t},\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t},\n\t}\n}\n","numLines":1136,"startLine":1,"totalLines":1136}},"sourceToolAssistantUUID":"0f7c4d7e-686d-47c2-b0a9-04c360b7720d"}
{"parentUuid":"440c424f-d532-4918-a113-323d3fe1fb2d","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01McRXWTEc1tjDAcbneVUZyY","type":"message","role":"assistant","content":[{"type":"text","text":"Now let me read the userdata_test.go and check the helper/support files:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":29020,"cache_read_input_tokens":48228,"cache_creation":{"ephemeral_5m_input_tokens":29020,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZXGoBMGXe1RuGfsvaA","type":"assistant","uuid":"d32e22e4-04bc-4f09-a91d-935fbccd2eaf","timestamp":"2026-01-10T10:45:05.008Z"}
{"parentUuid":"d32e22e4-04bc-4f09-a91d-935fbccd2eaf","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01McRXWTEc1tjDAcbneVUZyY","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_014fYkVUm5scDbVJyKWxtUD2","name":"Bash","input":{"command":"find /home/josie/development/karpenter-ibm/test/e2e -name \"userdata_test.go\" -o -name \"*userdata*\" 2>/dev/null | head -20","description":"Search for userdata test files"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":29020,"cache_read_input_tokens":48228,"cache_creation":{"ephemeral_5m_input_tokens":29020,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZXGoBMGXe1RuGfsvaA","type":"assistant","uuid":"2f739efa-4602-43e9-a419-8b905c428667","timestamp":"2026-01-10T10:45:05.750Z"}
{"parentUuid":"2f739efa-4602-43e9-a419-8b905c428667","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01McRXWTEc1tjDAcbneVUZyY","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_019eoxZQsUp7JUV45aS5QQXZ","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/suite.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":29020,"cache_read_input_tokens":48228,"cache_creation":{"ephemeral_5m_input_tokens":29020,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZXGoBMGXe1RuGfsvaA","type":"assistant","uuid":"041cbab0-e3ec-454e-9200-e8b4f2ef05eb","timestamp":"2026-01-10T10:45:05.974Z"}
{"parentUuid":"041cbab0-e3ec-454e-9200-e8b4f2ef05eb","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01McRXWTEc1tjDAcbneVUZyY","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_015u9dtTrRMgDnebTKyYUgjq","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/waiting.go"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":29020,"cache_read_input_tokens":48228,"cache_creation":{"ephemeral_5m_input_tokens":29020,"ephemeral_1h_input_tokens":0},"output_tokens":261,"service_tier":"standard"}},"requestId":"req_011CWyZXGoBMGXe1RuGfsvaA","type":"assistant","uuid":"91da583b-19da-4df0-9ac5-0920e3350f56","timestamp":"2026-01-10T10:45:06.208Z"}
{"parentUuid":"041cbab0-e3ec-454e-9200-e8b4f2ef05eb","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_019eoxZQsUp7JUV45aS5QQXZ","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"os\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/go-logr/zapr\"\n 28→\t\"github.com/stretchr/testify/require\"\n 29→\t\"go.uber.org/zap\"\n 30→\tappsv1 \"k8s.io/api/apps/v1\"\n 31→\tcorev1 \"k8s.io/api/core/v1\"\n 32→\tpolicyv1 \"k8s.io/api/policy/v1\"\n 33→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 34→\t\"k8s.io/apimachinery/pkg/runtime\"\n 35→\t\"k8s.io/apimachinery/pkg/runtime/schema\"\n 36→\t\"k8s.io/client-go/tools/clientcmd\"\n 37→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 38→\t\"sigs.k8s.io/controller-runtime/pkg/client/config\"\n 39→\t\"sigs.k8s.io/controller-runtime/pkg/log\"\n 40→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 41→\n 42→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 43→)\n 44→\n 45→const (\n 46→\ttestTimeout = 10 * time.Minute // Timeout for node provisioning\n 47→\tpollInterval = 5 * time.Second // Faster polling for quicker test completion\n 48→)\n 49→\n 50→// E2ETestSuite contains the test environment\n 51→type E2ETestSuite struct {\n 52→\tkubeClient client.Client\n 53→\n 54→\t// Test IBM Cloud resources\n 55→\ttestVPC string\n 56→\ttestSubnet string\n 57→\ttestImage string\n 58→\ttestRegion string\n 59→\ttestZone string\n 60→\ttestSecurityGroup string\n 61→\ttestResourceGroup string\n 62→\ttestSshKeyId string\n 63→\tAPIServerEndpoint string\n 64→}\n 65→\n 66→// NodeSnapshot represents a snapshot of a node's state for stability monitoring\n 67→type NodeSnapshot struct {\n 68→\tName string\n 69→\tProviderID string\n 70→\tInstanceType string\n 71→\tCreationTime time.Time\n 72→\tLabels map[string]string\n 73→}\n 74→\n 75→func init() {\n 76→\t// Initialize logger for controller-runtime to avoid warnings\n 77→\tzapConfig := zap.NewDevelopmentConfig()\n 78→\n 79→\t// Reduce noise in e2e tests unless verbose mode is enabled\n 80→\tif os.Getenv(\"E2E_VERBOSE\") != \"true\" {\n 81→\t\tzapConfig.Level = zap.NewAtomicLevelAt(zap.InfoLevel)\n 82→\t}\n 83→\n 84→\tzapLogger, _ := zapConfig.Build()\n 85→\tlog.SetLogger(zapr.NewLogger(zapLogger))\n 86→}\n 87→\n 88→// SetupE2ETestSuite initializes the test environment with pre-test cleanup\n 89→func SetupE2ETestSuite(t *testing.T) *E2ETestSuite {\n 90→\t// Skip if not running E2E tests\n 91→\tif os.Getenv(\"RUN_E2E_TESTS\") != \"true\" {\n 92→\t\tt.Skip(\"Skipping E2E tests - set RUN_E2E_TESTS=true to run\")\n 93→\t}\n 94→\n 
95→\t// Verify required environment variables\n 96→\trequiredEnvVars := []string{\n 97→\t\t\"IBMCLOUD_API_KEY\",\n 98→\t\t\"IBMCLOUD_REGION\",\n 99→\t\t\"TEST_VPC_ID\",\n 100→\t\t\"TEST_SUBNET_ID\",\n 101→\t\t\"TEST_IMAGE_ID\",\n 102→\t\t\"TEST_ZONE\",\n 103→\t\t\"TEST_SECURITY_GROUP_ID\",\n 104→\t\t\"KUBERNETES_API_SERVER_ENDPOINT\",\n 105→\t\t\"IBM_RESOURCE_GROUP_ID\",\n 106→\t\t\"IBM_SSH_KEY_ID\",\n 107→\t}\n 108→\n 109→\tfor _, envVar := range requiredEnvVars {\n 110→\t\tif os.Getenv(envVar) == \"\" {\n 111→\t\t\tt.Fatalf(\"Required environment variable %s is not set\", envVar)\n 112→\t\t}\n 113→\t}\n 114→\n 115→\t// Create Kubernetes client using kubeconfig from environment or default\n 116→\tcfg, err := config.GetConfig()\n 117→\tif err != nil {\n 118→\t\t// Fallback to explicit kubeconfig if KUBECONFIG env is not set\n 119→\t\tkubeconfigPath := os.Getenv(\"KUBECONFIG\")\n 120→\t\tif kubeconfigPath == \"\" {\n 121→\t\t\tkubeconfigPath = \"../../kubecofig_test\"\n 122→\t\t}\n 123→\t\tcfg, err = clientcmd.BuildConfigFromFlags(\"\", kubeconfigPath)\n 124→\t\trequire.NoError(t, err)\n 125→\t}\n 126→\n 127→\t// Use the global scheme which has Karpenter v1 types registered automatically\n 128→\tscheme := runtime.NewScheme()\n 129→\trequire.NoError(t, v1alpha1.AddToScheme(scheme))\n 130→\trequire.NoError(t, corev1.AddToScheme(scheme))\n 131→\trequire.NoError(t, appsv1.AddToScheme(scheme))\n 132→\trequire.NoError(t, policyv1.AddToScheme(scheme))\n 133→\n 134→\t// Add Karpenter v1 types manually with proper group version registration\n 135→\tkarpenterGV := schema.GroupVersion{Group: \"karpenter.sh\", Version: \"v1\"}\n 136→\tmetav1.AddToGroupVersion(scheme, karpenterGV)\n 137→\tscheme.AddKnownTypes(karpenterGV,\n 138→\t\t&karpv1.NodePool{},\n 139→\t\t&karpv1.NodePoolList{},\n 140→\t\t&karpv1.NodeClaim{},\n 141→\t\t&karpv1.NodeClaimList{})\n 142→\n 143→\tkubeClient, err := client.New(cfg, client.Options{Scheme: scheme})\n 144→\trequire.NoError(t, err)\n 145→\n 146→\tsuite := &E2ETestSuite{\n 147→\t\tkubeClient: kubeClient,\n 148→\t\ttestVPC: os.Getenv(\"TEST_VPC_ID\"),\n 149→\t\ttestSubnet: os.Getenv(\"TEST_SUBNET_ID\"),\n 150→\t\ttestImage: os.Getenv(\"TEST_IMAGE_ID\"),\n 151→\t\ttestRegion: os.Getenv(\"IBMCLOUD_REGION\"),\n 152→\t\ttestZone: os.Getenv(\"TEST_ZONE\"),\n 153→\t\ttestSecurityGroup: os.Getenv(\"TEST_SECURITY_GROUP_ID\"),\n 154→\t\ttestResourceGroup: os.Getenv(\"IBM_RESOURCE_GROUP_ID\"),\n 155→\t\ttestSshKeyId: os.Getenv(\"IBM_SSH_KEY_ID\"),\n 156→\t\tAPIServerEndpoint: os.Getenv(\"KUBERNETES_API_SERVER_ENDPOINT\"),\n 157→\t}\n 158→\n 159→\t// CRITICAL: Pre-test cleanup to prevent circuit breaker issues\n 160→\tt.Logf(\"Performing PRE-TEST cleanup to prevent stale resource issues...\")\n 161→\tsuite.cleanupAllStaleResources(t)\n 162→\n 163→\t// Wait for cleanup to complete and verify cluster state\n 164→\tsuite.verifyCleanState(t)\n 165→\n 166→\treturn suite\n 167→}\n 168→\n 169→// verifyCleanState checks that cluster is clean before starting tests\n 170→func (s *E2ETestSuite) verifyCleanState(t *testing.T) {\n 171→\tctx := context.Background()\n 172→\n 173→\t// Check for any remaining E2E resources\n 174→\tvar nodeClassList v1alpha1.IBMNodeClassList\n 175→\terr := s.kubeClient.List(ctx, &nodeClassList)\n 176→\tif err == nil && len(nodeClassList.Items) > 0 {\n 177→\t\tt.Logf(\"Warning: Warning: Found %d IBMNodeClass resources before test start:\", len(nodeClassList.Items))\n 178→\t\tfor _, nc := range nodeClassList.Items {\n 179→\t\t\tt.Logf(\" - %s (VPC: %s, Age: %s)\", nc.Name, 
nc.Spec.VPC, time.Since(nc.CreationTimestamp.Time).Round(time.Second))\n 180→\t\t}\n 181→\t\t// Force cleanup if any remain\n 182→\t\tif len(nodeClassList.Items) > 0 {\n 183→\t\t\tt.Logf(\"Force cleaning remaining IBMNodeClasses...\")\n 184→\t\t\ts.cleanupAllStaleResources(t)\n 185→\t\t}\n 186→\t}\n 187→\n 188→\tvar nodeClaimList karpv1.NodeClaimList\n 189→\terr = s.kubeClient.List(ctx, &nodeClaimList)\n 190→\tif err == nil && len(nodeClaimList.Items) > 0 {\n 191→\t\tt.Logf(\"Warning: Warning: Found %d NodeClaim resources before test start\", len(nodeClaimList.Items))\n 192→\t\tfor _, nc := range nodeClaimList.Items {\n 193→\t\t\tt.Logf(\" - %s (Conditions: %d)\", nc.Name, len(nc.Status.Conditions))\n 194→\t\t}\n 195→\t}\n 196→\n 197→\tt.Logf(\"Cluster state verified - ready for testing\")\n 198→}\n 199→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"52423fb8-a19a-4ac9-98b6-683def270e74","timestamp":"2026-01-10T10:45:06.325Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/suite.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"os\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/go-logr/zapr\"\n\t\"github.com/stretchr/testify/require\"\n\t\"go.uber.org/zap\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\tpolicyv1 \"k8s.io/api/policy/v1\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/runtime\"\n\t\"k8s.io/apimachinery/pkg/runtime/schema\"\n\t\"k8s.io/client-go/tools/clientcmd\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client/config\"\n\t\"sigs.k8s.io/controller-runtime/pkg/log\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\nconst (\n\ttestTimeout = 10 * time.Minute // Timeout for node provisioning\n\tpollInterval = 5 * time.Second // Faster polling for quicker test completion\n)\n\n// E2ETestSuite contains the test environment\ntype E2ETestSuite struct {\n\tkubeClient client.Client\n\n\t// Test IBM Cloud resources\n\ttestVPC string\n\ttestSubnet string\n\ttestImage string\n\ttestRegion string\n\ttestZone string\n\ttestSecurityGroup string\n\ttestResourceGroup string\n\ttestSshKeyId string\n\tAPIServerEndpoint string\n}\n\n// NodeSnapshot represents a snapshot of a node's state for stability monitoring\ntype NodeSnapshot struct {\n\tName string\n\tProviderID string\n\tInstanceType string\n\tCreationTime time.Time\n\tLabels map[string]string\n}\n\nfunc init() {\n\t// Initialize logger for controller-runtime to avoid warnings\n\tzapConfig := 
zap.NewDevelopmentConfig()\n\n\t// Reduce noise in e2e tests unless verbose mode is enabled\n\tif os.Getenv(\"E2E_VERBOSE\") != \"true\" {\n\t\tzapConfig.Level = zap.NewAtomicLevelAt(zap.InfoLevel)\n\t}\n\n\tzapLogger, _ := zapConfig.Build()\n\tlog.SetLogger(zapr.NewLogger(zapLogger))\n}\n\n// SetupE2ETestSuite initializes the test environment with pre-test cleanup\nfunc SetupE2ETestSuite(t *testing.T) *E2ETestSuite {\n\t// Skip if not running E2E tests\n\tif os.Getenv(\"RUN_E2E_TESTS\") != \"true\" {\n\t\tt.Skip(\"Skipping E2E tests - set RUN_E2E_TESTS=true to run\")\n\t}\n\n\t// Verify required environment variables\n\trequiredEnvVars := []string{\n\t\t\"IBMCLOUD_API_KEY\",\n\t\t\"IBMCLOUD_REGION\",\n\t\t\"TEST_VPC_ID\",\n\t\t\"TEST_SUBNET_ID\",\n\t\t\"TEST_IMAGE_ID\",\n\t\t\"TEST_ZONE\",\n\t\t\"TEST_SECURITY_GROUP_ID\",\n\t\t\"KUBERNETES_API_SERVER_ENDPOINT\",\n\t\t\"IBM_RESOURCE_GROUP_ID\",\n\t\t\"IBM_SSH_KEY_ID\",\n\t}\n\n\tfor _, envVar := range requiredEnvVars {\n\t\tif os.Getenv(envVar) == \"\" {\n\t\t\tt.Fatalf(\"Required environment variable %s is not set\", envVar)\n\t\t}\n\t}\n\n\t// Create Kubernetes client using kubeconfig from environment or default\n\tcfg, err := config.GetConfig()\n\tif err != nil {\n\t\t// Fallback to explicit kubeconfig if KUBECONFIG env is not set\n\t\tkubeconfigPath := os.Getenv(\"KUBECONFIG\")\n\t\tif kubeconfigPath == \"\" {\n\t\t\tkubeconfigPath = \"../../kubecofig_test\"\n\t\t}\n\t\tcfg, err = clientcmd.BuildConfigFromFlags(\"\", kubeconfigPath)\n\t\trequire.NoError(t, err)\n\t}\n\n\t// Use the global scheme which has Karpenter v1 types registered automatically\n\tscheme := runtime.NewScheme()\n\trequire.NoError(t, v1alpha1.AddToScheme(scheme))\n\trequire.NoError(t, corev1.AddToScheme(scheme))\n\trequire.NoError(t, appsv1.AddToScheme(scheme))\n\trequire.NoError(t, policyv1.AddToScheme(scheme))\n\n\t// Add Karpenter v1 types manually with proper group version registration\n\tkarpenterGV := schema.GroupVersion{Group: \"karpenter.sh\", Version: \"v1\"}\n\tmetav1.AddToGroupVersion(scheme, karpenterGV)\n\tscheme.AddKnownTypes(karpenterGV,\n\t\t&karpv1.NodePool{},\n\t\t&karpv1.NodePoolList{},\n\t\t&karpv1.NodeClaim{},\n\t\t&karpv1.NodeClaimList{})\n\n\tkubeClient, err := client.New(cfg, client.Options{Scheme: scheme})\n\trequire.NoError(t, err)\n\n\tsuite := &E2ETestSuite{\n\t\tkubeClient: kubeClient,\n\t\ttestVPC: os.Getenv(\"TEST_VPC_ID\"),\n\t\ttestSubnet: os.Getenv(\"TEST_SUBNET_ID\"),\n\t\ttestImage: os.Getenv(\"TEST_IMAGE_ID\"),\n\t\ttestRegion: os.Getenv(\"IBMCLOUD_REGION\"),\n\t\ttestZone: os.Getenv(\"TEST_ZONE\"),\n\t\ttestSecurityGroup: os.Getenv(\"TEST_SECURITY_GROUP_ID\"),\n\t\ttestResourceGroup: os.Getenv(\"IBM_RESOURCE_GROUP_ID\"),\n\t\ttestSshKeyId: os.Getenv(\"IBM_SSH_KEY_ID\"),\n\t\tAPIServerEndpoint: os.Getenv(\"KUBERNETES_API_SERVER_ENDPOINT\"),\n\t}\n\n\t// CRITICAL: Pre-test cleanup to prevent circuit breaker issues\n\tt.Logf(\"Performing PRE-TEST cleanup to prevent stale resource issues...\")\n\tsuite.cleanupAllStaleResources(t)\n\n\t// Wait for cleanup to complete and verify cluster state\n\tsuite.verifyCleanState(t)\n\n\treturn suite\n}\n\n// verifyCleanState checks that cluster is clean before starting tests\nfunc (s *E2ETestSuite) verifyCleanState(t *testing.T) {\n\tctx := context.Background()\n\n\t// Check for any remaining E2E resources\n\tvar nodeClassList v1alpha1.IBMNodeClassList\n\terr := s.kubeClient.List(ctx, &nodeClassList)\n\tif err == nil && len(nodeClassList.Items) > 0 {\n\t\tt.Logf(\"Warning: Warning: Found %d 
IBMNodeClass resources before test start:\", len(nodeClassList.Items))\n\t\tfor _, nc := range nodeClassList.Items {\n\t\t\tt.Logf(\" - %s (VPC: %s, Age: %s)\", nc.Name, nc.Spec.VPC, time.Since(nc.CreationTimestamp.Time).Round(time.Second))\n\t\t}\n\t\t// Force cleanup if any remain\n\t\tif len(nodeClassList.Items) > 0 {\n\t\t\tt.Logf(\"Force cleaning remaining IBMNodeClasses...\")\n\t\t\ts.cleanupAllStaleResources(t)\n\t\t}\n\t}\n\n\tvar nodeClaimList karpv1.NodeClaimList\n\terr = s.kubeClient.List(ctx, &nodeClaimList)\n\tif err == nil && len(nodeClaimList.Items) > 0 {\n\t\tt.Logf(\"Warning: Warning: Found %d NodeClaim resources before test start\", len(nodeClaimList.Items))\n\t\tfor _, nc := range nodeClaimList.Items {\n\t\t\tt.Logf(\" - %s (Conditions: %d)\", nc.Name, len(nc.Status.Conditions))\n\t\t}\n\t}\n\n\tt.Logf(\"Cluster state verified - ready for testing\")\n}\n","numLines":199,"startLine":1,"totalLines":199}},"sourceToolAssistantUUID":"041cbab0-e3ec-454e-9200-e8b4f2ef05eb"}
{"parentUuid":"91da583b-19da-4df0-9ac5-0920e3350f56","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_015u9dtTrRMgDnebTKyYUgjq","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"strings\"\n 24→\t\"testing\"\n 25→\t\"time\"\n 26→\n 27→\t\"github.com/stretchr/testify/require\"\n 28→\tappsv1 \"k8s.io/api/apps/v1\"\n 29→\tcorev1 \"k8s.io/api/core/v1\"\n 30→\t\"k8s.io/apimachinery/pkg/api/errors\"\n 31→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 32→\t\"k8s.io/apimachinery/pkg/types\"\n 33→\t\"k8s.io/apimachinery/pkg/util/wait\"\n 34→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 35→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 36→\n 37→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 38→)\n 39→\n 40→// waitForNodeClassReady waits for a NodeClass to be in a ready state with cache-aware validation\n 41→func (s *E2ETestSuite) waitForNodeClassReady(t *testing.T, nodeClassName string) {\n 42→\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n 43→\tdefer cancel()\n 44→\tcheckCount := 0\n 45→\n 46→\t// First, wait a brief moment to allow controller event processing\n 47→\tinitialDelay := 2 * time.Second\n 48→\tt.Logf(\"Waiting %v for initial controller processing before validation checks...\", initialDelay)\n 49→\tselect {\n 50→\tcase <-time.After(initialDelay):\n 51→\tcase <-ctx.Done():\n 52→\t\tt.Fatal(\"Context cancelled during initial delay\")\n 53→\t}\n 54→\n 55→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 56→\t\tcheckCount++\n 57→\t\tvar nodeClass v1alpha1.IBMNodeClass\n 58→\n 59→\t\t// Get NodeClass with exponential backoff for network issues\n 60→\t\tvar getErr error\n 61→\t\tfor attempt := 0; attempt < 3; attempt++ {\n 62→\t\t\tgetErr = s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClassName}, &nodeClass)\n 63→\t\t\tif getErr == nil {\n 64→\t\t\t\tbreak\n 65→\t\t\t}\n 66→\t\t\tif errors.IsNotFound(getErr) {\n 67→\t\t\t\tbreak // Don't retry not found errors\n 68→\t\t\t}\n 69→\t\t\tbackoff := time.Duration(100*attempt) * time.Millisecond\n 70→\t\t\tt.Logf(\"Check #%d attempt %d: Get failed, retrying after %v: %v\", checkCount, attempt+1, backoff, getErr)\n 71→\t\t\tselect {\n 72→\t\t\tcase <-time.After(backoff):\n 73→\t\t\tcase <-ctx.Done():\n 74→\t\t\t\treturn false, ctx.Err()\n 75→\t\t\t}\n 76→\t\t}\n 77→\n 78→\t\tif getErr != nil {\n 79→\t\t\tt.Logf(\"Check #%d: Failed to get NodeClass after retries: %v\", checkCount, getErr)\n 80→\t\t\treturn false, getErr\n 81→\t\t}\n 82→\n 83→\t\t// 
Log the full NodeClass spec on first successful check\n 84→\t\tif checkCount == 1 {\n 85→\t\t\tt.Logf(\"NodeClass Spec: VPC=%s, Subnet=%s, Zone=%s, Region=%s, InstanceProfile=%s\",\n 86→\t\t\t\tnodeClass.Spec.VPC, nodeClass.Spec.Subnet, nodeClass.Spec.Zone,\n 87→\t\t\t\tnodeClass.Spec.Region, nodeClass.Spec.InstanceProfile)\n 88→\t\t\tt.Logf(\"NodeClass ResourceGroup: %s\", nodeClass.Spec.ResourceGroup)\n 89→\t\t\tt.Logf(\"NodeClass SecurityGroups: %v\", nodeClass.Spec.SecurityGroups)\n 90→\t\t\tt.Logf(\"NodeClass APIServerEndpoint: %s\", nodeClass.Spec.APIServerEndpoint)\n 91→\t\t\tt.Logf(\"NodeClass ResourceVersion: %s\", nodeClass.ResourceVersion)\n 92→\t\t}\n 93→\n 94→\t\t// Check if controller has processed this resource (has status conditions)\n 95→\t\tif len(nodeClass.Status.Conditions) == 0 {\n 96→\t\t\tt.Logf(\"Check #%d: Controller hasn't processed NodeClass yet (no status conditions), waiting...\", checkCount)\n 97→\t\t\treturn false, nil\n 98→\t\t}\n 99→\n 100→\t\t// Log all conditions every check with enhanced formatting\n 101→\t\tt.Logf(\"Check #%d: NodeClass %s conditions (ResourceVersion: %s):\", checkCount, nodeClassName, nodeClass.ResourceVersion)\n 102→\t\tfor _, condition := range nodeClass.Status.Conditions {\n 103→\t\t\tstatusIcon := \"?\"\n 104→\t\t\tif condition.Status == metav1.ConditionTrue {\n 105→\t\t\t\tstatusIcon = \"[OK]\"\n 106→\t\t\t} else if condition.Status == metav1.ConditionFalse {\n 107→\t\t\t\tstatusIcon = \"[FAILED]\"\n 108→\t\t\t}\n 109→\t\t\tt.Logf(\" %s Type: %s, Status: %s, Reason: %s, Message: %s\",\n 110→\t\t\t\tstatusIcon, condition.Type, condition.Status, condition.Reason, condition.Message)\n 111→\t\t}\n 112→\n 113→\t\t// Check for Ready condition (current controller implementation)\n 114→\t\tfor _, condition := range nodeClass.Status.Conditions {\n 115→\t\t\tif condition.Type == \"Ready\" {\n 116→\t\t\t\tif condition.Status == metav1.ConditionTrue {\n 117→\t\t\t\t\tt.Logf(\"NodeClass is ready after %d checks: %s - %s\", checkCount, condition.Reason, condition.Message)\n 118→\t\t\t\t\treturn true, nil\n 119→\t\t\t\t} else {\n 120→\t\t\t\t\tt.Logf(\"NodeClass not ready (check #%d): %s - %s\", checkCount, condition.Reason, condition.Message)\n 121→\t\t\t\t\t// Log validation time for debugging cache issues\n 122→\t\t\t\t\tif !nodeClass.Status.LastValidationTime.IsZero() {\n 123→\t\t\t\t\t\tvalidationAge := time.Since(nodeClass.Status.LastValidationTime.Time)\n 124→\t\t\t\t\t\tt.Logf(\"Last validation: %v ago\", validationAge)\n 125→\t\t\t\t\t}\n 126→\t\t\t\t\treturn false, nil\n 127→\t\t\t\t}\n 128→\t\t\t}\n 129→\t\t}\n 130→\n 131→\t\t// No Ready condition found yet - this indicates controller hasn't finished processing\n 132→\t\tt.Logf(\"Warning: Ready condition not found on check #%d - controller still processing...\", checkCount)\n 133→\t\treturn false, nil\n 134→\t})\n 135→\trequire.NoError(t, err, \"NodeClass should become ready within timeout\")\n 136→}\n 137→\n 138→// waitForInstanceCreation waits for a NodeClaim to have an instance created\n 139→func (s *E2ETestSuite) waitForInstanceCreation(t *testing.T, nodeClaimName string) {\n 140→\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n 141→\tdefer cancel()\n 142→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 143→\t\tvar nodeClaim karpv1.NodeClaim\n 144→\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n 145→\t\tif err != nil {\n 146→\t\t\treturn false, 
err\n 147→\t\t}\n 148→\t\t// Check if ProviderID is set and instance is launched\n 149→\t\tif nodeClaim.Status.ProviderID == \"\" {\n 150→\t\t\treturn false, nil\n 151→\t\t}\n 152→\t\t// Check if Launched condition is True\n 153→\t\tfor _, condition := range nodeClaim.Status.Conditions {\n 154→\t\t\tif condition.Type == \"Launched\" && condition.Status == metav1.ConditionTrue {\n 155→\t\t\t\treturn true, nil\n 156→\t\t\t}\n 157→\t\t}\n 158→\t\treturn false, nil\n 159→\t})\n 160→\trequire.NoError(t, err, \"NodeClaim should be launched within timeout\")\n 161→}\n 162→\n 163→// waitForInstanceDeletion waits for a NodeClaim to be fully deleted\n 164→func (s *E2ETestSuite) waitForInstanceDeletion(t *testing.T, nodeClaimName string) {\n 165→\t// Timeout for deletion - IBM Cloud instances typically delete within a few minutes\n 166→\tdeletionTimeout := 5 * time.Minute\n 167→\tctx, cancel := context.WithTimeout(context.Background(), deletionTimeout)\n 168→\tdefer cancel()\n 169→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, deletionTimeout, true, func(ctx context.Context) (bool, error) {\n 170→\t\tvar nodeClaim karpv1.NodeClaim\n 171→\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n 172→\t\tif errors.IsNotFound(err) {\n 173→\t\t\treturn true, nil // NodeClaim was deleted\n 174→\t\t}\n 175→\t\tif err != nil {\n 176→\t\t\treturn false, err\n 177→\t\t}\n 178→\t\t// Log the current status to help debug timeout issues\n 179→\t\tt.Logf(\"NodeClaim %s still exists, status: %+v\", nodeClaimName, nodeClaim.Status)\n 180→\t\treturn false, nil // Still exists\n 181→\t})\n 182→\trequire.NoError(t, err, \"NodeClaim should be deleted within timeout\")\n 183→}\n 184→\n 185→// waitForPodsToBeScheduled waits for all pods in a deployment to be scheduled and running\n 186→func (s *E2ETestSuite) waitForPodsToBeScheduled(t *testing.T, deploymentName, namespace string) {\n 187→\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n 188→\tdefer cancel()\n 189→\tcheckCount := 0\n 190→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 191→\t\tcheckCount++\n 192→\t\tvar deployment appsv1.Deployment\n 193→\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: namespace}, &deployment)\n 194→\t\tif err != nil {\n 195→\t\t\treturn false, err\n 196→\t\t}\n 197→\t\t// Check if deployment has desired replicas ready\n 198→\t\tif deployment.Status.ReadyReplicas == *deployment.Spec.Replicas {\n 199→\t\t\tt.Logf(\"[OK] All %d replicas are ready for deployment %s after %d checks\",\n 200→\t\t\t\tdeployment.Status.ReadyReplicas, deploymentName, checkCount)\n 201→\t\t\treturn true, nil\n 202→\t\t}\n 203→\t\tt.Logf(\"Check #%d: Deployment %s: %d/%d replicas ready, %d available, %d unavailable\",\n 204→\t\t\tcheckCount, deploymentName, deployment.Status.ReadyReplicas,\n 205→\t\t\t*deployment.Spec.Replicas, deployment.Status.AvailableReplicas,\n 206→\t\t\tdeployment.Status.UnavailableReplicas)\n 207→\t\t// Log NodeClaim status every 3 checks\n 208→\t\tif checkCount%3 == 0 {\n 209→\t\t\ts.logNodeClaimStatus(t)\n 210→\t\t\t// Extract testName from deploymentName to get correct NodePool name\n 211→\t\t\t// deploymentName format: \"testname-workload\" -> NodePool: \"testname-nodepool\"\n 212→\t\t\tif strings.HasSuffix(deploymentName, \"-workload\") {\n 213→\t\t\t\ttestName := strings.TrimSuffix(deploymentName, \"-workload\")\n 214→\t\t\t\ts.logNodePoolStatus(t, 
testName+\"-nodepool\")\n 215→\t\t\t}\n 216→\t\t}\n 217→\n 218→\t\t// If we're still waiting after many checks, dump some diagnostics\n 219→\t\tif checkCount > 10 && checkCount%5 == 0 {\n 220→\t\t\ts.logDeploymentDiagnostics(t, deploymentName, namespace)\n 221→\t\t}\n 222→\n 223→\t\treturn false, nil\n 224→\t})\n 225→\trequire.NoError(t, err, \"Deployment pods should be scheduled and running within timeout\")\n 226→}\n 227→\n 228→// waitForPodsGone waits for all pods from a deployment to be completely terminated\n 229→func (s *E2ETestSuite) waitForPodsGone(t *testing.T, deploymentName string) {\n 230→\tctx := context.Background()\n 231→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, 5*time.Minute, true, func(ctx context.Context) (bool, error) {\n 232→\t\tvar podList corev1.PodList\n 233→\t\terr := s.kubeClient.List(ctx, &podList, client.MatchingLabels{\n 234→\t\t\t\"app\": deploymentName,\n 235→\t\t})\n 236→\t\tif err != nil {\n 237→\t\t\tt.Logf(\"Error checking pods: %v\", err)\n 238→\t\t\treturn false, nil\n 239→\t\t}\n 240→\t\tremainingPods := 0\n 241→\t\tfor _, pod := range podList.Items {\n 242→\t\t\tif pod.DeletionTimestamp == nil {\n 243→\t\t\t\tremainingPods++\n 244→\t\t\t}\n 245→\t\t}\n 246→\t\tt.Logf(\"Remaining pods for deployment %s: %d\", deploymentName, remainingPods)\n 247→\t\treturn remainingPods == 0, nil\n 248→\t})\n 249→\trequire.NoError(t, err, \"All pods should be terminated\")\n 250→}\n 251→\n 252→// waitForPodToBeRunning waits for a single pod to be in running state\n 253→func (s *E2ETestSuite) waitForPodToBeRunning(t *testing.T, podName, namespace string) {\n 254→\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n 255→\tdefer cancel()\n 256→\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n 257→\t\tvar pod corev1.Pod\n 258→\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: podName, Namespace: namespace}, &pod)\n 259→\t\tif err != nil {\n 260→\t\t\tt.Logf(\"Error getting pod %s: %v\", podName, err)\n 261→\t\t\treturn false, nil\n 262→\t\t}\n 263→\n 264→\t\t// Check if pod is running\n 265→\t\tif pod.Status.Phase == corev1.PodRunning {\n 266→\t\t\t// Also check that all containers are ready\n 267→\t\t\tallReady := true\n 268→\t\t\tfor _, condition := range pod.Status.Conditions {\n 269→\t\t\t\tif condition.Type == corev1.PodReady {\n 270→\t\t\t\t\tif condition.Status != corev1.ConditionTrue {\n 271→\t\t\t\t\t\tallReady = false\n 272→\t\t\t\t\t}\n 273→\t\t\t\t\tbreak\n 274→\t\t\t\t}\n 275→\t\t\t}\n 276→\t\t\tif allReady {\n 277→\t\t\t\tt.Logf(\"Pod %s is running and ready\", podName)\n 278→\t\t\t\treturn true, nil\n 279→\t\t\t}\n 280→\t\t\tt.Logf(\"Pod %s is running but not ready yet\", podName)\n 281→\t\t\treturn false, nil\n 282→\t\t}\n 283→\n 284→\t\tt.Logf(\"Pod %s is in phase %s, waiting for Running...\", podName, pod.Status.Phase)\n 285→\t\treturn false, nil\n 286→\t})\n 287→\trequire.NoError(t, err, \"Pod should be running within timeout\")\n 288→}\n 289→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"e5eed91e-6b00-4fa9-a4db-542b107e2c6c","timestamp":"2026-01-10T10:45:06.326Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/waiting.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"strings\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/api/errors\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\t\"k8s.io/apimachinery/pkg/util/wait\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// waitForNodeClassReady waits for a NodeClass to be in a ready state with cache-aware validation\nfunc (s *E2ETestSuite) waitForNodeClassReady(t *testing.T, nodeClassName string) {\n\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n\tdefer cancel()\n\tcheckCount := 0\n\n\t// First, wait a brief moment to allow controller event processing\n\tinitialDelay := 2 * time.Second\n\tt.Logf(\"Waiting %v for initial controller processing before validation checks...\", initialDelay)\n\tselect {\n\tcase <-time.After(initialDelay):\n\tcase <-ctx.Done():\n\t\tt.Fatal(\"Context cancelled during initial delay\")\n\t}\n\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tcheckCount++\n\t\tvar nodeClass v1alpha1.IBMNodeClass\n\n\t\t// Get NodeClass with exponential backoff for network issues\n\t\tvar getErr error\n\t\tfor attempt := 0; attempt < 3; attempt++ {\n\t\t\tgetErr = s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClassName}, &nodeClass)\n\t\t\tif getErr == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tif errors.IsNotFound(getErr) {\n\t\t\t\tbreak // Don't retry not found errors\n\t\t\t}\n\t\t\tbackoff := time.Duration(100*attempt) * time.Millisecond\n\t\t\tt.Logf(\"Check #%d attempt %d: Get failed, retrying after %v: %v\", checkCount, attempt+1, backoff, getErr)\n\t\t\tselect {\n\t\t\tcase <-time.After(backoff):\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn false, ctx.Err()\n\t\t\t}\n\t\t}\n\n\t\tif getErr != nil {\n\t\t\tt.Logf(\"Check #%d: Failed to get NodeClass after retries: %v\", checkCount, getErr)\n\t\t\treturn false, getErr\n\t\t}\n\n\t\t// Log the full NodeClass spec on first successful check\n\t\tif checkCount == 1 {\n\t\t\tt.Logf(\"NodeClass Spec: VPC=%s, Subnet=%s, Zone=%s, Region=%s, InstanceProfile=%s\",\n\t\t\t\tnodeClass.Spec.VPC, nodeClass.Spec.Subnet, nodeClass.Spec.Zone,\n\t\t\t\tnodeClass.Spec.Region, nodeClass.Spec.InstanceProfile)\n\t\t\tt.Logf(\"NodeClass ResourceGroup: %s\", nodeClass.Spec.ResourceGroup)\n\t\t\tt.Logf(\"NodeClass 
SecurityGroups: %v\", nodeClass.Spec.SecurityGroups)\n\t\t\tt.Logf(\"NodeClass APIServerEndpoint: %s\", nodeClass.Spec.APIServerEndpoint)\n\t\t\tt.Logf(\"NodeClass ResourceVersion: %s\", nodeClass.ResourceVersion)\n\t\t}\n\n\t\t// Check if controller has processed this resource (has status conditions)\n\t\tif len(nodeClass.Status.Conditions) == 0 {\n\t\t\tt.Logf(\"Check #%d: Controller hasn't processed NodeClass yet (no status conditions), waiting...\", checkCount)\n\t\t\treturn false, nil\n\t\t}\n\n\t\t// Log all conditions every check with enhanced formatting\n\t\tt.Logf(\"Check #%d: NodeClass %s conditions (ResourceVersion: %s):\", checkCount, nodeClassName, nodeClass.ResourceVersion)\n\t\tfor _, condition := range nodeClass.Status.Conditions {\n\t\t\tstatusIcon := \"?\"\n\t\t\tif condition.Status == metav1.ConditionTrue {\n\t\t\t\tstatusIcon = \"[OK]\"\n\t\t\t} else if condition.Status == metav1.ConditionFalse {\n\t\t\t\tstatusIcon = \"[FAILED]\"\n\t\t\t}\n\t\t\tt.Logf(\" %s Type: %s, Status: %s, Reason: %s, Message: %s\",\n\t\t\t\tstatusIcon, condition.Type, condition.Status, condition.Reason, condition.Message)\n\t\t}\n\n\t\t// Check for Ready condition (current controller implementation)\n\t\tfor _, condition := range nodeClass.Status.Conditions {\n\t\t\tif condition.Type == \"Ready\" {\n\t\t\t\tif condition.Status == metav1.ConditionTrue {\n\t\t\t\t\tt.Logf(\"NodeClass is ready after %d checks: %s - %s\", checkCount, condition.Reason, condition.Message)\n\t\t\t\t\treturn true, nil\n\t\t\t\t} else {\n\t\t\t\t\tt.Logf(\"NodeClass not ready (check #%d): %s - %s\", checkCount, condition.Reason, condition.Message)\n\t\t\t\t\t// Log validation time for debugging cache issues\n\t\t\t\t\tif !nodeClass.Status.LastValidationTime.IsZero() {\n\t\t\t\t\t\tvalidationAge := time.Since(nodeClass.Status.LastValidationTime.Time)\n\t\t\t\t\t\tt.Logf(\"Last validation: %v ago\", validationAge)\n\t\t\t\t\t}\n\t\t\t\t\treturn false, nil\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t// No Ready condition found yet - this indicates controller hasn't finished processing\n\t\tt.Logf(\"Warning: Ready condition not found on check #%d - controller still processing...\", checkCount)\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"NodeClass should become ready within timeout\")\n}\n\n// waitForInstanceCreation waits for a NodeClaim to have an instance created\nfunc (s *E2ETestSuite) waitForInstanceCreation(t *testing.T, nodeClaimName string) {\n\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n\tdefer cancel()\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar nodeClaim karpv1.NodeClaim\n\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n\t\tif err != nil {\n\t\t\treturn false, err\n\t\t}\n\t\t// Check if ProviderID is set and instance is launched\n\t\tif nodeClaim.Status.ProviderID == \"\" {\n\t\t\treturn false, nil\n\t\t}\n\t\t// Check if Launched condition is True\n\t\tfor _, condition := range nodeClaim.Status.Conditions {\n\t\t\tif condition.Type == \"Launched\" && condition.Status == metav1.ConditionTrue {\n\t\t\t\treturn true, nil\n\t\t\t}\n\t\t}\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"NodeClaim should be launched within timeout\")\n}\n\n// waitForInstanceDeletion waits for a NodeClaim to be fully deleted\nfunc (s *E2ETestSuite) waitForInstanceDeletion(t *testing.T, nodeClaimName string) {\n\t// Timeout for deletion - IBM Cloud instances typically delete 
within a few minutes\n\tdeletionTimeout := 5 * time.Minute\n\tctx, cancel := context.WithTimeout(context.Background(), deletionTimeout)\n\tdefer cancel()\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, deletionTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar nodeClaim karpv1.NodeClaim\n\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n\t\tif errors.IsNotFound(err) {\n\t\t\treturn true, nil // NodeClaim was deleted\n\t\t}\n\t\tif err != nil {\n\t\t\treturn false, err\n\t\t}\n\t\t// Log the current status to help debug timeout issues\n\t\tt.Logf(\"NodeClaim %s still exists, status: %+v\", nodeClaimName, nodeClaim.Status)\n\t\treturn false, nil // Still exists\n\t})\n\trequire.NoError(t, err, \"NodeClaim should be deleted within timeout\")\n}\n\n// waitForPodsToBeScheduled waits for all pods in a deployment to be scheduled and running\nfunc (s *E2ETestSuite) waitForPodsToBeScheduled(t *testing.T, deploymentName, namespace string) {\n\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n\tdefer cancel()\n\tcheckCount := 0\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tcheckCount++\n\t\tvar deployment appsv1.Deployment\n\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: namespace}, &deployment)\n\t\tif err != nil {\n\t\t\treturn false, err\n\t\t}\n\t\t// Check if deployment has desired replicas ready\n\t\tif deployment.Status.ReadyReplicas == *deployment.Spec.Replicas {\n\t\t\tt.Logf(\"[OK] All %d replicas are ready for deployment %s after %d checks\",\n\t\t\t\tdeployment.Status.ReadyReplicas, deploymentName, checkCount)\n\t\t\treturn true, nil\n\t\t}\n\t\tt.Logf(\"Check #%d: Deployment %s: %d/%d replicas ready, %d available, %d unavailable\",\n\t\t\tcheckCount, deploymentName, deployment.Status.ReadyReplicas,\n\t\t\t*deployment.Spec.Replicas, deployment.Status.AvailableReplicas,\n\t\t\tdeployment.Status.UnavailableReplicas)\n\t\t// Log NodeClaim status every 3 checks\n\t\tif checkCount%3 == 0 {\n\t\t\ts.logNodeClaimStatus(t)\n\t\t\t// Extract testName from deploymentName to get correct NodePool name\n\t\t\t// deploymentName format: \"testname-workload\" -> NodePool: \"testname-nodepool\"\n\t\t\tif strings.HasSuffix(deploymentName, \"-workload\") {\n\t\t\t\ttestName := strings.TrimSuffix(deploymentName, \"-workload\")\n\t\t\t\ts.logNodePoolStatus(t, testName+\"-nodepool\")\n\t\t\t}\n\t\t}\n\n\t\t// If we're still waiting after many checks, dump some diagnostics\n\t\tif checkCount > 10 && checkCount%5 == 0 {\n\t\t\ts.logDeploymentDiagnostics(t, deploymentName, namespace)\n\t\t}\n\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"Deployment pods should be scheduled and running within timeout\")\n}\n\n// waitForPodsGone waits for all pods from a deployment to be completely terminated\nfunc (s *E2ETestSuite) waitForPodsGone(t *testing.T, deploymentName string) {\n\tctx := context.Background()\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, 5*time.Minute, true, func(ctx context.Context) (bool, error) {\n\t\tvar podList corev1.PodList\n\t\terr := s.kubeClient.List(ctx, &podList, client.MatchingLabels{\n\t\t\t\"app\": deploymentName,\n\t\t})\n\t\tif err != nil {\n\t\t\tt.Logf(\"Error checking pods: %v\", err)\n\t\t\treturn false, nil\n\t\t}\n\t\tremainingPods := 0\n\t\tfor _, pod := range podList.Items {\n\t\t\tif pod.DeletionTimestamp == nil 
{\n\t\t\t\tremainingPods++\n\t\t\t}\n\t\t}\n\t\tt.Logf(\"Remaining pods for deployment %s: %d\", deploymentName, remainingPods)\n\t\treturn remainingPods == 0, nil\n\t})\n\trequire.NoError(t, err, \"All pods should be terminated\")\n}\n\n// waitForPodToBeRunning waits for a single pod to be in running state\nfunc (s *E2ETestSuite) waitForPodToBeRunning(t *testing.T, podName, namespace string) {\n\tctx, cancel := context.WithTimeout(context.Background(), testTimeout)\n\tdefer cancel()\n\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\t\tvar pod corev1.Pod\n\t\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: podName, Namespace: namespace}, &pod)\n\t\tif err != nil {\n\t\t\tt.Logf(\"Error getting pod %s: %v\", podName, err)\n\t\t\treturn false, nil\n\t\t}\n\n\t\t// Check if pod is running\n\t\tif pod.Status.Phase == corev1.PodRunning {\n\t\t\t// Also check that all containers are ready\n\t\t\tallReady := true\n\t\t\tfor _, condition := range pod.Status.Conditions {\n\t\t\t\tif condition.Type == corev1.PodReady {\n\t\t\t\t\tif condition.Status != corev1.ConditionTrue {\n\t\t\t\t\t\tallReady = false\n\t\t\t\t\t}\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\t\t\tif allReady {\n\t\t\t\tt.Logf(\"Pod %s is running and ready\", podName)\n\t\t\t\treturn true, nil\n\t\t\t}\n\t\t\tt.Logf(\"Pod %s is running but not ready yet\", podName)\n\t\t\treturn false, nil\n\t\t}\n\n\t\tt.Logf(\"Pod %s is in phase %s, waiting for Running...\", podName, pod.Status.Phase)\n\t\treturn false, nil\n\t})\n\trequire.NoError(t, err, \"Pod should be running within timeout\")\n}\n","numLines":289,"startLine":1,"totalLines":289}},"sourceToolAssistantUUID":"91da583b-19da-4df0-9ac5-0920e3350f56"}
{"parentUuid":"2f739efa-4602-43e9-a419-8b905c428667","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_014fYkVUm5scDbVJyKWxtUD2","type":"tool_result","content":"/home/josie/development/karpenter-ibm/test/e2e/configs/rke2-userdata-config.json","is_error":false}]},"uuid":"7d6216fa-c225-42c7-bc72-abd7ea5c0ec5","timestamp":"2026-01-10T10:45:06.530Z","toolUseResult":{"stdout":"/home/josie/development/karpenter-ibm/test/e2e/configs/rke2-userdata-config.json","stderr":"","interrupted":false,"isImage":false},"sourceToolAssistantUUID":"2f739efa-4602-43e9-a419-8b905c428667"}
{"parentUuid":"7d6216fa-c225-42c7-bc72-abd7ea5c0ec5","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_018JPhUeuePipU1DAaG7vanj","type":"message","role":"assistant","content":[{"type":"text","text":"Let me check the resources.go to understand test helpers and hardcoded values:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":7617,"cache_creation_input_tokens":365,"cache_read_input_tokens":77248,"cache_creation":{"ephemeral_5m_input_tokens":365,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZXdoys3RVEy9DqoD5u","type":"assistant","uuid":"5525a8b5-03b7-4362-a58e-8e49739d7665","timestamp":"2026-01-10T10:45:09.221Z"}
{"parentUuid":"5525a8b5-03b7-4362-a58e-8e49739d7665","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_018JPhUeuePipU1DAaG7vanj","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01AP52LT5pVjzNy71pVxrY1N","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/resources.go","limit":100}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":7617,"cache_creation_input_tokens":365,"cache_read_input_tokens":77248,"cache_creation":{"ephemeral_5m_input_tokens":365,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZXdoys3RVEy9DqoD5u","type":"assistant","uuid":"1500f25f-2eb9-4647-8b2c-0b7f90d2b252","timestamp":"2026-01-10T10:45:09.766Z"}
{"parentUuid":"1500f25f-2eb9-4647-8b2c-0b7f90d2b252","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_018JPhUeuePipU1DAaG7vanj","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_016DPAUyFBpVbou7odwg57zm","name":"Grep","input":{"pattern":"30\\s*\\*\\s*time\\.Minute|hardcoded|magic|TODO|FIXME","path":"/home/josie/development/karpenter-ibm/test/e2e","output_mode":"content"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":7617,"cache_creation_input_tokens":365,"cache_read_input_tokens":77248,"cache_creation":{"ephemeral_5m_input_tokens":365,"ephemeral_1h_input_tokens":0},"output_tokens":225,"service_tier":"standard"}},"requestId":"req_011CWyZXdoys3RVEy9DqoD5u","type":"assistant","uuid":"8e6cc192-1864-4fa8-97f1-0f4213a0cc14","timestamp":"2026-01-10T10:45:10.379Z"}
{"parentUuid":"1500f25f-2eb9-4647-8b2c-0b7f90d2b252","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01AP52LT5pVjzNy71pVxrY1N","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"fmt\"\n 24→\t\"testing\"\n 25→\n 26→\t\"github.com/stretchr/testify/require\"\n 27→\tappsv1 \"k8s.io/api/apps/v1\"\n 28→\tcorev1 \"k8s.io/api/core/v1\"\n 29→\t\"k8s.io/apimachinery/pkg/api/resource\"\n 30→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 31→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 32→\n 33→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 34→)\n 35→\n 36→// createTestNodeClass creates a standard test NodeClass\n 37→func (s *E2ETestSuite) createTestNodeClass(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n 38→\t// Dynamically get an available instance type\n 39→\tinstanceType := s.GetAvailableInstanceType(t)\n 40→\n 41→\t// Debug: Log exact values being used\n 42→\tt.Logf(\"Creating NodeClass with values from suite:\")\n 43→\tt.Logf(\" VPC: '%s' (len=%d)\", s.testVPC, len(s.testVPC))\n 44→\tt.Logf(\" ResourceGroup: '%s' (len=%d)\", s.testResourceGroup, len(s.testResourceGroup))\n 45→\tt.Logf(\" Subnet: '%s' (len=%d)\", s.testSubnet, len(s.testSubnet))\n 46→\tt.Logf(\" Region: '%s' (len=%d)\", s.testRegion, len(s.testRegion))\n 47→\tt.Logf(\" Zone: '%s' (len=%d)\", s.testZone, len(s.testZone))\n 48→\n 49→\t// Helper to create string pointer\n 50→\tbootstrapMode := \"cloud-init\"\n 51→\n 52→\tnodeClass := &v1alpha1.IBMNodeClass{\n 53→\t\tTypeMeta: metav1.TypeMeta{\n 54→\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n 55→\t\t\tKind: \"IBMNodeClass\",\n 56→\t\t},\n 57→\t\tObjectMeta: metav1.ObjectMeta{\n 58→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 59→\t\t},\n 60→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n 61→\t\t\tRegion: s.testRegion,\n 62→\t\t\tZone: s.testZone,\n 63→\t\t\tInstanceProfile: instanceType,\n 64→\t\t\tImage: s.testImage,\n 65→\t\t\tVPC: s.testVPC,\n 66→\t\t\tSubnet: s.testSubnet,\n 67→\t\t\tSecurityGroups: []string{s.testSecurityGroup},\n 68→\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n 69→\t\t\tBootstrapMode: &bootstrapMode,\n 70→\t\t\tResourceGroup: s.testResourceGroup,\n 71→\t\t\tSSHKeys: []string{s.testSshKeyId},\n 72→\t\t\tTags: map[string]string{\n 73→\t\t\t\t\"test\": \"e2e\",\n 74→\t\t\t\t\"test-name\": testName,\n 75→\t\t\t\t\"created-by\": \"karpenter-e2e\",\n 76→\t\t\t\t\"purpose\": \"e2e-verification\",\n 77→\t\t\t},\n 78→\t\t},\n 79→\t}\n 80→\n 81→\terr := s.kubeClient.Create(context.Background(), nodeClass)\n 82→\trequire.NoError(t, err)\n 83→\n 
84→\treturn nodeClass\n 85→}\n 86→\n 87→// createTestNodeClassWithoutInstanceProfile creates a NodeClass without instanceProfile\n 88→func (s *E2ETestSuite) createTestNodeClassWithoutInstanceProfile(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n 89→\t// Helper to create string pointer\n 90→\tbootstrapMode := \"cloud-init\"\n 91→\n 92→\tnodeClass := &v1alpha1.IBMNodeClass{\n 93→\t\tTypeMeta: metav1.TypeMeta{\n 94→\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n 95→\t\t\tKind: \"IBMNodeClass\",\n 96→\t\t},\n 97→\t\tObjectMeta: metav1.ObjectMeta{\n 98→\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n 99→\t\t},\n 100→\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"772f5643-0033-4a85-a73e-ea3a55e62568","timestamp":"2026-01-10T10:45:10.551Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/resources.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\t\"k8s.io/apimachinery/pkg/api/resource\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// createTestNodeClass creates a standard test NodeClass\nfunc (s *E2ETestSuite) createTestNodeClass(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n\t// Dynamically get an available instance type\n\tinstanceType := s.GetAvailableInstanceType(t)\n\n\t// Debug: Log exact values being used\n\tt.Logf(\"Creating NodeClass with values from suite:\")\n\tt.Logf(\" VPC: '%s' (len=%d)\", s.testVPC, len(s.testVPC))\n\tt.Logf(\" ResourceGroup: '%s' (len=%d)\", s.testResourceGroup, len(s.testResourceGroup))\n\tt.Logf(\" Subnet: '%s' (len=%d)\", s.testSubnet, len(s.testSubnet))\n\tt.Logf(\" Region: '%s' (len=%d)\", s.testRegion, len(s.testRegion))\n\tt.Logf(\" Zone: '%s' (len=%d)\", s.testZone, len(s.testZone))\n\n\t// Helper to create string pointer\n\tbootstrapMode := \"cloud-init\"\n\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tTypeMeta: metav1.TypeMeta{\n\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n\t\t\tKind: \"IBMNodeClass\",\n\t\t},\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{\n\t\t\tRegion: s.testRegion,\n\t\t\tZone: s.testZone,\n\t\t\tInstanceProfile: instanceType,\n\t\t\tImage: s.testImage,\n\t\t\tVPC: s.testVPC,\n\t\t\tSubnet: s.testSubnet,\n\t\t\tSecurityGroups: 
[]string{s.testSecurityGroup},\n\t\t\tAPIServerEndpoint: s.APIServerEndpoint,\n\t\t\tBootstrapMode: &bootstrapMode,\n\t\t\tResourceGroup: s.testResourceGroup,\n\t\t\tSSHKeys: []string{s.testSshKeyId},\n\t\t\tTags: map[string]string{\n\t\t\t\t\"test\": \"e2e\",\n\t\t\t\t\"test-name\": testName,\n\t\t\t\t\"created-by\": \"karpenter-e2e\",\n\t\t\t\t\"purpose\": \"e2e-verification\",\n\t\t\t},\n\t\t},\n\t}\n\n\terr := s.kubeClient.Create(context.Background(), nodeClass)\n\trequire.NoError(t, err)\n\n\treturn nodeClass\n}\n\n// createTestNodeClassWithoutInstanceProfile creates a NodeClass without instanceProfile\nfunc (s *E2ETestSuite) createTestNodeClassWithoutInstanceProfile(t *testing.T, testName string) *v1alpha1.IBMNodeClass {\n\t// Helper to create string pointer\n\tbootstrapMode := \"cloud-init\"\n\n\tnodeClass := &v1alpha1.IBMNodeClass{\n\t\tTypeMeta: metav1.TypeMeta{\n\t\t\tAPIVersion: \"karpenter-ibm.sh/v1alpha1\",\n\t\t\tKind: \"IBMNodeClass\",\n\t\t},\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: fmt.Sprintf(\"%s-nodeclass\", testName),\n\t\t},\n\t\tSpec: v1alpha1.IBMNodeClassSpec{","numLines":100,"startLine":1,"totalLines":636}},"sourceToolAssistantUUID":"1500f25f-2eb9-4647-8b2c-0b7f90d2b252"}
{"parentUuid":"8e6cc192-1864-4fa8-97f1-0f4213a0cc14","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_016DPAUyFBpVbou7odwg57zm","type":"tool_result","content":"test/e2e/resources.go:317:\t// Use dynamically detected instance types instead of hardcoded ones\ntest/e2e/instance_discovery.go:27:\t// Use hardcoded instance types that are commonly available\ntest/e2e/configs/README.md:185:- Matches the standard hardcoded test configuration"}]},"uuid":"da85fc80-b66c-4316-8310-d5daa0555088","timestamp":"2026-01-10T10:45:10.597Z","toolUseResult":{"mode":"content","numFiles":0,"filenames":[],"content":"test/e2e/resources.go:317:\t// Use dynamically detected instance types instead of hardcoded ones\ntest/e2e/instance_discovery.go:27:\t// Use hardcoded instance types that are commonly available\ntest/e2e/configs/README.md:185:- Matches the standard hardcoded test configuration","numLines":3},"sourceToolAssistantUUID":"8e6cc192-1864-4fa8-97f1-0f4213a0cc14"}
{"parentUuid":"da85fc80-b66c-4316-8310-d5daa0555088","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01Vz4jK4mhkJvJ2krsfFGSei","type":"message","role":"assistant","content":[{"type":"text","text":"Let me check for timing-related patterns and flaky test indicators:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":9827,"cache_read_input_tokens":77613,"cache_creation":{"ephemeral_5m_input_tokens":9827,"ephemeral_1h_input_tokens":0},"output_tokens":4,"service_tier":"standard"}},"requestId":"req_011CWyZXw8HEddXCfc8QdgCD","type":"assistant","uuid":"690432e3-a167-420b-b06f-52cb90ecefd2","timestamp":"2026-01-10T10:45:12.370Z"}
{"parentUuid":"690432e3-a167-420b-b06f-52cb90ecefd2","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01Vz4jK4mhkJvJ2krsfFGSei","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01PNxS9ng6oGzwSyCVe1g33X","name":"Grep","input":{"pattern":"time\\.Sleep|time\\.Second|time\\.Minute|30\\s*\\*|60\\s*\\*|wait\\.|Poll","path":"/home/josie/development/karpenter-ibm/test/e2e","type":"go","output_mode":"content","head_limit":80}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":9827,"cache_read_input_tokens":77613,"cache_creation":{"ephemeral_5m_input_tokens":9827,"ephemeral_1h_input_tokens":0},"output_tokens":188,"service_tier":"standard"}},"requestId":"req_011CWyZXw8HEddXCfc8QdgCD","type":"assistant","uuid":"1d6390d9-189e-4ac1-84ef-f65e475ee131","timestamp":"2026-01-10T10:45:13.361Z"}
{"parentUuid":"1d6390d9-189e-4ac1-84ef-f65e475ee131","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01PNxS9ng6oGzwSyCVe1g33X","type":"tool_result","content":"test/e2e/cleanup.go:98:\t\tt.Logf(\"Still waiting for %s deletion... (%d remaining, %v elapsed)\", resourceName, itemCount, time.Since(start).Round(time.Second))\ntest/e2e/cleanup.go:99:\t\ttime.Sleep(10 * time.Second)\ntest/e2e/cleanup.go:123:\t\t\ttimeout: 30 * time.Second,\ntest/e2e/cleanup.go:124:\t\t\tdeleteTimeout: 15 * time.Second,\ntest/e2e/cleanup.go:130:\t\t\ttimeout: 60 * time.Second,\ntest/e2e/cleanup.go:131:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:137:\t\t\ttimeout: 3 * time.Minute,\ntest/e2e/cleanup.go:138:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:144:\t\t\ttimeout: 2 * time.Minute,\ntest/e2e/cleanup.go:145:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:151:\t\t\ttimeout: 2 * time.Minute,\ntest/e2e/cleanup.go:152:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:305:\tctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)\ntest/e2e/cleanup.go:660:\tctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)\ntest/e2e/cleanup_test.go:79:\tdeadline := time.Now().Add(10 * time.Minute)\ntest/e2e/cleanup_test.go:87:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:147:\tdeadline := time.Now().Add(10 * time.Minute)\ntest/e2e/cleanup_test.go:160:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:204:\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:213:\t\tdeadline := time.Now().Add(5 * time.Minute)\ntest/e2e/cleanup_test.go:221:\t\t\ttime.Sleep(15 * time.Second)\ntest/e2e/cleanup_test.go:266:\ttime.Sleep(60 * time.Second)\ntest/e2e/cleanup_test.go:290:\tdeadline := time.Now().Add(15 * time.Minute)\ntest/e2e/cleanup_test.go:297:\t\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:309:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/block_device_test.go:334:\ttime.Sleep(30 * time.Second)\ntest/e2e/block_device_test.go:385:\tsuite.waitForNodeCleanup(t, nodeName, 5*time.Minute)\ntest/e2e/block_device_test.go:425:\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:434:\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:452:\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:466:\t\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:471:\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:490:\t\ttime.Sleep(pollInterval)\ntest/e2e/image_selector_test.go:287:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/helpers/assertions.go:38:\tDefaultPollInterval = 10 * time.Second\ntest/e2e/helpers/assertions.go:39:\tDefaultTimeout = 15 * time.Minute\ntest/e2e/helpers/assertions.go:61:\t\t\t\tt.Logf(\"NodeClass %s is ready after %s\", nodeClassName, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:66:\t}, timeout, DefaultPollInterval, \"NodeClass %s did not become ready within %s\", nodeClassName, timeout)\ntest/e2e/helpers/assertions.go:88:\t\t\t\tt.Logf(\"NodePool %s is ready after %s\", nodePoolName, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:93:\t}, timeout, DefaultPollInterval, \"NodePool %s did not become 
ready within %s\", nodePoolName, timeout)\ntest/e2e/helpers/assertions.go:117:\t\t\t\tnamespace, deploymentName, deployment.Status.ReadyReplicas, expectedReplicas, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:126:\t}, timeout, DefaultPollInterval, \"Deployment %s/%s did not have %d replicas ready within %s\", namespace, deploymentName, expectedReplicas, timeout)\ntest/e2e/helpers/assertions.go:147:\t\t\tt.Logf(\"Found %d nodes with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:153:\t}, timeout, DefaultPollInterval, \"Expected %d nodes with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:184:\t\t\tt.Logf(\"Found %d ready NodeClaims with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:190:\t}, timeout, DefaultPollInterval, \"Expected %d ready NodeClaims with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:218:\t\t\tt.Logf(\"Found %d running pods with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:224:\t}, timeout, DefaultPollInterval, \"Expected %d running pods with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:231:\t\ttimeout = 2 * time.Minute\ntest/e2e/helpers/assertions.go:246:\t\t\tt.Logf(\"All resources cleaned up after %s\", time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:252:\t}, timeout, DefaultPollInterval, \"Resources were not cleaned up within %s\", timeout)\ntest/e2e/suite.go:46:\ttestTimeout = 10 * time.Minute // Timeout for node provisioning\ntest/e2e/suite.go:47:\tpollInterval = 5 * time.Second // Faster polling for quicker test completion\ntest/e2e/suite.go:179:\t\t\tt.Logf(\" - %s (VPC: %s, Age: %s)\", nc.Name, nc.Spec.VPC, time.Since(nc.CreationTimestamp.Time).Round(time.Second))\ntest/e2e/scheduling_test.go:159:\ttime.Sleep(60 * time.Second)\ntest/e2e/scheduling_test.go:226:\ttime.Sleep(30 * time.Second)\ntest/e2e/helpers/resources.go:248:\t\ttimeout = 2 * time.Minute\ntest/e2e/helpers/resources.go:258:\t}, timeout, 5*time.Second, \"Resource %s %s was not deleted within %s\", obj.GetObjectKind().GroupVersionKind().Kind, obj.GetName(), timeout)\ntest/e2e/diagnostics.go:125:\ttime.Sleep(30 * time.Second)\ntest/e2e/diagnostics.go:244:\t\t\t\ttime.Sleep(30 * time.Second)\ntest/e2e/drift_test.go:67:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true,\ntest/e2e/drift_test.go:94:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:161:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:180:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:199:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:261:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:311:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:330:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:358:\t\twaitErr = 
wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:417:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:452:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/basic_workflow_test.go:178:\tmonitorDuration := 2 * time.Minute\ntest/e2e/waiting.go:47:\tinitialDelay := 2 * time.Second\ntest/e2e/waiting.go:55:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:142:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:166:\tdeletionTimeout := 5 * time.Minute\ntest/e2e/waiting.go:169:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, deletionTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:190:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:231:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, 5*time.Minute, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:256:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\n\n[Showing results with pagination = limit: 80, offset: 0]"}]},"uuid":"6c9a1d2f-5a29-408d-8911-7d58ba705364","timestamp":"2026-01-10T10:45:15.487Z","toolUseResult":{"mode":"content","numFiles":0,"filenames":[],"content":"test/e2e/cleanup.go:98:\t\tt.Logf(\"Still waiting for %s deletion... (%d remaining, %v elapsed)\", resourceName, itemCount, time.Since(start).Round(time.Second))\ntest/e2e/cleanup.go:99:\t\ttime.Sleep(10 * time.Second)\ntest/e2e/cleanup.go:123:\t\t\ttimeout: 30 * time.Second,\ntest/e2e/cleanup.go:124:\t\t\tdeleteTimeout: 15 * time.Second,\ntest/e2e/cleanup.go:130:\t\t\ttimeout: 60 * time.Second,\ntest/e2e/cleanup.go:131:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:137:\t\t\ttimeout: 3 * time.Minute,\ntest/e2e/cleanup.go:138:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:144:\t\t\ttimeout: 2 * time.Minute,\ntest/e2e/cleanup.go:145:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:151:\t\t\ttimeout: 2 * time.Minute,\ntest/e2e/cleanup.go:152:\t\t\tdeleteTimeout: 30 * time.Second,\ntest/e2e/cleanup.go:305:\tctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)\ntest/e2e/cleanup.go:660:\tctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)\ntest/e2e/cleanup_test.go:79:\tdeadline := time.Now().Add(10 * time.Minute)\ntest/e2e/cleanup_test.go:87:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:147:\tdeadline := time.Now().Add(10 * time.Minute)\ntest/e2e/cleanup_test.go:160:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:204:\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:213:\t\tdeadline := time.Now().Add(5 * time.Minute)\ntest/e2e/cleanup_test.go:221:\t\t\ttime.Sleep(15 * time.Second)\ntest/e2e/cleanup_test.go:266:\ttime.Sleep(60 * time.Second)\ntest/e2e/cleanup_test.go:290:\tdeadline := time.Now().Add(15 * time.Minute)\ntest/e2e/cleanup_test.go:297:\t\t\ttime.Sleep(30 * time.Second)\ntest/e2e/cleanup_test.go:309:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/block_device_test.go:334:\ttime.Sleep(30 * time.Second)\ntest/e2e/block_device_test.go:385:\tsuite.waitForNodeCleanup(t, nodeName, 
5*time.Minute)\ntest/e2e/block_device_test.go:425:\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:434:\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:452:\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:466:\t\t\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:471:\t\ttime.Sleep(pollInterval)\ntest/e2e/block_device_test.go:490:\t\ttime.Sleep(pollInterval)\ntest/e2e/image_selector_test.go:287:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/helpers/assertions.go:38:\tDefaultPollInterval = 10 * time.Second\ntest/e2e/helpers/assertions.go:39:\tDefaultTimeout = 15 * time.Minute\ntest/e2e/helpers/assertions.go:61:\t\t\t\tt.Logf(\"NodeClass %s is ready after %s\", nodeClassName, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:66:\t}, timeout, DefaultPollInterval, \"NodeClass %s did not become ready within %s\", nodeClassName, timeout)\ntest/e2e/helpers/assertions.go:88:\t\t\t\tt.Logf(\"NodePool %s is ready after %s\", nodePoolName, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:93:\t}, timeout, DefaultPollInterval, \"NodePool %s did not become ready within %s\", nodePoolName, timeout)\ntest/e2e/helpers/assertions.go:117:\t\t\t\tnamespace, deploymentName, deployment.Status.ReadyReplicas, expectedReplicas, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:126:\t}, timeout, DefaultPollInterval, \"Deployment %s/%s did not have %d replicas ready within %s\", namespace, deploymentName, expectedReplicas, timeout)\ntest/e2e/helpers/assertions.go:147:\t\t\tt.Logf(\"Found %d nodes with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:153:\t}, timeout, DefaultPollInterval, \"Expected %d nodes with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:184:\t\t\tt.Logf(\"Found %d ready NodeClaims with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:190:\t}, timeout, DefaultPollInterval, \"Expected %d ready NodeClaims with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:218:\t\t\tt.Logf(\"Found %d running pods with labels %v after %s\", expectedCount, labelSelector, time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:224:\t}, timeout, DefaultPollInterval, \"Expected %d running pods with labels %v, but condition not met within %s\", expectedCount, labelSelector, timeout)\ntest/e2e/helpers/assertions.go:231:\t\ttimeout = 2 * time.Minute\ntest/e2e/helpers/assertions.go:246:\t\t\tt.Logf(\"All resources cleaned up after %s\", time.Since(start).Round(time.Second))\ntest/e2e/helpers/assertions.go:252:\t}, timeout, DefaultPollInterval, \"Resources were not cleaned up within %s\", timeout)\ntest/e2e/suite.go:46:\ttestTimeout = 10 * time.Minute // Timeout for node provisioning\ntest/e2e/suite.go:47:\tpollInterval = 5 * time.Second // Faster polling for quicker test completion\ntest/e2e/suite.go:179:\t\t\tt.Logf(\" - %s (VPC: %s, Age: %s)\", nc.Name, nc.Spec.VPC, time.Since(nc.CreationTimestamp.Time).Round(time.Second))\ntest/e2e/scheduling_test.go:159:\ttime.Sleep(60 * time.Second)\ntest/e2e/scheduling_test.go:226:\ttime.Sleep(30 * time.Second)\ntest/e2e/helpers/resources.go:248:\t\ttimeout = 2 * 
time.Minute\ntest/e2e/helpers/resources.go:258:\t}, timeout, 5*time.Second, \"Resource %s %s was not deleted within %s\", obj.GetObjectKind().GroupVersionKind().Kind, obj.GetName(), timeout)\ntest/e2e/diagnostics.go:125:\ttime.Sleep(30 * time.Second)\ntest/e2e/diagnostics.go:244:\t\t\t\ttime.Sleep(30 * time.Second)\ntest/e2e/drift_test.go:67:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true,\ntest/e2e/drift_test.go:94:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:161:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:180:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:199:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:261:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:311:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:330:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:358:\t\twaitErr = wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:417:\t\twaitErr := wait.PollUntilContextTimeout(ctx, 10*time.Second, testTimeout, true,\ntest/e2e/drift_test.go:452:\t\ttime.Sleep(30 * time.Second)\ntest/e2e/basic_workflow_test.go:178:\tmonitorDuration := 2 * time.Minute\ntest/e2e/waiting.go:47:\tinitialDelay := 2 * time.Second\ntest/e2e/waiting.go:55:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:142:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:166:\tdeletionTimeout := 5 * time.Minute\ntest/e2e/waiting.go:169:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, deletionTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:190:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:231:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, 5*time.Minute, true, func(ctx context.Context) (bool, error) {\ntest/e2e/waiting.go:256:\terr := wait.PollUntilContextTimeout(ctx, pollInterval, testTimeout, true, func(ctx context.Context) (bool, error) {","numLines":80,"appliedLimit":80},"sourceToolAssistantUUID":"1d6390d9-189e-4ac1-84ef-f65e475ee131"}
{"parentUuid":"6c9a1d2f-5a29-408d-8911-7d58ba705364","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01H3sprFDRRh7ZoXLovBY1t6","type":"message","role":"assistant","content":[{"type":"text","text":"Now let me review the cleanup and isolation patterns:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3171,"cache_read_input_tokens":87440,"cache_creation":{"ephemeral_5m_input_tokens":3171,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZYQ7k8NnB56T4bg3N6","type":"assistant","uuid":"d06075df-69b7-48f8-881f-2c2ca75deb24","timestamp":"2026-01-10T10:45:19.615Z"}
{"parentUuid":"d06075df-69b7-48f8-881f-2c2ca75deb24","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01H3sprFDRRh7ZoXLovBY1t6","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01R2anhMpqhsLRWz9MAmFFgd","name":"Read","input":{"file_path":"/home/josie/development/karpenter-ibm/test/e2e/cleanup.go","limit":80}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3171,"cache_read_input_tokens":87440,"cache_creation":{"ephemeral_5m_input_tokens":3171,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}},"requestId":"req_011CWyZYQ7k8NnB56T4bg3N6","type":"assistant","uuid":"39569f68-6799-4586-a835-71c7011c04e3","timestamp":"2026-01-10T10:45:20.143Z"}
{"parentUuid":"39569f68-6799-4586-a835-71c7011c04e3","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01H3sprFDRRh7ZoXLovBY1t6","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_018Y5ckgsn7c3H44cf6CeEUL","name":"Grep","input":{"pattern":"defer|cleanup|defer.*func|t.Fatal|panic|require\\.Error","path":"/home/josie/development/karpenter-ibm/test/e2e/basic_workflow_test.go","output_mode":"content"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3171,"cache_read_input_tokens":87440,"cache_creation":{"ephemeral_5m_input_tokens":3171,"ephemeral_1h_input_tokens":0},"output_tokens":218,"service_tier":"standard"}},"requestId":"req_011CWyZYQ7k8NnB56T4bg3N6","type":"assistant","uuid":"a34f0844-0f52-4bbe-8f5c-aab381392872","timestamp":"2026-01-10T10:45:20.786Z"}
{"parentUuid":"39569f68-6799-4586-a835-71c7011c04e3","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01R2anhMpqhsLRWz9MAmFFgd","type":"tool_result","content":" 1→//go:build e2e\n 2→// +build e2e\n 3→\n 4→/*\n 5→Copyright The Kubernetes Authors.\n 6→\n 7→Licensed under the Apache License, Version 2.0 (the \"License\");\n 8→you may not use this file except in compliance with the License.\n 9→You may obtain a copy of the License at\n 10→\n 11→\thttp://www.apache.org/licenses/LICENSE-2.0\n 12→\n 13→Unless required by applicable law or agreed to in writing, software\n 14→distributed under the License is distributed on an \"AS IS\" BASIS,\n 15→WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n 16→See the License for the specific language governing permissions and\n 17→limitations under the License.\n 18→*/\n 19→package e2e\n 20→\n 21→import (\n 22→\t\"context\"\n 23→\t\"os\"\n 24→\t\"os/exec\"\n 25→\t\"strings\"\n 26→\t\"testing\"\n 27→\t\"time\"\n 28→\n 29→\t\"github.com/stretchr/testify/require\"\n 30→\tappsv1 \"k8s.io/api/apps/v1\"\n 31→\tcorev1 \"k8s.io/api/core/v1\"\n 32→\tpolicyv1 \"k8s.io/api/policy/v1\"\n 33→\t\"k8s.io/apimachinery/pkg/api/errors\"\n 34→\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n 35→\t\"k8s.io/apimachinery/pkg/types\"\n 36→\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n 37→\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n 38→\n 39→\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n 40→)\n 41→\n 42→// deleteNodeClaim deletes a specific NodeClaim\n 43→func (s *E2ETestSuite) deleteNodeClaim(t *testing.T, nodeClaimName string) {\n 44→\tctx := context.Background()\n 45→\tvar nodeClaim karpv1.NodeClaim\n 46→\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n 47→\trequire.NoError(t, err)\n 48→\terr = s.kubeClient.Delete(ctx, &nodeClaim)\n 49→\trequire.NoError(t, err)\n 50→}\n 51→\n 52→// cleanupTestWorkload deletes a test deployment\n 53→func (s *E2ETestSuite) cleanupTestWorkload(t *testing.T, deploymentName, namespace string) {\n 54→\tctx := context.Background()\n 55→\tdeployment := &appsv1.Deployment{\n 56→\t\tObjectMeta: metav1.ObjectMeta{\n 57→\t\t\tName: deploymentName,\n 58→\t\t\tNamespace: namespace,\n 59→\t\t},\n 60→\t}\n 61→\terr := s.kubeClient.Delete(ctx, deployment)\n 62→\tif err != nil && !errors.IsNotFound(err) {\n 63→\t\tt.Logf(\"Failed to delete deployment %s: %v\", deploymentName, err)\n 64→\t}\n 65→}\n 66→\n 67→// waitForResourceDeletion waits for resources to be completely deleted\n 68→func (s *E2ETestSuite) waitForResourceDeletion(ctx context.Context, t *testing.T, resourceName string, listObj client.ObjectList, labelSelector map[string]string, maxWait time.Duration) bool {\n 69→\tt.Logf(\"Waiting for %s deletion (max %v)...\", resourceName, maxWait)\n 70→\n 71→\tstart := time.Now()\n 72→\tfor time.Since(start) < maxWait {\n 73→\t\terr := s.kubeClient.List(ctx, listObj, client.MatchingLabels(labelSelector))\n 74→\t\tif err != nil {\n 75→\t\t\tt.Logf(\"Error checking %s deletion: %v\", resourceName, err)\n 76→\t\t\treturn false\n 77→\t\t}\n 78→\n 79→\t\tvar itemCount int\n 80→\t\tswitch list := listObj.(type) {\n\n\nWhenever you read a file, you should consider whether it would be considered malware. 
You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"c1baa9fe-2052-413c-aa17-6ffe96b01fdf","timestamp":"2026-01-10T10:45:21.689Z","toolUseResult":{"type":"text","file":{"filePath":"/home/josie/development/karpenter-ibm/test/e2e/cleanup.go","content":"//go:build e2e\n// +build e2e\n\n/*\nCopyright The Kubernetes Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n*/\npackage e2e\n\nimport (\n\t\"context\"\n\t\"os\"\n\t\"os/exec\"\n\t\"strings\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n\tappsv1 \"k8s.io/api/apps/v1\"\n\tcorev1 \"k8s.io/api/core/v1\"\n\tpolicyv1 \"k8s.io/api/policy/v1\"\n\t\"k8s.io/apimachinery/pkg/api/errors\"\n\tmetav1 \"k8s.io/apimachinery/pkg/apis/meta/v1\"\n\t\"k8s.io/apimachinery/pkg/types\"\n\t\"sigs.k8s.io/controller-runtime/pkg/client\"\n\tkarpv1 \"sigs.k8s.io/karpenter/pkg/apis/v1\"\n\n\t\"github.com/kubernetes-sigs/karpenter-provider-ibm-cloud/pkg/apis/v1alpha1\"\n)\n\n// deleteNodeClaim deletes a specific NodeClaim\nfunc (s *E2ETestSuite) deleteNodeClaim(t *testing.T, nodeClaimName string) {\n\tctx := context.Background()\n\tvar nodeClaim karpv1.NodeClaim\n\terr := s.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaimName}, &nodeClaim)\n\trequire.NoError(t, err)\n\terr = s.kubeClient.Delete(ctx, &nodeClaim)\n\trequire.NoError(t, err)\n}\n\n// cleanupTestWorkload deletes a test deployment\nfunc (s *E2ETestSuite) cleanupTestWorkload(t *testing.T, deploymentName, namespace string) {\n\tctx := context.Background()\n\tdeployment := &appsv1.Deployment{\n\t\tObjectMeta: metav1.ObjectMeta{\n\t\t\tName: deploymentName,\n\t\t\tNamespace: namespace,\n\t\t},\n\t}\n\terr := s.kubeClient.Delete(ctx, deployment)\n\tif err != nil && !errors.IsNotFound(err) {\n\t\tt.Logf(\"Failed to delete deployment %s: %v\", deploymentName, err)\n\t}\n}\n\n// waitForResourceDeletion waits for resources to be completely deleted\nfunc (s *E2ETestSuite) waitForResourceDeletion(ctx context.Context, t *testing.T, resourceName string, listObj client.ObjectList, labelSelector map[string]string, maxWait time.Duration) bool {\n\tt.Logf(\"Waiting for %s deletion (max %v)...\", resourceName, maxWait)\n\n\tstart := time.Now()\n\tfor time.Since(start) < maxWait {\n\t\terr := s.kubeClient.List(ctx, listObj, client.MatchingLabels(labelSelector))\n\t\tif err != nil {\n\t\t\tt.Logf(\"Error checking %s deletion: %v\", resourceName, err)\n\t\t\treturn false\n\t\t}\n\n\t\tvar itemCount int\n\t\tswitch list := listObj.(type) {","numLines":80,"startLine":1,"totalLines":705}},"sourceToolAssistantUUID":"39569f68-6799-4586-a835-71c7011c04e3"}
{"parentUuid":"a34f0844-0f52-4bbe-8f5c-aab381392872","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c670","version":"2.1.3","gitBranch":"main","agentId":"ae490b9","slug":"idempotent-strolling-fox","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_018Y5ckgsn7c3H44cf6CeEUL","type":"tool_result","content":"64:\tsuite.cleanupTestWorkload(t, deployment.Name, deployment.Namespace)\n69:\tsuite.cleanupTestResources(t, testName)\n81:\t// Use automatic cleanup wrapper - CRITICAL for preventing stale resources\n143:\t// Delete the NodeClaim to test cleanup\n152:\tsuite.cleanupTestResources(t, testName)\n201:\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n202:\tsuite.cleanupTestResources(t, testName)"}]},"uuid":"0a9bf0f1-1660-4ab1-a12a-0b4f18d73a22","timestamp":"2026-01-10T10:45:21.773Z","toolUseResult":{"mode":"content","numFiles":0,"filenames":[],"content":"64:\tsuite.cleanupTestWorkload(t, deployment.Name, deployment.Namespace)\n69:\tsuite.cleanupTestResources(t, testName)\n81:\t// Use automatic cleanup wrapper - CRITICAL for preventing stale resources\n143:\t// Delete the NodeClaim to test cleanup\n152:\tsuite.cleanupTestResources(t, testName)\n201:\tsuite.cleanupTestWorkload(t, deployment.Name, \"default\")\n202:\tsuite.cleanupTestResources(t, testName)","numLines":7},"sourceToolAssistantUUID":"a34f0844-0f52-4bbe-8f5c-aab381392872"}
{"parentUuid":"0a9bf0f1-1660-4ab1-a12a-0b4f18d73a22","isSidechain":true,"userType":"external","cwd":"/home/josie/development/karpenter-ibm","sessionId":"34bef956-6e8e-4b60-9fb9-00aeb4b7c