#!/bin/bash
# Troubleshoot connectivity from NSX-T network (10.20.5.0/24) to worker node (10.250.64.10)
# Run this script from a machine on the 10.20.5.0/24 network (e.g., 10.20.5.69 or master at 10.20.5.71)

# Strict mode: abort on unhandled command failures, unset variables, and
# failures in any stage of a pipeline.
set -euo pipefail

# Worker node to probe. Defaults to the known worker address; pass a different
# IP or hostname as the first argument to run the same checks against another
# node, e.g.:  ./<this-script> 10.250.64.11
WORKER_IP="${1:-10.250.64.10}"

echo "=== Connectivity Tests from NSX-T Network to Worker $WORKER_IP ==="
echo

# Section 1: ICMP reachability.
# Send three echo requests (2 s wait each) and report a single verdict line.
# ping's own output is discarded; only its exit status matters here.
echo "1. Testing ICMP (ping)..."
icmp_verdict="   ✗ ICMP: FAILED (This is expected - no ICMP rule in security group)"
if ping -W 2 -c 3 "$WORKER_IP" &> /dev/null; then
    icmp_verdict="   ✓ ICMP: SUCCESS"
fi
echo "$icmp_verdict"
echo

# Section 2: TCP reachability of the ports RKE2/Cilium are expected to use.
echo "2. Testing RKE2 Required TCP Ports..."

# Port -> human-readable description.
# NOTE(review): 8472 is described below in the UDP section as "Cilium VXLAN UDP",
# so a TCP probe of it is expected to report CLOSED/FILTERED regardless of the
# security-group state. It is kept here so the report matches the summary text.
declare -A ports=(
    ["6443"]="Kubernetes API Server"
    ["9345"]="RKE2 Supervisor API"
    ["10250"]="Kubelet API"
    ["10248"]="Kubelet Health"
    ["10249"]="Kube-Proxy Metrics"
    ["10256"]="Kube-Proxy Health"
    ["8472"]="Cilium VXLAN"
    ["4240"]="Cilium Health"
    ["4244"]="Cilium Hubble"
)

# Explicit probe order. Iterating "${!ports[@]}" yields keys in bash's internal
# hash order, which makes the report order arbitrary and hard to compare
# between runs.
port_order=(6443 9345 10250 10248 10249 10256 8472 4240 4244)

for port in "${port_order[@]}"; do
    desc="${ports[$port]}"
    printf "   Testing port %5s (%-25s): " "$port" "$desc"
    # Primary probe: bash's /dev/tcp pseudo-device, bounded by a 3 s timeout.
    # Host and port are passed as positional arguments instead of being
    # interpolated into the code string handed to `bash -c`.
    if timeout 3 bash -c 'echo > "/dev/tcp/$1/$2"' _ "$WORKER_IP" "$port" 2>/dev/null; then
        echo "✓ OPEN"
    # Fallback probe: netcat in scan mode. Its exit status is used rather than
    # grepping for "succeeded", because that message is specific to one netcat
    # implementation (others print e.g. "Connected to" or "open").
    elif nc -z -w 2 "$WORKER_IP" "$port" > /dev/null 2>&1; then
        echo "✓ OPEN"
    else
        echo "✗ CLOSED/FILTERED"
    fi
done
echo

# Section 3: UDP ports.
# No automated probe is performed; this only prints the command an operator
# can run by hand. The trailing empty argument emits the blank separator line.
printf '%s\n' \
    "3. Testing RKE2 Required UDP Ports..." \
    "   Port 8472 (Cilium VXLAN UDP): Manual check needed (use: nc -u -zv -w 2 $WORKER_IP 8472)" \
    ""

# Section 4: TLS handshake against the kubelet API port (10250).
echo "4. Testing Kubelet TLS Handshake..."
if command -v openssl > /dev/null 2>&1; then
    echo "   Attempting TLS connection to $WORKER_IP:10250..."
    # Run s_client exactly once and keep both its output and its exit status.
    # Capturing (instead of piping straight into grep) avoids two problems:
    #   - under `set -e` + `pipefail`, an unguarded second openssl pipeline
    #     could abort the whole script if that run timed out or hit SIGPIPE;
    #   - `grep -q` closing the pipe early could kill openssl with SIGPIPE and
    #     turn a good handshake into a reported failure.
    tls_status=0
    tls_output=$(timeout 5 openssl s_client -connect "$WORKER_IP:10250" -showcerts < /dev/null 2>&1) || tls_status=$?
    # Success requires both a clean exit (no timeout / connect error) and the
    # presence of the verification summary line in the output.
    if (( tls_status == 0 )) && grep -q "Verify return code" <<< "$tls_output"; then
        echo "   ✓ TLS handshake successful"
        # Show only the first verification summary line.
        grep -m 1 "Verify return code" <<< "$tls_output"
    else
        echo "   ✗ TLS handshake failed or timeout"
    fi
else
    echo "   ⚠ openssl not available"
fi
echo

# Section 5: kubelet health endpoint over plain HTTP (port 10248).
echo "5. Testing Kubelet Health Endpoint (HTTP on 10248)..."
if ! command -v curl > /dev/null 2>&1; then
    # Mirror the openssl section: a missing tool is not the same as an
    # unreachable endpoint, so report it separately.
    echo "   ⚠ curl not available"
# Issue a single request (3 s cap) and keep the body. Issuing a second,
# unguarded curl just to print the body could abort the script under `set -e`
# if that second request failed.
elif health_body=$(curl -s -m 3 "http://$WORKER_IP:10248/healthz" 2>/dev/null); then
    echo "   ✓ Kubelet health endpoint reachable"
    # Print the response body on its own line when there is one.
    if [[ -n "$health_body" ]]; then
        printf '%s\n' "$health_body"
    fi
else
    echo "   ✗ Kubelet health endpoint not reachable"
fi
echo

# Section 6: show which route the kernel would pick for the worker address.
# `ip route get` prints the selected route itself; a fallback message is
# printed only when it exits non-zero.
printf '%s\n' "6. Checking Routing..." "   Route to $WORKER_IP:"
if ! ip route get "$WORKER_IP"; then
    echo "   ✗ No route found"
fi
echo

# Final section: static interpretation guide for the results printed above.
# The heredoc delimiter is quoted so the text is emitted literally, with no
# variable or command expansion.
cat << 'EOF'
=== Summary ===
If multiple ports are blocked, this indicates:
  1. Firewall on the worker node itself (iptables/firewalld)
  2. Missing security group rules (less likely - we saw some rules exist)
  3. Network path issue (GRE tunnel, transit gateway)

Expected results:
  - ICMP should FAIL (no rule in security group)
  - Port 10250 should be OPEN (rule exists for 10.20.0.0/16)
  - Ports 9345, 6443, 8472, 4240 should currently FAIL (no rules)

If even port 10250 fails, the issue is likely:
  a) Local firewall on worker node blocking traffic
  b) GRE tunnel configuration not working properly
  c) Worker node's iptables/Cilium blocking traffic
EOF
