#!/bin/bash
#
# Freeleaps Kubernetes Cluster Bootstrap Script
#
# Bootstraps a complete Kubernetes cluster from Azure VMs:
#   1. checks local prerequisites (inventory file, Kubespray checkout, CLI tools)
#   2. verifies (and optionally starts) the Azure VMs named in the inventory
#   3. tests Ansible connectivity to every host
#   4. runs the Kubespray cluster.yml playbook
#   5. copies admin.conf from the first control-plane node to ~/.kube/config
#   6. verifies nodes and critical kube-system pods
#   7. applies the infrastructure manifests and runs the authenticator
#
# Usage: see --help.
#
# Environment overrides (defaults reproduce the previous hard-coded values):
#   AZURE_RESOURCE_GROUP  Azure resource group holding the VMs (default: k8s)
#   FREELEAPS_SSH_USER    SSH login used to fetch admin.conf
#                         (default: wwwadmin@mathmast.com)

# Abort on the first unhandled failure. pipefail is deliberately NOT enabled:
# several checks below pipe kubectl into `grep -q`, which closes the pipe
# early and would make the producer's SIGPIPE look like a failure.
set -e

# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Configuration: every path is derived from the location of this script, so
# the current working directory of the caller does not matter.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FREELEAPS_OPS_DIR="$(dirname "$SCRIPT_DIR")"
INVENTORY_FILE="$FREELEAPS_OPS_DIR/cluster/ansible/manifests/inventory.ini"
KUBESPRAY_DIR="$FREELEAPS_OPS_DIR/3rd/kubespray"
MANIFESTS_DIR="$FREELEAPS_OPS_DIR/cluster/manifests"
BIN_DIR="$FREELEAPS_OPS_DIR/cluster/bin"
AZURE_RESOURCE_GROUP="${AZURE_RESOURCE_GROUP:-k8s}"
FREELEAPS_SSH_USER="${FREELEAPS_SSH_USER:-wwwadmin@mathmast.com}"

#######################################
# Colored log helpers.
# Arguments: $1 - message text
# Outputs:   info/success go to stdout; warnings/errors go to stderr
#######################################
print_status() {
  echo -e "${BLUE}[INFO]${NC} $1"
}

print_success() {
  echo -e "${GREEN}[SUCCESS]${NC} $1"
}

print_warning() {
  echo -e "${YELLOW}[WARNING]${NC} $1" >&2
}

print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}

#######################################
# Lists the unique host names of an Ansible INI inventory, one per line.
# Section headers, comments, key=value settings and the bodies of
# [group:vars] / [group:children] sections are not hosts and are skipped.
# Arguments: $1 - path to the inventory file
# Outputs:   host names on stdout, in first-seen order
#######################################
inventory_hosts() {
  local inventory=$1
  awk '
    /^[[:space:]]*\[/ {
      # Bodies of :vars and :children sections hold settings / group names.
      skip_section = ($0 ~ /:(vars|children)[[:space:]]*\]/)
      next
    }
    skip_section { next }
    /^[a-zA-Z0-9-]/ {
      if ($1 ~ /=/) next            # bare key=value line, not a host
      if (!seen[$1]++) print $1     # a host may be listed in several groups
    }
  ' "$inventory"
}

#######################################
# Prints the connect address of the first host in [kube_control_plane].
# The ansible_host=... value is used wherever that host defines it in the
# inventory (any field position, any section); when the host never sets
# ansible_host, the inventory host name itself is printed, which is the
# address Ansible would connect to as well.
# Arguments: $1 - path to the inventory file
# Outputs:   the address on stdout; nothing when the group has no hosts
#######################################
control_plane_address() {
  local inventory=$1
  awk '
    # Value of the ansible_host=... token on the current line, or "".
    function host_setting(   i) {
      for (i = 2; i <= NF; i++)
        if ($i ~ /^ansible_host=/)
          return substr($i, length("ansible_host=") + 1)
      return ""
    }
    /^[[:space:]]*(#|;|$)/ { next }
    /^[[:space:]]*\[/ {
      in_group = ($0 ~ /^[[:space:]]*\[kube_control_plane\]/)
      next
    }
    {
      addr = host_setting()
      if (addr != "" && !($1 in address)) address[$1] = addr
      if (in_group && first == "") first = $1
    }
    END {
      if (first == "") exit
      print ((first in address) ? address[first] : first)
    }
  ' "$inventory"
}

#######################################
# Checks that the inventory, the Kubespray checkout and the required CLI
# tools are present. Exits 1 with an error message otherwise.
# Globals: INVENTORY_FILE, KUBESPRAY_DIR (read)
#######################################
check_prerequisites() {
  print_status "Checking prerequisites..."

  # Check that the repository layout this script expects is in place
  if [[ ! -f "$INVENTORY_FILE" ]]; then
    print_error "Inventory file not found: $INVENTORY_FILE"
    print_error "Please run this script from the freeleaps-ops/docs directory"
    exit 1
  fi

  # Check if kubespray exists
  if [[ ! -d "$KUBESPRAY_DIR" ]]; then
    print_error "Kubespray directory not found: $KUBESPRAY_DIR"
    exit 1
  fi

  # Check required tools; each entry is "<command>:<name shown to the user>"
  local missing_tools=()
  local spec
  for spec in "ansible:ansible" "az:azure-cli" "kubectl:kubectl"; do
    if ! command -v "${spec%%:*}" &> /dev/null; then
      missing_tools+=("${spec#*:}")
    fi
  done

  if [[ ${#missing_tools[@]} -gt 0 ]]; then
    print_error "Missing required tools: ${missing_tools[*]}"
    print_warning "Please install missing tools before proceeding"
    exit 1
  fi

  print_success "All prerequisites are met"
}

#######################################
# Checks the power state of every inventory host in Azure and offers to
# start the ones that are not running.
# Globals: INVENTORY_FILE, AZURE_RESOURCE_GROUP (read)
#######################################
verify_azure_vms() {
  print_status "Verifying Azure VMs..."

  # Get VMs from inventory
  local vms=()
  local host
  while IFS= read -r host; do
    vms+=("$host")
  done < <(inventory_hosts "$INVENTORY_FILE")

  print_status "Found VMs in inventory: ${vms[*]}"

  # Check VM status in Azure
  local vm power_state answer
  for vm in "${vms[@]}"; do
    # powerState is only part of the output when --show-details is given.
    # A failing query (e.g. the VM does not exist) is treated as "not
    # running" instead of aborting, so the operator still gets the prompt.
    power_state=$(az vm show --show-details \
      --resource-group "$AZURE_RESOURCE_GROUP" --name "$vm" \
      --query "powerState" -o tsv 2>/dev/null) || power_state=""

    if [[ "$power_state" == "VM running" ]]; then
      print_success "VM $vm is running"
      continue
    fi

    print_warning "VM $vm is not running (state: ${power_state:-unknown})"
    # EOF on stdin (non-interactive run) counts as "no" rather than
    # tripping set -e.
    read -r -n 1 -p "Do you want to start VM $vm? (y/N): " answer || answer=""
    echo
    if [[ $answer =~ ^[Yy]$ ]]; then
      print_status "Starting VM $vm..."
      if ! az vm start --resource-group "$AZURE_RESOURCE_GROUP" --name "$vm"; then
        print_error "Failed to start VM $vm"
        exit 1
      fi
      # Pause after the start command returns, before moving on.
      sleep 30
    fi
  done
}

#######################################
# Runs an Ansible ping against every inventory host. Exits 1 on failure.
# Globals: INVENTORY_FILE (read)
#######################################
test_connectivity() {
  print_status "Testing connectivity to all VMs..."

  # Run from the inventory directory, as the relative inventory path requires.
  if ! cd "$(dirname "$INVENTORY_FILE")"; then
    print_error "Cannot enter inventory directory: $(dirname "$INVENTORY_FILE")"
    exit 1
  fi

  if ansible -i inventory.ini all -m ping -kK; then
    print_success "Connectivity to all VMs verified"
  else
    print_error "Connectivity test failed"
    print_warning "Please check:"
    print_warning "1. VMs are running"
    print_warning "2. Network security groups allow SSH (port 22)"
    print_warning "3. SSH credentials are correct"
    exit 1
  fi
}

#######################################
# Runs the Kubespray cluster.yml playbook against the inventory.
# Globals: KUBESPRAY_DIR, INVENTORY_FILE (read)
#######################################
bootstrap_cluster() {
  print_status "Bootstrapping Kubernetes cluster..."

  # The playbook is addressed relative to the Kubespray checkout.
  if ! cd "$KUBESPRAY_DIR"; then
    print_error "Cannot enter Kubespray directory: $KUBESPRAY_DIR"
    exit 1
  fi

  print_status "Running Kubespray cluster installation..."
  print_warning "This process may take 15-30 minutes..."

  # The absolute inventory path names the same file the former
  # ../../cluster/ansible/manifests/inventory.ini did, but keeps working
  # when the Kubespray directory is reached through a symlink.
  if ansible-playbook -i "$INVENTORY_FILE" ./cluster.yml -kK -b; then
    print_success "Kubernetes cluster bootstrapped successfully"
  else
    print_error "Cluster bootstrap failed"
    print_warning "Check the Ansible output for errors"
    exit 1
  fi
}

#######################################
# Copies /etc/kubernetes/admin.conf from the first control-plane node to
# ~/.kube/config. Exits 1 on failure.
# Globals: INVENTORY_FILE, FREELEAPS_SSH_USER (read)
#######################################
get_kubeconfig() {
  print_status "Retrieving kubeconfig..."

  # Get the first master node IP
  local master_ip
  master_ip=$(control_plane_address "$INVENTORY_FILE") || master_ip=""

  if [[ -z "$master_ip" ]]; then
    print_error "Could not find master node IP in inventory"
    exit 1
  fi

  print_status "Getting kubeconfig from master node: $master_ip"

  # Create .kube directory if it doesn't exist
  local kube_dir="$HOME/.kube"
  mkdir -p "$kube_dir"

  # Download into a temporary file first, so a failed or empty transfer
  # never truncates an existing kubeconfig.
  local tmp_config
  if ! tmp_config=$(mktemp "$kube_dir/config.XXXXXX"); then
    print_error "Failed to create a temporary file in $kube_dir"
    exit 1
  fi

  # -l is used because the login name itself contains an '@'.
  if ssh -l "$FREELEAPS_SSH_USER" "$master_ip" \
    "sudo cat /etc/kubernetes/admin.conf" > "$tmp_config" \
    && [[ -s "$tmp_config" ]]; then
    mv -f -- "$tmp_config" "$kube_dir/config"
    print_success "Kubeconfig retrieved successfully"
  else
    rm -f -- "$tmp_config"
    print_error "Failed to retrieve kubeconfig"
    exit 1
  fi
}

#######################################
# Waits for the API server to answer, then reports node readiness and the
# state of the critical kube-system pods. Exits 1 only when the cluster
# never becomes reachable; everything else is reported as a warning.
#######################################
verify_cluster() {
  print_status "Verifying cluster installation..."

  # Wait for cluster to be ready
  local max_attempts=30
  local attempt
  local reachable=false
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if kubectl get nodes &> /dev/null; then
      print_success "Cluster is accessible"
      reachable=true
      break
    fi
    print_status "Waiting for cluster to be ready... (attempt $attempt/$max_attempts)"
    sleep 30
  done

  if [[ "$reachable" != true ]]; then
    print_error "Cluster verification failed"
    print_warning "Troubleshooting steps:"
    print_warning "1. Check VM resources (CPU, memory)"
    print_warning "2. Check network connectivity between nodes"
    print_warning "3. Check Ansible logs for errors"
    print_warning "4. Verify inventory file configuration"
    exit 1
  fi

  # Check node status
  print_status "Checking node status..."
  kubectl get nodes

  # Count ready nodes from the STATUS column (2nd field). Matching the bare
  # word "Ready" anywhere on the line would also count "NotReady" nodes.
  local node_lines ready_nodes total_nodes
  node_lines=$(kubectl get nodes --no-headers) || node_lines=""
  ready_nodes=$(awk '$2 ~ /^Ready(,|$)/ { n++ } END { print n + 0 }' <<< "$node_lines")
  total_nodes=$(awk 'NF { n++ } END { print n + 0 }' <<< "$node_lines")

  if ((total_nodes > 0 && ready_nodes == total_nodes)); then
    print_success "All $total_nodes nodes are ready"
  else
    print_warning "Only $ready_nodes/$total_nodes nodes are ready"
    kubectl get nodes
  fi

  # Check system pods
  print_status "Checking system pods..."
  kubectl get pods -n kube-system

  # Wait for critical system pods
  print_status "Waiting for critical system pods..."
  local critical_pods=("kube-apiserver" "kube-controller-manager" "kube-scheduler" "etcd")
  local max_pod_attempts=20
  local pod_prefix pod_attempt pod_running

  for pod_prefix in "${critical_pods[@]}"; do
    pod_running=false
    for ((pod_attempt = 1; pod_attempt <= max_pod_attempts; pod_attempt++)); do
      if kubectl get pods -n kube-system | grep -q "$pod_prefix.*Running"; then
        print_success "$pod_prefix is running"
        pod_running=true
        break
      fi
      # No point in sleeping after the last attempt.
      if ((pod_attempt < max_pod_attempts)); then
        sleep 10
      fi
    done

    if [[ "$pod_running" != true ]]; then
      print_warning "$pod_prefix is not running"
      # Purely diagnostic: grep exits 1 when no such pod exists, which must
      # not abort the script under set -e.
      kubectl get pods -n kube-system | grep "$pod_prefix" || true
    fi
  done

  # Check cluster info
  print_status "Checking cluster info..."
  kubectl cluster-info
}

#######################################
# Applies the infrastructure manifest directories in dependency order.
# Missing component directories are reported and skipped.
# Globals: MANIFESTS_DIR (read)
#######################################
deploy_infrastructure() {
  print_status "Deploying infrastructure components..."

  if ! cd "$MANIFESTS_DIR"; then
    print_error "Cannot enter manifests directory: $MANIFESTS_DIR"
    exit 1
  fi

  # Deploy in order
  local components=(
    "freeleaps-controls-system"
    "freeleaps-devops-system"
    "freeleaps-monitoring-system"
    "freeleaps-logging-system"
    "freeleaps-data-platform"
  )

  local component
  for component in "${components[@]}"; do
    if [[ ! -d "$component" ]]; then
      print_warning "Component directory not found: $component"
      continue
    fi

    print_status "Deploying $component..."
    if ! kubectl apply -f "$component/"; then
      print_error "Failed to deploy $component"
      exit 1
    fi

    # Wait for deployment to stabilize
    print_status "Waiting for $component to stabilize..."
    sleep 30
  done

  print_success "Infrastructure deployment completed"
}

#######################################
# Runs the cluster authenticator when it is present; otherwise tells the
# operator to do it manually.
# Globals: BIN_DIR (read)
#######################################
setup_authentication() {
  print_status "Setting up authentication..."

  # Test the full path first so a missing bin directory results in the
  # warning below instead of a failed cd aborting the script.
  if [[ -f "$BIN_DIR/freeleaps-cluster-authenticator" ]]; then
    print_status "Running authentication setup..."
    cd "$BIN_DIR"
    ./freeleaps-cluster-authenticator auth
  else
    print_warning "Authentication script not found"
    print_warning "Please run authentication setup manually"
  fi
}

#######################################
# Prints a summary of the cluster and the suggested next steps.
# Globals: BIN_DIR (read)
#######################################
display_final_status() {
  print_success "Kubernetes cluster bootstrap completed!"
  echo
  echo "=== Cluster Status ==="
  kubectl get nodes
  echo
  echo "=== System Pods ==="
  kubectl get pods -n kube-system
  echo
  echo "=== Infrastructure Status ==="
  # grep exits 1 when nothing matches; report that instead of letting
  # set -e end the script before the next steps are shown.
  kubectl get pods --all-namespaces \
    | grep -E "(argocd|cert-manager|prometheus|grafana)" \
    || print_warning "No matching infrastructure pods found"
  echo
  echo "=== Next Steps ==="
  echo "1. Verify all components are running: kubectl get pods --all-namespaces"
  echo "2. Access ArgoCD: kubectl port-forward svc/argocd-server -n freeleaps-devops-system 8080:80"
  echo "3. Access Grafana: kubectl port-forward svc/kube-prometheus-stack-grafana -n freeleaps-monitoring-system 3000:80"
  echo "4. Setup authentication: cd $BIN_DIR && ./freeleaps-cluster-authenticator auth"
  echo "5. Deploy applications via ArgoCD"
}

#######################################
# Full bootstrap: every step, in order.
#######################################
main() {
  echo "=========================================="
  echo "Freeleaps Kubernetes Cluster Bootstrap"
  echo "=========================================="
  echo

  check_prerequisites
  verify_azure_vms
  test_connectivity
  bootstrap_cluster
  get_kubeconfig
  verify_cluster
  deploy_infrastructure
  setup_authentication
  display_final_status
}

# Handle script arguments
if [[ $# -eq 0 ]]; then
  main
else
  case "$1" in
    --help | -h)
      echo "Usage: $0 [OPTIONS]"
      echo
      echo "Options:"
      echo "  --help, -h     Show this help message"
      echo "  --verify       Only verify prerequisites and connectivity"
      echo "  --bootstrap    Only bootstrap the cluster (skip infrastructure)"
      echo
      echo "Environment:"
      echo "  AZURE_RESOURCE_GROUP  Resource group of the VMs (default: k8s)"
      echo "  FREELEAPS_SSH_USER    SSH user used to fetch the kubeconfig"
      echo
      echo "This script bootstraps a complete Kubernetes cluster from Azure VMs."
      exit 0
      ;;
    --verify)
      check_prerequisites
      verify_azure_vms
      test_connectivity
      print_success "Verification completed successfully"
      ;;
    --bootstrap)
      check_prerequisites
      verify_azure_vms
      test_connectivity
      bootstrap_cluster
      get_kubeconfig
      verify_cluster
      print_success "Cluster bootstrap completed"
      ;;
    *)
      print_error "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
  esac
fi