mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2025-05-15 16:20:25 +00:00
Update monitor-all.sh (#4437)
✅ Summary of Changes from the Initial Version. 🧩 1. Tag-Based Filtering (Core Feature): New feature: only instances (VMs or containers) that have the mon-restart tag are restarted. This makes monitoring and auto-restart controllable directly from the Proxmox Web UI, without editing scripts or services. Set via GUI: VM/CT → Options → Tags → mon-restart. Set via CLI: qm set <vmid> -tags mon-restart or pct set <ctid> -tags mon-restart. This is the primary new control mechanism, making the script safer, more flexible, and more user-friendly. 🧰 2. Backward-Compatible Exclusion Mechanism: The original feature that lets you exclude instances via CLI arguments is preserved, for example: ./ping-instances.sh 101 300. These IDs will always be skipped, regardless of tag. 🧠 3. Intelligent Responsiveness Checks: For VMs, the script uses qm guest cmd <id> ping to check responsiveness via the QEMU guest agent; it no longer relies on network-level ping, which can be misleading or blocked. For containers (CTs), it uses a traditional ping to the IP address obtained via pct exec, since CTs don’t support the QEMU guest agent. ⛔ 4. Instance Skipping Improvements: Instances are now skipped if they are explicitly excluded via CLI, if they are templates, if they are configured with onboot: 0 or have no onboot setting at all, or if they lack the mon-restart tag, regardless of other status. 🪵 5. Same Logging Behavior: All output continues to go to /var/log/ping-instances.log for persistent tracking. Verbose messages were added for traceability (e.g., why a VM or CT was skipped). 🎯 Why This Matters: With tag-based control, admins can now manage restart behavior dynamically from the Proxmox Web UI, making the script more secure (no accidental restarts), more maintainable (no script edits needed), and more user-friendly (integrated with the UI workflow).
This commit is contained in:
parent
7740ab68f7
commit
16c0d09d6b
@ -16,8 +16,17 @@ cat <<"EOF"
|
|||||||
EOF
|
EOF
|
||||||
|
|
||||||
add() {
|
add() {
|
||||||
|
echo -e "\n IMPORTANT: Tag-Based Monitoring Enabled"
|
||||||
|
echo "Only VMs and containers with the tag 'mon-restart' will be automatically restarted by this service."
|
||||||
|
echo
|
||||||
|
echo "🔧 How to add the tag:"
|
||||||
|
echo " → Proxmox Web UI: Go to VM/CT → Options → Tags → Add 'mon-restart'"
|
||||||
|
echo " → CLI: qm set <vmid> -tags mon-restart"
|
||||||
|
echo " pct set <ctid> -tags mon-restart"
|
||||||
|
echo
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
read -p "This script will add Monitor All to Proxmox VE. Proceed(y/n)?" yn
|
read -p "This script will add Monitor All to Proxmox VE. Proceed (y/n)? " yn
|
||||||
case $yn in
|
case $yn in
|
||||||
[Yy]*) break ;;
|
[Yy]*) break ;;
|
||||||
[Nn]*) exit ;;
|
[Nn]*) exit ;;
|
||||||
@ -25,34 +34,34 @@ add() {
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
echo '#!/usr/bin/env bash
|
cat <<'EOF' >/usr/local/bin/ping-instances.sh
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
# Read excluded instances from command line arguments
|
# Read excluded instances from command line arguments
|
||||||
excluded_instances=("$@")
|
excluded_instances=("$@")
|
||||||
echo "Excluded instances: ${excluded_instances[@]}"
|
echo "Excluded instances: ${excluded_instances[@]}"
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
|
|
||||||
for instance in $(pct list | awk '\''{if(NR>1) print $1}'\''; qm list | awk '\''{if(NR>1) print $1}'\''); do
|
for instance in $(pct list | awk 'NR>1 {print $1}'; qm list | awk 'NR>1 {print $1}'); do
|
||||||
# Skip excluded instances
|
# Skip excluded instances
|
||||||
if [[ " ${excluded_instances[@]} " =~ " ${instance} " ]]; then
|
if [[ " ${excluded_instances[@]} " =~ " ${instance} " ]]; then
|
||||||
echo "Skipping $instance because it is excluded"
|
echo "Skipping $instance because it is excluded"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Determine the type of the instance (container or virtual machine)
|
# Determine type and set config command
|
||||||
if pct status $instance >/dev/null 2>&1; then
|
if pct status $instance >/dev/null 2>&1; then
|
||||||
# It is a container
|
type="ct"
|
||||||
config_cmd="pct config"
|
config_cmd="pct config"
|
||||||
IP=$(pct exec $instance ip a s dev eth0 | awk '\''/inet / {print $2}'\'' | cut -d/ -f1)
|
|
||||||
else
|
else
|
||||||
# It is a virtual machine
|
type="vm"
|
||||||
config_cmd="qm config"
|
config_cmd="qm config"
|
||||||
IP=$(qm guest cmd $instance network-get-interfaces | egrep -o "([0-9]{1,3}\.){3}[0-9]{1,3}" | grep -E "192\.|10\." | head -n 1)
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Skip instances based on onboot and templates
|
# Skip templates and onboot-disabled
|
||||||
onboot=$($config_cmd $instance | grep -q "onboot: 0" || ( ! $config_cmd $instance | grep -q "onboot" ) && echo "true" || echo "false")
|
onboot=$($config_cmd $instance | grep -q "onboot: 0" || ( ! $config_cmd $instance | grep -q "onboot" ) && echo "true" || echo "false")
|
||||||
template=$($config_cmd $instance | grep template | grep -q "template:" && echo "true" || echo "false")
|
template=$($config_cmd $instance | grep -q "^template:" && echo "true" || echo "false")
|
||||||
|
|
||||||
if [ "$onboot" == "true" ]; then
|
if [ "$onboot" == "true" ]; then
|
||||||
echo "Skipping $instance because it is set not to boot"
|
echo "Skipping $instance because it is set not to boot"
|
||||||
@ -62,36 +71,49 @@ while true; do
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Ping the instance
|
# Check for mon-restart tag
|
||||||
|
has_tag=$($config_cmd $instance | grep -q "tags:.*mon-restart" && echo "true" || echo "false")
|
||||||
|
if [ "$has_tag" != "true" ]; then
|
||||||
|
echo "Skipping $instance because it does not have 'mon-restart' tag"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Responsiveness check and restart if needed
|
||||||
|
if [ "$type" == "vm" ]; then
|
||||||
|
# Check if guest agent responds
|
||||||
|
if qm guest cmd $instance ping >/dev/null 2>&1; then
|
||||||
|
echo "VM $instance is responsive via guest agent"
|
||||||
|
else
|
||||||
|
echo "$(date): VM $instance is not responding to agent ping, restarting..."
|
||||||
|
if qm status $instance | grep -q "status: running"; then
|
||||||
|
qm stop $instance >/dev/null 2>&1
|
||||||
|
sleep 5
|
||||||
|
fi
|
||||||
|
qm start $instance >/dev/null 2>&1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# Container: get IP and ping
|
||||||
|
IP=$(pct exec $instance ip a s dev eth0 | awk '/inet / {print $2}' | cut -d/ -f1)
|
||||||
if ! ping -c 1 $IP >/dev/null 2>&1; then
|
if ! ping -c 1 $IP >/dev/null 2>&1; then
|
||||||
# If the instance can not be pinged, stop and start it
|
|
||||||
if pct status $instance >/dev/null 2>&1; then
|
|
||||||
# It is a container
|
|
||||||
echo "$(date): CT $instance is not responding, restarting..."
|
echo "$(date): CT $instance is not responding, restarting..."
|
||||||
pct stop $instance >/dev/null 2>&1
|
pct stop $instance >/dev/null 2>&1
|
||||||
sleep 5
|
sleep 5
|
||||||
pct start $instance >/dev/null 2>&1
|
pct start $instance >/dev/null 2>&1
|
||||||
else
|
else
|
||||||
# It is a virtual machine
|
echo "CT $instance is responsive"
|
||||||
if qm status $instance | grep -q "status: running"; then
|
|
||||||
echo "$(date): VM $instance is not responding, restarting..."
|
|
||||||
qm stop $instance >/dev/null 2>&1
|
|
||||||
sleep 5
|
|
||||||
else
|
|
||||||
echo "$(date): VM $instance is not running, starting..."
|
|
||||||
fi
|
|
||||||
qm start $instance >/dev/null 2>&1
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Wait for 5 minutes. (Edit to your needs)
|
|
||||||
echo "$(date): Pausing for 5 minutes..."
|
echo "$(date): Pausing for 5 minutes..."
|
||||||
sleep 300
|
sleep 300
|
||||||
done >/var/log/ping-instances.log 2>&1' >/usr/local/bin/ping-instances.sh
|
|
||||||
|
done >/var/log/ping-instances.log 2>&1
|
||||||
|
EOF
|
||||||
|
|
||||||
touch /var/log/ping-instances.log
|
touch /var/log/ping-instances.log
|
||||||
# Change file permissions to executable
|
|
||||||
chmod +x /usr/local/bin/ping-instances.sh
|
chmod +x /usr/local/bin/ping-instances.sh
|
||||||
|
|
||||||
cat <<EOF >/etc/systemd/system/ping-instances.timer
|
cat <<EOF >/etc/systemd/system/ping-instances.timer
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Delay ping-instances.service by 5 minutes
|
Description=Delay ping-instances.service by 5 minutes
|
||||||
@ -104,17 +126,17 @@ OnUnitActiveSec=300
|
|||||||
WantedBy=timers.target
|
WantedBy=timers.target
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Create ping-instances.service
|
|
||||||
cat <<EOF >/etc/systemd/system/ping-instances.service
|
cat <<EOF >/etc/systemd/system/ping-instances.service
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Ping instances every 5 minutes and restarts if necessary
|
Description=Ping instances every 5 minutes and restart if necessary
|
||||||
After=ping-instances.timer
|
After=ping-instances.timer
|
||||||
Requires=ping-instances.timer
|
Requires=ping-instances.timer
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
# To specify which CT/VM should be excluded, add the CT/VM ID at the end of the line where ExecStart=/usr/local/bin/ping-instances.sh is specified.
|
# To exclude specific instances, pass IDs to ExecStart, e.g.:
|
||||||
# For example: ExecStart=/usr/local/bin/ping-instances.sh 100 102
|
# ExecStart=/usr/local/bin/ping-instances.sh 100 200
|
||||||
# Virtual machines without the QEMU guest agent installed must be excluded.
|
# Instances must also have the 'mon-restart' tag to be monitored
|
||||||
|
|
||||||
ExecStart=/usr/local/bin/ping-instances.sh
|
ExecStart=/usr/local/bin/ping-instances.sh
|
||||||
Restart=always
|
Restart=always
|
||||||
@ -125,39 +147,33 @@ StandardError=file:/var/log/ping-instances.log
|
|||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Reload daemon, enable and start ping-instances.service
|
|
||||||
systemctl daemon-reload
|
systemctl daemon-reload
|
||||||
systemctl enable -q --now ping-instances.timer
|
systemctl enable -q --now ping-instances.timer
|
||||||
systemctl enable -q --now ping-instances.service
|
systemctl enable -q --now ping-instances.service
|
||||||
clear
|
clear
|
||||||
echo -e "\n To view Monitor All logs: cat /var/log/ping-instances.log"
|
echo -e "\n Monitor All installed."
|
||||||
|
echo "📄 To view logs: cat /var/log/ping-instances.log"
|
||||||
|
echo "⚙️ Make sure your VMs or containers have the 'mon-restart' tag to be monitored."
|
||||||
}
|
}
|
||||||
|
|
||||||
remove() {
|
remove() {
|
||||||
systemctl disable -q --now ping-instances.timer
|
systemctl disable -q --now ping-instances.timer
|
||||||
systemctl disable -q --now ping-instances.service
|
systemctl disable -q --now ping-instances.service
|
||||||
rm /etc/systemd/system/ping-instances.service /etc/systemd/system/ping-instances.timer /usr/local/bin/ping-instances.sh /var/log/ping-instances.log
|
rm -f /etc/systemd/system/ping-instances.service
|
||||||
echo "Removed Monitor All from Proxmox VE"
|
rm -f /etc/systemd/system/ping-instances.timer
|
||||||
|
rm -f /usr/local/bin/ping-instances.sh
|
||||||
|
rm -f /var/log/ping-instances.log
|
||||||
|
echo "Monitor All removed from Proxmox VE"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Define options for the whiptail menu
|
|
||||||
OPTIONS=(Add "Add Monitor-All to Proxmox VE"
|
OPTIONS=(Add "Add Monitor-All to Proxmox VE"
|
||||||
Remove "Remove Monitor-All from Proxmox VE")
|
Remove "Remove Monitor-All from Proxmox VE")
|
||||||
|
|
||||||
# Show the whiptail menu and save the user's choice
|
|
||||||
CHOICE=$(whiptail --backtitle "Proxmox VE Helper Scripts" --title "Monitor-All for Proxmox VE" --menu "Select an option:" 10 58 2 \
|
CHOICE=$(whiptail --backtitle "Proxmox VE Helper Scripts" --title "Monitor-All for Proxmox VE" --menu "Select an option:" 10 58 2 \
|
||||||
"${OPTIONS[@]}" 3>&1 1>&2 2>&3)
|
"${OPTIONS[@]}" 3>&1 1>&2 2>&3)
|
||||||
|
|
||||||
# Check the user's choice and perform the corresponding action
|
|
||||||
case $CHOICE in
|
case $CHOICE in
|
||||||
"Add")
|
"Add") add ;;
|
||||||
add
|
"Remove") remove ;;
|
||||||
;;
|
*) echo "Exiting..."; exit 0 ;;
|
||||||
"Remove")
|
|
||||||
remove
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Exiting..."
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
|
Loading…
x
Reference in New Issue
Block a user