Auto restart by log event
If an error occurred within the last 5 minutes (300 seconds) of events in "/var/log/messages", the routine restarts the service.
# ./myscript.sh <number of seconds>
$ ./myscript.sh 300
This script converts the date and time of each matching entry in "/var/log/messages" to EPOCH and counts the lines with errors that match the regex; if any such line falls inside the specified window (in seconds), the routine restarts the service.
#!/bin/bash
#######################################
# Count log lines matching the error regex whose timestamp is newer than a
# given epoch.
#
# The first three whitespace-separated fields of every matching line are
# taken as the timestamp (e.g. "Jan  5 10:00:00") and converted to epoch
# seconds with `date -d` (GNU date syntax).
#
# Arguments:
#   $1 - lower bound in epoch seconds (exclusive)
#   $2 - log file to scan (optional, default: /var/log/messages)
# Outputs:
#   the number of matching lines newer than $1, on stdout
# Returns:
#   0 on success, 2 when $1 is not an integer
#######################################
GET_COUNT() {
  local since=${1:-}
  local log_file=${2:-/var/log/messages}
  local pattern='ERROR - my specific message of fail 1|ERROR - my specific message of fail 2'
  local now this_year stamp epoch
  local count=0

  if [[ ! $since =~ ^-?[0-9]+$ ]]; then
    printf 'GET_COUNT: lower bound must be an integer epoch, got "%s"\n' "$since" >&2
    return 2
  fi

  now=$(date +"%s") || return
  this_year=$(date +"%Y") || return

  while IFS= read -r stamp; do
    # Lines whose first three fields are not a parsable date are skipped;
    # date's own error message stays visible on stderr.
    epoch=$(date -d "$stamp" +"%s") || continue

    # The timestamp carries no year, so date assumes the current one. A
    # December entry read in January therefore lands in the future and
    # would always look "recent"; re-read it as belonging to last year.
    if (( epoch > now )); then
      epoch=$(date -d "$stamp $(( this_year - 1 ))" +"%s") || continue
    fi

    if (( epoch > since )); then
      count=$(( count + 1 ))
    fi
  done < <(grep -E -- "$pattern" "$log_file" | awk '{ print $1 " " $2 " " $3 }')

  printf '%s\n' "$count"
}
#######################################
# Restart MYAPP.service through systemctl.
#
# Prints the current date first, then the confirmation "service restarted",
# but only when systemctl reported success.
#
# Outputs:
#   date and confirmation on stdout; a failure message on stderr
# Returns:
#   0 on success, otherwise the exit status of systemctl
#######################################
RESTART_APP() {
  local rc=0

  date
  systemctl restart MYAPP.service || rc=$?
  if (( rc != 0 )); then
    printf 'restart of MYAPP.service failed (exit status %d)\n' "$rc" >&2
    return "$rc"
  fi
  echo service restarted
}
# Entry point: restart the service when GET_COUNT reports at least one
# matching error newer than "now - WINDOW" seconds.
# Usage: ./myscript.sh <number of seconds>
if [[ ! ${1:-} =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s <number of seconds>\n' "${0##*/}" >&2
  exit 2
fi

WINDOW=$1
# Read the clock once so NOW and SINCE describe the same instant.
NOW=$(date +"%s") || exit 1
# 10# forces base 10, so a window such as "0300" is not read as octal.
SINCE=$(( NOW - 10#$WINDOW ))
export WINDOW NOW SINCE

error_count=$(GET_COUNT "$SINCE")
# An empty count (GET_COUNT failed) evaluates as 0 here, so no restart happens.
if [[ ${error_count:-0} -gt 0 ]]; then
  RESTART_APP
fi