I outlined my journey to sub-second-per-host nagios monitoring via the salt-mine and check_mk here:
http://garthwaite.org/saltmine_check_mk_agent.html
The article walks through weeks of on and off tinkering to get it all working. I'll summarize the solution:
Create a custom check_mk module for all minions:
#!/usr/bin/env python
''' Support for running check_mk_agent over salt '''
import os
import salt.utils
from salt.exceptions import SaltException
def __virtual__():
''' Only load the module if check_mk_agent is installed '''
if os.path.exists('/usr/bin/check_mk_agent'):
return 'check_mk'
return False
def agent():
''' Return the output of check_mk_agent '''
return __salt__['cmd.run']('/usr/bin/check_mk_agent')
Set minion's mine interval to one minute:
salt '*' file.append /etc/salt/minion.d/mine.conf "mine_interval: 1"
Configure the monitoring server to pull all the minion's check_mk_agent output into a single json file, then configure check_mk to query that file instead of any network queries. All accomplished with the following script on the monitoring minion:
#!/usr/bin/env python
import sys
import json
import fcntl
DATAFILE="/dev/shm/cmk.json"
NAG_UID = 105
NAG_GID = 107
def do_update():
import os
import salt.client
caller = salt.client.Caller()
data = caller.function('mine.get', '*', 'check_mk.agent')
lockfile = open(DATAFILE+".lock", "w")
fcntl.flock(lockfile, fcntl.LOCK_EX)
datafile = open(DATAFILE, "w")
datafile.write(json.dumps(data))
for f in (DATAFILE, DATAFILE+".lock"):
os.chmod(f, 0644)
os.chown(f, NAG_UID, NAG_GID)
def get_agent(minion):
lockfile = open(DATAFILE+".lock", "w")
fcntl.flock(lockfile, fcntl.LOCK_SH)
data = json.load(file(DATAFILE))
return data[minion]
if __name__ == '__main__':
if len(sys.argv) != 2:
print "Usage: mine_agent.py --update | <minion id>"
elif sys.argv[1] in ['--update', '-u']:
do_update()
else:
minion = sys.argv[1]
print get_agent(minion)
Update every minute:
$ cat /etc/cron.d/retrieve_mined_minion_data
*/1 * * * * root /etc/check_mk/mine_agent.py --update
Finally: Change the datasource for all nagios targets in /etc/check_mk/main.mk:
datasource_programs = [
( '/etc/check_mk/mine_agent.py <HOST>', ['mine'], ALL_HOSTS ),
]