#!/bin/sh
# Log total disk activity.
# This script logs disk activity, with the aim of helping to gauge flash disk
# lifetimes. (Flash disks supposedly support limited numbers of write cycles.)
# The kernel records the total number of sectors read and written for each disk
# since the last boot for us. We need to accumulate this data. But we don't want
# to rely on the system always being shut down cleanly in order to save the data.
# Instead we'll run this script from cron, so that in the event of an unclean
# shutdown we have lost only a limited amount of data. We can also create an init
# script to run it on shutdown.

# We'll record the time of the last boot with each record written. In that way,
# when computing the totals we can discard all but the last record with each boot
# time.

# Log file(s) where the records will be written. There's not much point in keeping
# this only on the disk that you're monitoring, as the data is most interesting when
# the disk has failed. You can therefore specify multiple log files, all of which
# will get all of the data.
hostname=`hostname`
logfiles="/var/log/diskstats-${hostname} /home/diskstats-${hostname}.log"

# Get the time of the last boot, in seconds-since-the-epoch.
# Is "last" the best way to do this? Its output is not straightforward to parse. I
# had hoped to find something useful in /proc, but I can't find what I need.
# The output of last looks like this:
# reboot   system boot  2.6.19          Tue Dec 12 08:56 - 20:00  (11:04)
#
# wtmp begins Sat Apr  1 01:20:03 2006
# This gets the "Tue Dec 12 08:56" portion:
boottime_fmt=`last -1 reboot | awk 'NR==1 {print $5,$6,$7,$8}'`
# Now convert to seconds-since-the-epoch:
boottime_secs=`date +%s -d "${boottime_fmt}"`

# Get the disk statistics.
# See Documentation/iostats.txt for details of the formats.
{
  case `uname -r` in
    2.6.*)
      # For kernel 2.6, typical data from /proc/diskstats is:
      # 3 0 hda 23013 2383 430466 63692 7483 5929 107344 646004 0 229336 709696
      # 3 1 hda1 25379 430266 13418 107344
      # The first two fields are the device major/minor numbers. Whole disks have more
      # fields than partitions, and we're interested in whole disks. We're also not
      # interested in ram disks and mtd blocks, which are filtered out based on their
      # major numbers of 1 and 31 respectively.
      # The numbers of sectors read and written are in the 6th and 10th fields respectively.
      cat /proc/diskstats | awk '$1!=1 && $1!=31 && NF==14 { print $3, $6, $10; }'
      ;;
    2.4.*)
      # For kernel 2.4, typical data from /proc/partitions is:
      # 3 0 39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
      # 3 1 9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
      # This is similar to the 2.6 format but with an extra field that we have to skip,
      # and with equally-many fields for disks and partitions. I think that the best way
      # to find only whole disks is using the device minor number.
      # I have not tested this. The only 2.4 machine I have is a UML virtual machine, and
      # it doesn't seem to have any of these fields in /proc/partitions.
      cat /proc/partitions | awk '$1!=1 && $1!=31 && ($2%8)==0 { print $4, $7, $11; }'
      ;;
    *)
      echo "Sorry, your kernel is not supported."
      exit 1
      ;;
  esac
} | {
  # Read that data, one disk at a time.
  while read disk reads writes
  do
    # We really need to map from disk names to IDs of some sort, since the names can
    # change from one boot to the next depending on driver startup order.
    # We have a choice of UUIDs or IDs. I'm not sure of the difference. IDs look
    # more intelligible, but are they unique?
    # If you choose to use UUIDs instead, make sure you keep an independent record
    # of what disks they correspond to.
    # I can't find a better way of getting this data than searching the links in
    # /dev/disk: where does udev get the IDs from when it makes the links?
    devnode="/dev/${disk}"
    disk_id=`find -L /dev/disk/by-id -samefile "${devnode}" -printf '%f\n'`

    # Put together a log file entry:
    logline="${boottime_secs} \"${disk_id}\" ${reads} ${writes}"

    # Append it to each logfile:
    for f in ${logfiles}
    do
      echo ${logline} >> $f
    done
  done
}
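
# A rough sketch of how the totals might later be computed from one of the log
# files, following the "keep only the last record per boot time" idea described
# above. This is illustrative only: it assumes the record format written by this
# script (boot time, quoted disk ID, sectors read, sectors written), assumes the
# disk ID contains no whitespace, and the log file name shown is just an example.
# Because records are appended in time order, later lines for the same boot and
# disk simply overwrite earlier ones before the per-boot values are summed.
#
# awk '{
#        key = $1 SUBSEP $2              # boot time + disk ID
#        reads[key] = $3; writes[key] = $4
#      }
#      END {
#        for (k in reads) {
#          split(k, part, SUBSEP)
#          r[part[2]] += reads[k]; w[part[2]] += writes[k]
#        }
#        for (d in r) print d, "sectors read:", r[d], "sectors written:", w[d]
#      }' /var/log/diskstats-myhost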