From 2f95f7cda8d0388406c894ebba272c6c3231dc54 Mon Sep 17 00:00:00 2001 From: Masamichi Takagi Date: Fri, 9 Sep 2016 19:17:40 +0900 Subject: [PATCH] Modify interrupt load balancing policy on reboot/stop When rebooting: 1. Stop irqbalance 2. Modify /proc/irq/*/smp_affinity so that McKernel cores are not included 3. Start irqbalance with McKernel cores and IHK IRQ banned from load balancing When stopping: 1. Stop irqbalance 2. Restore /proc/irq/*/smp_affinity 3. Restart irqbalance with the system default settings refs #760 --- Makefile.in | 4 +++ arch/x86/tools/irqbalance_mck.in.in | 28 +++++++++++++++++++++ arch/x86/tools/irqbalance_mck.service.in | 10 ++++++++ arch/x86/tools/mcreboot-smp-x86.sh.in | 22 ++++++++++++++++ arch/x86/tools/mcstop+release-smp-x86.sh.in | 9 +++++++ configure | 9 ++++++- configure.ac | 6 +++++ 7 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 arch/x86/tools/irqbalance_mck.in.in create mode 100644 arch/x86/tools/irqbalance_mck.service.in diff --git a/Makefile.in b/Makefile.in index 7fb90c88..b0b3f147 100755 --- a/Makefile.in +++ b/Makefile.in @@ -1,5 +1,6 @@ TARGET = @TARGET@ SBINDIR = @SBINDIR@ +ETCDIR = @ETCDIR@ MANDIR = @MANDIR@ all:: @@ -48,6 +49,9 @@ install:: mkdir -p -m 755 $(SBINDIR); \ install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \ install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \ + mkdir -p -m 755 $(ETCDIR); \ + install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \ + install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \ mkdir -p -m 755 $(MANDIR)/man1; \ install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \ ;; \ diff --git a/arch/x86/tools/irqbalance_mck.in.in b/arch/x86/tools/irqbalance_mck.in.in new file mode 100644 index 00000000..1e103769 --- /dev/null +++ b/arch/x86/tools/irqbalance_mck.in.in @@ -0,0 +1,28 @@ +# irqbalance is a daemon process that distributes interrupts 
across +# CPUS on SMP systems. The default is to rebalance once every 10 +# seconds. This is the environment file that is specified to systemd via the +# EnvironmentFile key in the service unit file (or via whatever method the init +# system you're using has). +# +# ONESHOT=yes +# after starting, wait for a minute, then look at the interrupt +# load and balance it once; after balancing exit and do not change +# it again. +#IRQBALANCE_ONESHOT= + +# +# IRQBALANCE_BANNED_CPUS +# 64 bit bitmask which allows you to indicate which cpu's should +# be skipped when rebalancing irqs. Cpu numbers which have their +# corresponding bits set to one in this mask will not have any +# irq's assigned to them on rebalance +# +IRQBALANCE_BANNED_CPUS=%mask% + +# +# IRQBALANCE_ARGS +# append any args here to the irqbalance daemon as documented in the man page +# +IRQBALANCE_ARGS=--banirq=%banirq% + + diff --git a/arch/x86/tools/irqbalance_mck.service.in b/arch/x86/tools/irqbalance_mck.service.in new file mode 100644 index 00000000..53722f08 --- /dev/null +++ b/arch/x86/tools/irqbalance_mck.service.in @@ -0,0 +1,10 @@ +[Unit] +Description=irqbalance daemon +After=syslog.target + +[Service] +EnvironmentFile=@ETCDIR@/irqbalance_mck +ExecStart=/usr/sbin/irqbalance --foreground $IRQBALANCE_ARGS + +[Install] +WantedBy=multi-user.target diff --git a/arch/x86/tools/mcreboot-smp-x86.sh.in b/arch/x86/tools/mcreboot-smp-x86.sh.in index 902280ed..3f9c58c8 100644 --- a/arch/x86/tools/mcreboot-smp-x86.sh.in +++ b/arch/x86/tools/mcreboot-smp-x86.sh.in @@ -15,6 +15,7 @@ prefix="@prefix@" BINDIR="${prefix}/bin" SBINDIR="${prefix}/sbin" +ETCDIR=@ETCDIR@ KMODDIR="${prefix}/kmod" KERNDIR="${prefix}/@TARGET@/kernel" ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@" @@ -210,3 +211,24 @@ then pkill mcklogd SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL} -f ${facility} fi + +# Modify interrupt load-balance policy +if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" -o 
"`systemctl status irqbalance.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then + systemctl stop irqbalance_mck.service 2>/dev/null + if ! systemctl stop irqbalance.service 2>/dev/null ; then echo "error: stopping irqbalance" >&2; exit 1; fi; + + if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }' ; then echo "error: saving /proc/irq/*/smp_affinity" >&2; exit 1; fi; + + ncpus=`lscpu | grep -E '^CPU\(s\):' | awk '{print $2}'` + smp_affinity_mask=`echo $cpus | ncpus=$ncpus perl -e 'while(<>){@tokens = split /,/;foreach $token (@tokens) {@nums = split /-/,$token; for($num = $nums[0]; $num <= $nums[$#nums]; $num++) {$ndx=int($num/32); $mask[$ndx] |= (1<<($num % 32))}}} $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if($j != $nint32s - 1){print ",";} $nblks = $j == $nint32s - 1 ? int(($ENV{'ncpus'} % 32)/4) : 8; for($i = $nblks - 1;$i >= 0;$i--){ printf("%01x",($mask[$j] >> ($i*4)) & 0xf);}}'` + + if ! 
ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = int((($ENV{'ncpus'}%32)+3)/4); $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then echo "error: modifying /proc/irq/*/smp_affinity" >&2; exit 1; fi; + + banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'` + + sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > $ETCDIR/irqbalance_mck + if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: linking irqbalance_mck" >&2; exit 1; fi; + if ! systemctl start irqbalance_mck.service 2>/dev/null ; then echo "error: starting irqbalance_mck" >&2; exit 1; fi; +# echo cpus=$cpus mask=$smp_affinity_mask banirq=$banirq +fi + diff --git a/arch/x86/tools/mcstop+release-smp-x86.sh.in b/arch/x86/tools/mcstop+release-smp-x86.sh.in index e612f6d1..9010b26c 100644 --- a/arch/x86/tools/mcstop+release-smp-x86.sh.in +++ b/arch/x86/tools/mcstop+release-smp-x86.sh.in @@ -10,6 +10,7 @@ prefix="@prefix@" BINDIR="@BINDIR@" SBINDIR="@SBINDIR@" +ETCDIR=@ETCDIR@ KMODDIR="@KMODDIR@" KERNDIR="@KERNDIR@" @@ -51,3 +52,11 @@ fi # Stop mcklogd pkill mcklogd + +# Restore interrupt load-balance policy +if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then + if ! 
systemctl stop irqbalance_mck.service 2>/dev/null ; then echo "error: stopping irqbalance_mck" >&2; exit 1; fi; + if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: disabling irqbalance_mck" >&2; exit 1; fi; + if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }' ; then echo "error: restoring /proc/irq/*/smp_affinity" >&2; exit 1; fi; + if ! systemctl start irqbalance.service; then echo "error: starting irqbalance" >&2; exit 1; fi; +fi diff --git a/configure b/configure index b42ebb56..48805d5c 100755 --- a/configure +++ b/configure @@ -632,6 +632,7 @@ ENABLE_MCOVERLAYFS MANDIR KERNDIR KMODDIR +ETCDIR SBINDIR BINDIR TARGET @@ -3031,6 +3032,9 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$ETCDIR" = X; then + ETCDIR="$prefix/etc" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -3882,11 +3886,12 @@ fi + ac_config_headers="$ac_config_headers executer/config.h" -ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" +ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh 
arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in" if test "x$enable_dcfa" = xyes; then : @@ -4599,6 +4604,8 @@ do "arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;; "arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;; "arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;; + "arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;; + "arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;; "kernel/Makefile.dcfa") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.dcfa" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; diff --git a/configure.ac b/configure.ac index 2c87aa63..a5e702f0 100644 --- a/configure.ac +++ b/configure.ac @@ -146,6 +146,9 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$ETCDIR" = X; then + ETCDIR="$prefix/etc" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -278,6 +281,7 @@ AC_SUBST(KDIR) AC_SUBST(TARGET) AC_SUBST(BINDIR) AC_SUBST(SBINDIR) +AC_SUBST(ETCDIR) AC_SUBST(KMODDIR) AC_SUBST(KERNDIR) AC_SUBST(MANDIR) @@ -307,6 +311,8 @@ AC_CONFIG_FILES([ arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in + arch/x86/tools/irqbalance_mck.service + arch/x86/tools/irqbalance_mck.in ]) AS_IF([test "x$enable_dcfa" = xyes], [