[lustre-discuss] lustre 2.10.5 or 2.11.0

Riccardo Veraldi Riccardo.Veraldi at cnaf.infn.it
Fri Oct 19 16:52:08 PDT 2018


On 10/19/18 12:37 PM, Mohr Jr, Richard Frank (Rick Mohr) wrote:
>> On Oct 17, 2018, at 7:30 PM, Riccardo Veraldi <Riccardo.Veraldi at cnaf.infn.it> wrote:
>>
>> anyway especially regarding the OSSes you may eventually need some ZFS module parameters optimizations regarding vdev_write and vdev_read max to increase those values higher than default. You may also disable ZIL, change the redundant_metadata to "most"  atime off.
>>
>> I could send you a list of parameters that in my case work well.
> Riccardo,
>
> Would you mind sharing your ZFS parameters with the mailing list?  I would be interested to see which options you have changed.
>
> --
> Rick Mohr
> Senior HPC System Administrator
> National Institute for Computational Sciences
> http://www.nics.tennessee.edu
>
These ZFS module options worked for me on my high-performance cluster:

options zfs zfs_prefetch_disable=1
options zfs zfs_txg_history=120
options zfs metaslab_debug_unload=1
#
options zfs zfs_vdev_scheduler=deadline
options zfs zfs_vdev_async_write_active_min_dirty_percent=20
#
options zfs zfs_vdev_scrub_min_active=48
options zfs zfs_vdev_scrub_max_active=128
#
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
options zfs zfs_top_maxinflight=320
options zfs zfs_txg_timeout=30
options zfs zfs_dirty_data_max_percent=40
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32

##############

These are the ZFS attributes that I changed on the OSSes:

# Dataset properties applied with "zfs set" to the OST pool.
# NOTE: $ostpool is not defined in this snippet; set it to the name of the
# target pool/dataset before running these commands.
zfs set mountpoint=none $ostpool

# The earlier message in this thread refers to this as disabling the ZIL.
zfs set sync=disabled $ostpool

zfs set atime=off $ostpool

zfs set redundant_metadata=most $ostpool

zfs set xattr=sa $ostpool

zfs set recordsize=1M $ostpool

#################


These are the ko2iblnd parameters for FDR Mellanox IB interfaces:

options ko2iblnd timeout=100 peer_credits=63 credits=2560 concurrent_sends=63 ntx=2048 fmr_pool_size=1280 fmr_flush_trigger=1024 ntx=5120

############

These are the ksocklnd parameters:

options ksocklnd sock_timeout=100 credits=2560 peer_credits=63

##############

These are the other parameters that I tweaked:

# ptlrpc module parameters, written through /sys/module/ptlrpc/parameters
# (presumably requires the ptlrpc module to be loaded so the paths exist).
echo 32 > /sys/module/ptlrpc/parameters/max_ptlrpcds
echo 3 > /sys/module/ptlrpc/parameters/ptlrpcd_bind_policy

# Lustre parameters set at runtime with "lctl set_param".
lctl set_param timeout=600
lctl set_param ldlm_timeout=200
lctl set_param at_min=250
lctl set_param at_max=600

###########

Also, I run this script at boot time to redefine the IRQ assignments for the
hard drives so that they are spread across all CPUs; this is not needed for kernels > 4.4.

#!/bin/sh
# numa_smp.sh - assign the IRQs of a device round-robin to a range of CPUs.
#
# Usage: numa_smp.sh DEVICE FIRST_CPU LAST_CPU
#   DEVICE     pattern (grep basic regex) matched against the lines of
#              /proc/interrupts
#   FIRST_CPU  first CPU number of the range
#   LAST_CPU   last CPU number of the range; once it has been used, the
#              assignment wraps back to FIRST_CPU
#
# For every matching IRQ one CPU number is written to
# /proc/irq/<irq>/smp_affinity_list and the action is reported on stdout.
if [ "$#" -lt 3 ]; then
   # Without arguments grep would take the file name as its pattern and
   # block reading stdin, so refuse to run instead.
   echo "Usage: $0 device first_cpu last_cpu" >&2
   exit 2
fi
device=$1
cpu1=$2
cpu2=$3
cpu=$cpu1
# The first column of /proc/interrupts is "<irq>:"; keep it and strip the colon.
grep -- "$device" /proc/interrupts | awk '{print $1}' | sed 's/://' | while read -r int
do
   echo "$cpu" > "/proc/irq/$int/smp_affinity_list"
   # Report the IRQ that was actually written (the original printed an
   # undefined variable here, leaving the IRQ number out of the message).
   echo "echo CPU $cpu > /proc/irq/$int/smp_affinity_list"
   if [ "$cpu" = "$cpu2" ]
   then
      cpu=$cpu1
   else
      # POSIX arithmetic expansion; "((cpu=...))" is a bash extension and
      # does not work portably under /bin/sh.
      cpu=$((cpu + 1))
   fi
done



More information about the lustre-discuss mailing list