I've been working with sparse files in order to replicate Linux fs permissions on a Windows samba mount, an idea from here: https://www.thanassis.space/backup.html.
While testing sparse files, though, I've discovered an interesting problem wherein a "full" host drive is not reported as a write failure to an application. Instead, when the host drive fills up, the write continues and completes successfully, even though the drive was full.
In order to test this I have the following setup.
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 944M 7.1G 12% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 49M 1.7G 3% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 33M 4.0G 1% /mnt
tmpfs 354M 0 354M 0% /run/user/0
I have mounted a 4G partition on /mnt, on which I will create a 10G sparse file.
[root@ip-172-31-61-147 ~]# dd of=/mnt/file bs=1G count=0 seek=10
0+0 records in
0+0 records out
0 bytes (0 B) copied, 3.0097e-05 s, 0.0 kB/s
The file is created correctly and takes up no actual space on the partition, but it appears to be a 10G file.
[root@ip-172-31-61-147 ~]# ls -lh /mnt/
total 0
-rw-r--r--. 1 root root 10G Aug 28 21:10 file
[root@ip-172-31-61-147 ~]# du -h /mnt/
0 /mnt/
I allocate a file-system in the space and mount it (verified via losetup).
[root@ip-172-31-61-147 ~]# mkfs.xfs /mnt/file
meta-data=/mnt/file isize=512 agcount=4, agsize=655360 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=0, sparse=0
data = bsize=4096 blocks=2621440, imaxpct=25
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=1
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
[root@ip-172-31-61-147 ~]# mount /mnt/file /srv/
[root@ip-172-31-61-147 ~]# losetup -j /mnt/file
/dev/loop0: [51729]:67 (/mnt/file)
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 945M 7.1G 12% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 49M 1.7G 3% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 43M 4.0G 2% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 33M 10G 1% /srv
[root@ip-172-31-61-147 ~]#
On this mount point I can write files as normal, indicating it is working correctly.
[root@ip-172-31-61-147 ~]# dd if=/dev/zero of=/srv/init_file bs=1GiB count=1
1+0 records in
1+0 records out
1073741824 bytes (1.1 GB) copied, 8.22444 s, 131 MB/s
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 945M 7.1G 12% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 49M 1.7G 3% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 1.1G 3.0G 27% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 1.1G 9.0G 11% /srv
[root@ip-172-31-61-147 ~]# ls -lh /srv/
total 1.0G
-rw-r--r--. 1 root root 1.0G Aug 28 21:19 init_file
[root@ip-172-31-61-147 ~]#
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 945M 7.1G 12% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 49M 1.7G 3% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 1.1G 3.0G 27% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 1.1G 9.0G 11% /srv
Now, when I attempt to create a file that should overrun the host drive, it will also write correctly and not report an error.
[root@ip-172-31-61-147 ~]# dd if=/dev/zero of=/srv/too_large_a_file bs=1GiB count=4
4+0 records in
4+0 records out
4294967296 bytes (4.3 GB) copied, 49.9905 s, 85.9 MB/s
[root@ip-172-31-61-147 ~]# echo $?
0
Relevant data:
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 949M 7.1G 12% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 49M 1.7G 3% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 5.1G 5.0G 51% /srv
[root@ip-172-31-61-147 ~]# du -h /srv/
5.0G /srv/
[root@ip-172-31-61-147 ~]# du -h /srv/*
1.0G /srv/init_file
4.0G /srv/too_large_a_file
[root@ip-172-31-61-147 ~]# ls -lh /srv/
total 5.0G
-rw-r--r--. 1 root root 1.0G Aug 28 21:19 init_file
-rw-r--r--. 1 root root 4.0G Aug 28 21:24 too_large_a_file
[root@ip-172-31-61-147 ~]#
I attempted to replicate this behavior using other utilities such as rsync and cp. They also do not report an error, and instead fail silently.
Using cp:
[root@ip-172-31-61-147 ~]# cp -v too_large_a_file /srv/
‘too_large_a_file’ -> ‘/srv/too_large_a_file’
[root@ip-172-31-61-147 ~]# echo $?
0
[root@ip-172-31-61-147 ~]# ls -lhtr /srv/
total 5.0G
-rw-r--r--. 1 root root 5.0G Aug 28 21:31 too_large_a_file
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 6.0G 2.1G 75% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 57M 1.7G 4% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 5.1G 5.0G 51% /srv
Using rsync:
[root@ip-172-31-61-147 ~]# rsync -vvv too_large_a_file /srv/
[sender] make_file(too_large_a_file,*,0)
send_file_list done
send_files starting
server_recv(2) starting pid=16569
received 1 names
recv_file_list done
get_local_name count=1 /srv/
generator starting pid=16569
delta-transmission disabled for local transfer or --whole-file
recv_generator(too_large_a_file,0)
send_files(0, too_large_a_file)
send_files mapped too_large_a_file of size 5368709120
calling match_sums too_large_a_file
too_large_a_file
sending file_sum
false_alarms=0 hash_hits=0 matches=0
sender finished too_large_a_file
send_files phase=1
recv_files(1) starting
generate_files phase=1
recv_files(too_large_a_file)
got file_sum
renaming .too_large_a_file.CwVApY to too_large_a_file
recv_files phase=1
generate_files phase=2
send_files phase=2
send files finished
total: matches=0 hash_hits=0 false_alarms=0 data=5368709120
recv_files phase=2
generate_files phase=3
recv_files finished
generate_files finished
sent 5369364558 bytes received 31 bytes 57426359.24 bytes/sec
total size is 5368709120 speedup is 1.00
[sender] _exit_cleanup(code=0, file=main.c, line=1052): about to call exit(0)
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 6.0G 2.1G 75% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 73M 1.7G 5% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/0
/dev/loop0 10G 5.1G 5.0G 51% /srv
Upon running md5sum, I am even more confused:
[root@ip-172-31-61-147 ~]# md5sum /root/too_large_a_file
ec4bcc8776ea04479b786e063a9ace45 /root/too_large_a_file
[root@ip-172-31-61-147 ~]# md5sum /srv/too_large_a_file
ec4bcc8776ea04479b786e063a9ace45 /srv/too_large_a_file
It appears the full file is preserved, although how, I have no idea.
In researching this, I found this question where someone was doing the same thing but using an encrypted setup. They identified the same problem but were unable to solve it (in fact, they recommended it be opened as a new question!): Creating a grow-on-demand encrypted volume with LUKS.
Any help would be great.
Edit: Per request the kernel and system information
[root@ip-172-31-61-147 ~]# uname -a
Linux ip-172-31-61-147 3.10.0-514.16.1.el7.x86_64 #1 SMP Wed Apr 12 15:04:24 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
[root@ip-172-31-61-147 ~]# cat /etc/redhat-release
CentOS Linux release 7.3.1611 (Core)
Based on the provided answer I created new files to test the /dev/zero
theory. I created a file with all ones and attempted to replicate my findings and found it was still successful.
[root@ip-172-31-61-147 ~]# tr '\0' '\377' < /dev/zero | dd bs=1 count=5G of=~/too_large_a_file
[root@ip-172-31-61-147 ~]# du -h too_large_a_file
982M too_large_a_file
I then put multiple copies of that file on the sparse-file mount (e.g. cp too_large_a_file /srv/too_large_a_file_1
)
I was able to copy six of those files onto the external with none of the copies failing.
[root@ip-172-31-61-147 ~]# ls -lh /srv/
total 4.8G
-rw-r--r--. 1 root root 982M Aug 29 00:14 too_large_a_file
-rw-r--r--. 1 root root 982M Aug 29 00:26 too_large_a_file_2
-rw-r--r--. 1 root root 982M Aug 29 02:34 too_large_a_file_3
-rw-r--r--. 1 root root 982M Aug 29 02:34 too_large_a_file_4
-rw-r--r--. 1 root root 982M Aug 29 02:35 too_large_a_file_5
The drive is clearly maxed out here as well.
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/xvda1 8377344 1975040 6402304 24% /
devtmpfs 1920720 0 1920720 0% /dev
tmpfs 1809256 0 1809256 0% /dev/shm
tmpfs 1809256 82192 1727064 5% /run
tmpfs 1809256 0 1809256 0% /sys/fs/cgroup
/dev/xvdb1 4183040 4183020 20 100% /mnt
tmpfs 361852 0 361852 0% /run/user/1000
/dev/loop0 10475520 5055864 5419656 49% /srv
The md5sums are as follows for all the files and the original.
[root@ip-172-31-61-147 ~]# md5sum too_large_a_file
e8154ef97a3eb2bd13aea04b823a4570 too_large_a_file
[root@ip-172-31-61-147 ~]# md5sum /srv/*
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file_2
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file_3
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file_4
154248d2eeaf5791dfc8199e51daadbc /srv/too_large_a_file_5
I'll add this is something clearly affecting the system, because adding a sixth file crashed it. Note: after the copy finished the system became responsive again.
Edit 2: Added du
information.
[root@ip-172-31-61-147 ~]# du -h /srv/*
982M /srv/too_large_a_file
982M /srv/too_large_a_file_2
982M /srv/too_large_a_file_3
982M /srv/too_large_a_file_4
982M /srv/too_large_a_file_5
Edit 3: Memory information. I tested this by removing the files that "overran" the buffer, then copying one back, dropping caches, and seeing what happened.
[root@ip-172-31-61-147 ~]# rm /srv/too_large_a_file_5
rm: remove regular file ‘/srv/too_large_a_file_5’? y
[root@ip-172-31-61-147 ~]# cp too_large_a_file /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 2.2G 5.9G 28% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 97M 1.7G 6% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/1000
/dev/loop0 10G 4.9G 5.2G 49% /srv
[root@ip-172-31-61-147 ~]# free -m && sync && echo 3 > /proc/sys/vm/drop_caches && free -m
total used free shared buff/cache available
Mem: 3533 93 1210 104 2229 3091
Swap: 0 0 0
total used free shared buff/cache available
Mem: 3533 94 3281 104 157 3183
Swap: 0 0 0
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 2.2G 5.9G 28% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 105M 1.7G 6% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/1000
/dev/loop0 10G 4.9G 5.2G 49% /srv
[root@ip-172-31-61-147 ~]# md5sum /srv/too_large_a_file_5
154248d2eeaf5791dfc8199e51daadbc /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# du -ah /srv/
982M /srv/too_large_a_file
982M /srv/too_large_a_file_2
982M /srv/too_large_a_file_3
982M /srv/too_large_a_file_4
982M /srv/too_large_a_file_5
4.8G /srv/
[root@ip-172-31-61-147 ~]# ls -lh /srv/
total 4.8G
-rw-r--r--. 1 root root 982M Aug 29 00:14 too_large_a_file
-rw-r--r--. 1 root root 982M Aug 29 00:26 too_large_a_file_2
-rw-r--r--. 1 root root 982M Aug 29 02:34 too_large_a_file_3
-rw-r--r--. 1 root root 982M Aug 29 12:16 too_large_a_file_4
-rw-r--r--. 1 root root 982M Aug 29 12:27 too_large_a_file_5
[root@ip-172-31-61-147 ~]#
Edit 4: Memory information continued
I was suspicious of my findings, so I did one more test, and it appears to be a bit enlightening. It seems that the file does get changed after caches are dumped: the md5sum output does update.
[root@ip-172-31-61-147 ~]# rm /srv/too_large_a_file_5
rm: remove regular file ‘/srv/too_large_a_file_5’? y
[root@ip-172-31-61-147 ~]# cp too_large_a_file /srv/too_large_a_file_5
(reverse-i-search)`m': r^C/srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# md5sum /srv/too_large_a_file_5
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]#
[root@ip-172-31-61-147 ~]#
[root@ip-172-31-61-147 ~]# md5sum /srv/too_large_a_file_5
e8154ef97a3eb2bd13aea04b823a4570 /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# free -m && sync && echo 3 > /proc/sys/vm/drop_caches && free -m
total used free shared buff/cache available
Mem: 3533 93 298 104 3141 3091
Swap: 0 0 0
total used free shared buff/cache available
Mem: 3533 93 3274 112 166 3175
Swap: 0 0 0
[root@ip-172-31-61-147 ~]# md5sum /srv/too_large_a_file_5
154248d2eeaf5791dfc8199e51daadbc /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 2.2G 5.9G 28% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 113M 1.7G 7% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
tmpfs 354M 0 354M 0% /run/user/1000
/dev/loop0 10G 4.9G 5.2G 49% /srv
After reboot
[root@ip-172-31-61-147 ~]# mount /mnt/file /srv/
[root@ip-172-31-61-147 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 8.0G 2.2G 5.9G 28% /
devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs 1.8G 0 1.8G 0% /dev/shm
tmpfs 1.8G 17M 1.8G 1% /run
tmpfs 1.8G 0 1.8G 0% /sys/fs/cgroup
tmpfs 354M 0 354M 0% /run/user/1000
/dev/xvdb1 4.0G 4.0G 20K 100% /mnt
/dev/loop0 10G 4.9G 5.2G 49% /srv
[root@ip-172-31-61-147 ~]#
Edit 5: Sync information
It appears that mounting the file in a sync fashion generates the error.
[root@ip-172-31-61-147 ~]# mount -odefaults,sync /mnt/file /srv/
[root@ip-172-31-61-147 ~]# mount
sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime,seclabel)
..snip..
/dev/xvdb1 on /mnt type xfs (rw,relatime,seclabel,attr2,inode64,noquota)
/mnt/file on /srv type xfs (rw,relatime,sync,seclabel,wsync,attr2,inode64,noquota)
[root@ip-172-31-61-147 ~]# cp too_large_a_file /srv/too_large_a_file_5
[root@ip-172-31-61-147 ~]# cp too_large_a_file /srv/too_large_a_file_5
cp: error writing ‘/srv/too_large_a_file_5’: Input/output error
cp: failed to extend ‘/srv/too_large_a_file_5’: Input/output error
[root@ip-172-31-61-147 ~]# ls /srv/
too_large_a_file too_large_a_file_2 too_large_a_file_3 too_large_a_file_4 too_large_a_file_5