Set O_NONBLOCK for reads and writes on unix (#20133)
Tracing syscalls, opening and reading an `xl.meta` looks like this:
```
openat(AT_FDCWD, "/mnt/drive1/ss8-old/testbucket/ObjSize4MiBThreads72/(554O51H/peTb(0iztdbTKw59.csv/xl.meta", O_RDONLY|O_NOATIME|O_CLOEXEC) = 34 <0.000>
fcntl(34, F_GETFL) = 0x48000 (flags O_RDONLY|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) = 0 <0.000>
epoll_ctl(4, EPOLL_CTL_ADD, 34, {events=EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, data={u32=3172471557, u64=8145488475984499461}}) = -1 EPERM (Operation not permitted) <0.000>
fcntl(34, F_GETFL) = 0x48800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_LARGEFILE|O_NOATIME) = 0 <0.000>
fstat(34, {st_mode=S_IFREG|0644, st_size=354, ...}) = 0 <0.000>
read(34, "XL2 \1\0\3\0\306\0\0\1P\2\2\1\304$\225\304\20\0\0\0\0\0\0\0\0\0\0\0"..., 354) = 354 <0.000>
close(34) = 0 <0.000>
```
Everything until `fstat` is the `os.Open` call.
Looking at the code: https://github.com/golang/go/blob/master/src/os/file_unix.go#L212-L243
It seems for every file it "tries" to see if it is pollable. This causes `syscall.SetNonblock(fd, true)` to be called. This is the first `F_SETFL`.
It then calls `f.pfd.Init("file", true)`. This will attempt to set it as pollable using `epoll_ctl`. This will always fail for files. It therefore calls `syscall.SetNonblock(fd, false)` resulting in the second `F_SETFL`.
If we set the `O_NONBLOCK` call on the initial open, we should avoid the 4 `fcntl` syscalls per file.
I don't see any way to avoid the `epoll_ctl` call, since kind is either `kindOpenFile` or `kindNonBlock`, so "pollable" will always be true. However avoiding 4 of 6 syscalls still seems worth it.
This should not have any effect, since files will end up with "nonblock" anyway.
2024-07-23 12:36:24 -04:00
|
|
|
//go:build unix && !darwin && !freebsd
|
2021-02-24 03:14:16 -05:00
|
|
|
|
2021-04-18 15:41:13 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-02-24 03:14:16 -05:00
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"os"
|
Set O_NONBLOCK for reads and writes on unix (#20133)
Tracing syscalls, opening and reading an `xl.meta` looks like this:
```
openat(AT_FDCWD, "/mnt/drive1/ss8-old/testbucket/ObjSize4MiBThreads72/(554O51H/peTb(0iztdbTKw59.csv/xl.meta", O_RDONLY|O_NOATIME|O_CLOEXEC) = 34 <0.000>
fcntl(34, F_GETFL) = 0x48000 (flags O_RDONLY|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) = 0 <0.000>
epoll_ctl(4, EPOLL_CTL_ADD, 34, {events=EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, data={u32=3172471557, u64=8145488475984499461}}) = -1 EPERM (Operation not permitted) <0.000>
fcntl(34, F_GETFL) = 0x48800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_LARGEFILE|O_NOATIME) = 0 <0.000>
fstat(34, {st_mode=S_IFREG|0644, st_size=354, ...}) = 0 <0.000>
read(34, "XL2 \1\0\3\0\306\0\0\1P\2\2\1\304$\225\304\20\0\0\0\0\0\0\0\0\0\0\0"..., 354) = 354 <0.000>
close(34) = 0 <0.000>
```
Everything until `fstat` is the `os.Open` call.
Looking at the code: https://github.com/golang/go/blob/master/src/os/file_unix.go#L212-L243
It seems for every file it "tries" to see if it is pollable. This causes `syscall.SetNonblock(fd, true)` to be called. This is the first `F_SETFL`.
It then calls `f.pfd.Init("file", true)`. This will attempt to set it as pollable using `epoll_ctl`. This will always fail for files. It therefore calls `syscall.SetNonblock(fd, false)` resulting in the second `F_SETFL`.
If we set the `O_NONBLOCK` call on the initial open, we should avoid the 4 `fcntl` syscalls per file.
I don't see any way to avoid the `epoll_ctl` call, since kind is either `kindOpenFile` or `kindNonBlock`, so "pollable" will always be true. However avoiding 4 of 6 syscalls still seems worth it.
This should not have any effect, since files will end up with "nonblock" anyway.
2024-07-23 12:36:24 -04:00
|
|
|
"syscall"
|
2021-02-24 03:14:16 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
// Disallow updating access times
|
Set O_NONBLOCK for reads and writes on unix (#20133)
Tracing syscalls, opening and reading an `xl.meta` looks like this:
```
openat(AT_FDCWD, "/mnt/drive1/ss8-old/testbucket/ObjSize4MiBThreads72/(554O51H/peTb(0iztdbTKw59.csv/xl.meta", O_RDONLY|O_NOATIME|O_CLOEXEC) = 34 <0.000>
fcntl(34, F_GETFL) = 0x48000 (flags O_RDONLY|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) = 0 <0.000>
epoll_ctl(4, EPOLL_CTL_ADD, 34, {events=EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, data={u32=3172471557, u64=8145488475984499461}}) = -1 EPERM (Operation not permitted) <0.000>
fcntl(34, F_GETFL) = 0x48800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_LARGEFILE|O_NOATIME) = 0 <0.000>
fstat(34, {st_mode=S_IFREG|0644, st_size=354, ...}) = 0 <0.000>
read(34, "XL2 \1\0\3\0\306\0\0\1P\2\2\1\304$\225\304\20\0\0\0\0\0\0\0\0\0\0\0"..., 354) = 354 <0.000>
close(34) = 0 <0.000>
```
Everything until `fstat` is the `os.Open` call.
Looking at the code: https://github.com/golang/go/blob/master/src/os/file_unix.go#L212-L243
It seems for every file it "tries" to see if it is pollable. This causes `syscall.SetNonblock(fd, true)` to be called. This is the first `F_SETFL`.
It then calls `f.pfd.Init("file", true)`. This will attempt to set it as pollable using `epoll_ctl`. This will always fail for files. It therefore calls `syscall.SetNonblock(fd, false)` resulting in the second `F_SETFL`.
If we set the `O_NONBLOCK` call on the initial open, we should avoid the 4 `fcntl` syscalls per file.
I don't see any way to avoid the `epoll_ctl` call, since kind is either `kindOpenFile` or `kindNonBlock`, so "pollable" will always be true. However avoiding 4 of 6 syscalls still seems worth it.
This should not have any effect, since files will end up with "nonblock" anyway.
2024-07-23 12:36:24 -04:00
|
|
|
// Add non-block to avoid syscall to attempt to set epoll on files.
|
|
|
|
readMode = os.O_RDONLY | 0x40000 | syscall.O_NONBLOCK // O_NOATIME
|
2021-02-24 03:14:16 -05:00
|
|
|
|
|
|
|
// Write with data sync only used only for `xl.meta` writes
|
Set O_NONBLOCK for reads and writes on unix (#20133)
Tracing syscalls, opening and reading an `xl.meta` looks like this:
```
openat(AT_FDCWD, "/mnt/drive1/ss8-old/testbucket/ObjSize4MiBThreads72/(554O51H/peTb(0iztdbTKw59.csv/xl.meta", O_RDONLY|O_NOATIME|O_CLOEXEC) = 34 <0.000>
fcntl(34, F_GETFL) = 0x48000 (flags O_RDONLY|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) = 0 <0.000>
epoll_ctl(4, EPOLL_CTL_ADD, 34, {events=EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, data={u32=3172471557, u64=8145488475984499461}}) = -1 EPERM (Operation not permitted) <0.000>
fcntl(34, F_GETFL) = 0x48800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOATIME) <0.000>
fcntl(34, F_SETFL, O_RDONLY|O_LARGEFILE|O_NOATIME) = 0 <0.000>
fstat(34, {st_mode=S_IFREG|0644, st_size=354, ...}) = 0 <0.000>
read(34, "XL2 \1\0\3\0\306\0\0\1P\2\2\1\304$\225\304\20\0\0\0\0\0\0\0\0\0\0\0"..., 354) = 354 <0.000>
close(34) = 0 <0.000>
```
Everything until `fstat` is the `os.Open` call.
Looking at the code: https://github.com/golang/go/blob/master/src/os/file_unix.go#L212-L243
It seems for every file it "tries" to see if it is pollable. This causes `syscall.SetNonblock(fd, true)` to be called. This is the first `F_SETFL`.
It then calls `f.pfd.Init("file", true)`. This will attempt to set it as pollable using `epoll_ctl`. This will always fail for files. It therefore calls `syscall.SetNonblock(fd, false)` resulting in the second `F_SETFL`.
If we set the `O_NONBLOCK` call on the initial open, we should avoid the 4 `fcntl` syscalls per file.
I don't see any way to avoid the `epoll_ctl` call, since kind is either `kindOpenFile` or `kindNonBlock`, so "pollable" will always be true. However avoiding 4 of 6 syscalls still seems worth it.
This should not have any effect, since files will end up with "nonblock" anyway.
2024-07-23 12:36:24 -04:00
|
|
|
writeMode = 0x1000 | syscall.O_NONBLOCK // O_DSYNC
|
2021-02-24 03:14:16 -05:00
|
|
|
)
|