diff --git a/pkg/cgroup/linux.go b/pkg/cgroup/linux.go new file mode 100644 index 000000000..34854bbd8 --- /dev/null +++ b/pkg/cgroup/linux.go @@ -0,0 +1,177 @@ +// +build linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package cgroup implements parsing for all the cgroup +// categories and functionality in a simple way. +package cgroup + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" +) + +// DO NOT EDIT following constants are chosen defaults for any kernel +// after 3.x, please open a github issue https://github.com/minio/minio/issues +// and discuss first if you wish to change this. +const ( + // Name of the kernel parameter (file) holding the cgroup memory limit. + memoryLimitKernelParam = "memory.limit_in_bytes" + + // Path of the memory cgroup hierarchy under sysfs. + cgroupMemSysPath = "/sys/fs/cgroup/memory" + + // Prefix of the cgroup paths reported for docker containers. + dockerPrefixName = "/docker/" + + // Template of the per-process cgroup file, formatted with the pid. + cgroupFileTemplate = "/proc/%d/cgroup" +) + +// CGEntries - represents all the entries in a process cgroup file +// at /proc/<pid>/cgroup as key value pairs. +type CGEntries map[string]string + +// GetEntries reads and parses all the cgroup entries for a given process. 
+func GetEntries(pid int) (CGEntries, error) { + r, err := os.Open(fmt.Sprintf(cgroupFileTemplate, pid)) + if err != nil { + return nil, err + } + defer r.Close() + return parseProcCGroup(r) +} + +// parseProcCGroup - cgroups are always in the following +// format once enabled you need to know the pid of the +// application you are looking for so that the +// following parsing logic only parses the file located +// at /proc/<pid>/cgroup. +// +// CGROUP entries id, component and path are always in +// the following format ``ID:COMPONENT:PATH``, COMPONENT may be a comma separated list and may carry a `name=` prefix. +// +// Following code block parses this information and +// returns CGEntries which is a parsed map of all +// the line by line entries from /proc/<pid>/cgroup. +func parseProcCGroup(r io.Reader) (CGEntries, error) { + var cgEntries = CGEntries{} + + // Start reading cgroup categories line by line + // and process them into the CGEntries map, lines with less than three fields are skipped. + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := scanner.Text() + + tokens := strings.SplitN(line, ":", 3) + if len(tokens) < 3 { + continue + } + + name, path := tokens[1], tokens[2] + for _, token := range strings.Split(name, ",") { + name = strings.TrimPrefix(token, "name=") + cgEntries[name] = path + } + } + + // Return upon any error while reading the cgroup categories. + if err := scanner.Err(); err != nil { + return nil, err + } + + return cgEntries, nil +} + +// getManagerKernValue - fetches value of the cgroup kernel param from the cgroup manager, +// if cgroup manager is configured we should just rely on `cgm` cli +// to fetch all the values for us, the trimmed output is parsed as an unsigned integer. +func getManagerKernValue(cname, path, kernParam string) (limit uint64, err error) { + + cmd := exec.Command("cgm", "getvalue", cname, path, kernParam) + var out bytes.Buffer + cmd.Stdout = &out + if err = cmd.Run(); err != nil { + return 0, err + } + + // Parse the cgm output. + limit, err = strconv.ParseUint(strings.TrimSpace(out.String()), 10, 64) + return limit, err +} + +// Get cgroup memory limit file path. 
+func getMemoryLimitFilePath(cgPath string) string { + path := cgroupMemSysPath + + // Docker generates weird cgroup paths that don't + // really exist on the file system. + // + // For example on regular Linux OS : + // `/user.slice/user-1000.slice/session-1.scope` + // + // But they exist as a bind mount on Docker and + // are not accessible : `/docker/<container-id>` + // + // We will just ignore the cgroup path if it starts with + // `/docker/` and fall back to : + // `/sys/fs/cgroup/memory/memory.limit_in_bytes` + if !strings.HasPrefix(cgPath, dockerPrefixName) { + path = filepath.Join(path, cgPath) + } + + // Final path. + return filepath.Join(path, memoryLimitKernelParam) +} + +// GetMemoryLimit - Fetches cgroup memory limit by querying the +// cgroup manager (`cgm`) first, if that fails then fallback to +// reading the limit file under '/sys/fs/cgroup/memory'. +func GetMemoryLimit(pid int) (limit uint64, err error) { + var cg CGEntries + cg, err = GetEntries(pid) + if err != nil { + return 0, err + } + + path := cg["memory"] + + limit, err = getManagerKernValue("memory", path, memoryLimitKernelParam) + if err != nil { + + // Upon any failure returned from `cgm`, on some systems cgm + // might not be installed. We fallback to using the sysfs + // path instead to lookup memory limits. + var b []byte + b, err = ioutil.ReadFile(getMemoryLimitFilePath(path)) + if err != nil { + return 0, err + } + + limit, err = strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64) + } + + return limit, err +} diff --git a/pkg/cgroup/linux_test.go b/pkg/cgroup/linux_test.go new file mode 100644 index 000000000..02b817a50 --- /dev/null +++ b/pkg/cgroup/linux_test.go @@ -0,0 +1,140 @@ +// +build linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cgroup + +import ( + "io/ioutil" + "os" + "testing" +) + +// TestProcCGroup - tests parsing correctness for various process cgroup files. +func TestProcCGroup(t *testing.T) { + tmpPath, err := ioutil.TempFile("", "cgroup") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpPath.Name()) + + cgroup := ` +11:memory:/user.slice +10:blkio:/user.slice +9:hugetlb:/ +8:net_cls,net_prio:/ +7:perf_event:/ +6:pids:/user.slice/user-1000.slice +5:devices:/user.slice +4:cpuset:/ +3:cpu,cpuacct:/user.slice +2:freezer:/ +1:name=systemd:/user.slice/user-1000.slice/session-1.scope +` + _, err = tmpPath.WriteString(cgroup) + if err != nil { + t.Fatal(err) + } + + // Seek back to read from the beginning. + tmpPath.Seek(0, 0) + + cg, err := parseProcCGroup(tmpPath) + if err != nil { + t.Fatal(err) + } + + path := cg["memory"] + if len(path) == 0 { + t.Fatal("Path component cannot be empty") + } + + if path != "/user.slice" { + t.Fatalf("Expected: %s, got %s", "/user.slice", path) + } + + path = cg["systemd"] + if path != "/user.slice/user-1000.slice/session-1.scope" { + t.Fatalf("Expected: %s, got %s", "/user.slice/user-1000.slice/session-1.scope", path) + } + + // Mixed cgroups with different group names. + cgroup = ` +11:memory:/newtest/newtest +10:blkio:/user.slice +9:hugetlb:/ +8:net_cls,net_prio:/ +7:perf_event:/ +6:pids:/user.slice/user-1000.slice +5:devices:/user.slice +4:cpuset:/ +3:cpu,cpuacct:/newtest/newtest +2:freezer:/ +1:name=systemd:/user.slice/user-1000.slice/session-1.scope +` + + // Seek back to overwrite from the beginning, the new content is longer than the previous one. 
+ tmpPath.Seek(0, 0) + + _, err = tmpPath.WriteString(cgroup) + if err != nil { + t.Fatal(err) + } + + // Seek back to read from the beginning. + tmpPath.Seek(0, 0) + + cg, err = parseProcCGroup(tmpPath) + if err != nil { + t.Fatal(err) + } + + path = cg["memory"] + if path != "/newtest/newtest" { + t.Fatalf("Expected: %s, got %s", "/newtest/newtest", path) + } + + path = cg["systemd"] + if path != "/user.slice/user-1000.slice/session-1.scope" { + t.Fatalf("Expected: %s, got %s", "/user.slice/user-1000.slice/session-1.scope", path) + } + +} + +// TestMemoryLimitPath - tests cgroup memory limit path construction, with and without the docker prefix. +func TestMemoryLimitPath(t *testing.T) { + testCases := []struct { + cgroupPath string + expectedPath string + }{ + { + cgroupPath: "/user.slice", + expectedPath: "/sys/fs/cgroup/memory/user.slice/memory.limit_in_bytes", + }, + { + cgroupPath: "/docker/testing", + expectedPath: "/sys/fs/cgroup/memory/memory.limit_in_bytes", + }, + } + + for i, testCase := range testCases { + actualPath := getMemoryLimitFilePath(testCase.cgroupPath) + if actualPath != testCase.expectedPath { + t.Fatalf("Test: %d: Expected: %s, got %s", i+1, testCase.expectedPath, actualPath) + } + } +} diff --git a/pkg/cgroup/others.go b/pkg/cgroup/others.go new file mode 100644 index 000000000..d36d4d7f5 --- /dev/null +++ b/pkg/cgroup/others.go @@ -0,0 +1,19 @@ +// +build !linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cgroup diff --git a/pkg/sys/stats_linux.go b/pkg/sys/stats_linux.go index 6cd5c6002..dbb91bb12 100644 --- a/pkg/sys/stats_linux.go +++ b/pkg/sys/stats_linux.go @@ -6,7 +6,7 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - *shouldP + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -18,14 +18,65 @@ package sys -import "syscall" +import ( + "os" + "syscall" -// GetStats - return system statistics. -func GetStats() (stats Stats, err error) { - var si syscall.Sysinfo_t - if err = syscall.Sysinfo(&si); err == nil { - stats.TotalRAM = uint64(si.Totalram) + "github.com/minio/minio/pkg/cgroup" +) + +// getMemoryLimit returns the effective memory limit for this process. +// By default, without any configuration on a vanilla Linux +// system, this is the physical RAM size. If a cgroup memory +// limit can be read for the current pid and it does not exceed +// the physical RAM size, the cgroup limit is returned instead. +func getMemoryLimit() (sysLimit uint64, err error) { + if sysLimit, err = getSysinfoMemoryLimit(); err != nil { + // Physical memory info is not accessible, just exit here. + return 0, err } - return stats, err + // The cgroup lookup error (gerr) is deliberately not propagated. + cGroupLimit, gerr := cgroup.GetMemoryLimit(os.Getpid()) + if gerr != nil { + // Upon error just return system limit. + return sysLimit, nil + } + + // A cgroup limit not larger than the system limit means the + // user wants to limit the memory usage further, + // treat the cgroup limit as the system limit. + if cGroupLimit <= sysLimit { + sysLimit = cGroupLimit + } + + // Final system limit. + return sysLimit, nil + +} + +// Get physical RAM size of the node. 
+func getSysinfoMemoryLimit() (limit uint64, err error) { + var si syscall.Sysinfo_t + if err = syscall.Sysinfo(&si); err != nil { + return 0, err + } + + // Total RAM is the product of unit size and total ram, both are + // converted explicitly since Totalram is not uint64 on 32-bit platforms. + limit = uint64(si.Unit) * uint64(si.Totalram) + return limit, nil +} + +// GetStats - return system statistics, currently only +// supported value is TotalRAM, set to the limit returned by getMemoryLimit. +func GetStats() (stats Stats, err error) { + var limit uint64 + limit, err = getMemoryLimit() + if err != nil { + return Stats{}, err + } + + stats.TotalRAM = limit + return stats, nil } diff --git a/pkg/sys/stats_test.go b/pkg/sys/stats_test.go index e2666639e..178bd9d1e 100644 --- a/pkg/sys/stats_test.go +++ b/pkg/sys/stats_test.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2016,2017 Minio, Inc. + * Minio Cloud Storage, (C) 2016, 2017 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.