Skip to content

Commit

Permalink
support multiple user-facing network interfaces (multi-homing)
Browse files Browse the repository at this point in the history
* part three

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Dec 1, 2023
1 parent 3ecd430 commit 539e819
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 101 deletions.
69 changes: 34 additions & 35 deletions ais/htrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,107 +261,106 @@ func (h *htrun) init(config *cmn.Config) {
// steps 1 thru 4
func (h *htrun) initSnode(config *cmn.Config) {
var (
pubAddr1, pubAddr2 meta.NetInfo
intraControlAddr meta.NetInfo
intraDataAddr meta.NetInfo
port = strconv.Itoa(config.HostNet.Port)
proto = config.Net.HTTP.Proto
pubAddr meta.NetInfo
pubExtra []meta.NetInfo
ctrlAddr meta.NetInfo
dataAddr meta.NetInfo
port = strconv.Itoa(config.HostNet.Port)
proto = config.Net.HTTP.Proto
)

addrList, err := getLocalIPv4s(config)
if err != nil {
cos.ExitLogf("failed to get local IP addr list: %v", err)
}

// 1. pub net
pub1, pub2 := multihome(config.HostNet.Hostname)
pub, extra := multihome(config.HostNet.Hostname)

if k8s.IsK8s() && config.HostNet.Hostname != "" {
// K8s: skip IP addr validation
// public hostname could be a load balancer's external IP or a service DNS
nlog.Infof("K8s deployment: skipping hostname validation for %q", config.HostNet.Hostname)
pubAddr1.Init(proto, pub1, port)
pubAddr.Init(proto, pub, port)
} else {
if err = initNetInfo(&pubAddr1, config, addrList, proto, config.HostNet.Hostname, port); err != nil {
if err = initNetInfo(&pubAddr, config, addrList, proto, config.HostNet.Hostname, port); err != nil {
cos.ExitLogf("failed to get %s IPv4/hostname: %v", cmn.NetPublic, err)
}
}
// multi-home (when config.HostNet.Hostname is a comma-separated list)
// using the same pub port
if pub2 != "" {
if pub2 == pub1 {
cos.ExitLogf("%s (user) multihome access: cannot have two identical addresses: %q",
cmn.NetPublic, config.HostNet.Hostname)
if l := len(extra); l > 0 {
pubExtra = make([]meta.NetInfo, l)
for i, addr := range extra {
pubExtra[i].Init(proto, addr, port)
}
pubAddr2.Init(proto, pub2, port)
nlog.Infof("%s (user) access: %v and %v", cmn.NetPublic, pubAddr1, pubAddr2)
} else {
nlog.Infof("%s (user) access: %v (%q)", cmn.NetPublic, pubAddr1, config.HostNet.Hostname)
nlog.Infof("%s (user) access: %v (%q)", cmn.NetPublic, pubAddr, config.HostNet.Hostname)
}

// 2. intra-cluster
intraControlAddr = pubAddr1
ctrlAddr = pubAddr
if config.HostNet.UseIntraControl {
icport := strconv.Itoa(config.HostNet.PortIntraControl)
err = initNetInfo(&intraControlAddr, config, addrList, proto, config.HostNet.HostnameIntraControl, icport)
err = initNetInfo(&ctrlAddr, config, addrList, proto, config.HostNet.HostnameIntraControl, icport)
if err != nil {
cos.ExitLogf("failed to get %s IPv4/hostname: %v", cmn.NetIntraControl, err)
}
var s string
if config.HostNet.HostnameIntraControl != "" {
s = " (config: " + config.HostNet.HostnameIntraControl + ")"
}
nlog.Infof("%s access: %v%s", cmn.NetIntraControl, intraControlAddr, s)
nlog.Infof("%s access: %v%s", cmn.NetIntraControl, ctrlAddr, s)
}
intraDataAddr = pubAddr1
dataAddr = pubAddr
if config.HostNet.UseIntraData {
idport := strconv.Itoa(config.HostNet.PortIntraData)
err = initNetInfo(&intraDataAddr, config, addrList, proto, config.HostNet.HostnameIntraData, idport)
err = initNetInfo(&dataAddr, config, addrList, proto, config.HostNet.HostnameIntraData, idport)
if err != nil {
cos.ExitLogf("failed to get %s IPv4/hostname: %v", cmn.NetIntraData, err)
}
var s string
if config.HostNet.HostnameIntraData != "" {
s = " (config: " + config.HostNet.HostnameIntraData + ")"
}
nlog.Infof("%s access: %v%s", cmn.NetIntraData, intraDataAddr, s)
nlog.Infof("%s access: %v%s", cmn.NetIntraData, dataAddr, s)
}

// 3. validate
mustDiffer(pubAddr1,
mustDiffer(pubAddr,
config.HostNet.Port,
true,
intraControlAddr,
ctrlAddr,
config.HostNet.PortIntraControl,
config.HostNet.UseIntraControl,
"pub/ctl",
)
mustDiffer(pubAddr1,
mustDiffer(pubAddr,
config.HostNet.Port,
true,
intraDataAddr,
dataAddr,
config.HostNet.PortIntraData,
config.HostNet.UseIntraData,
"pub/data",
)
mustDiffer(intraDataAddr,
mustDiffer(dataAddr,
config.HostNet.PortIntraData,
config.HostNet.UseIntraData,
intraControlAddr,
ctrlAddr,
config.HostNet.PortIntraControl,
config.HostNet.UseIntraControl,
"ctl/data",
)

// 4. new Snode
h.si = &meta.Snode{
PubNet: pubAddr1,
ControlNet: intraControlAddr,
DataNet: intraDataAddr,
}
if !pubAddr2.IsEmpty() {
h.si.PubExtra = make([]meta.NetInfo, 1)
h.si.PubExtra[0] = pubAddr2
PubNet: pubAddr,
ControlNet: ctrlAddr,
DataNet: dataAddr,
}
if l := len(pubExtra); l > 0 {
h.si.PubExtra = make([]meta.NetInfo, l)
copy(h.si.PubExtra, pubExtra)
nlog.Infof("%s (multihome) access: %v and %v", cmn.NetPublic, pubAddr, h.si.PubExtra)
}
}

Expand Down
5 changes: 4 additions & 1 deletion ais/test/downloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,12 @@ func TestDownloadStatus(t *testing.T) {
longFileName = tools.GenerateNotConflictingObjectName(shortFileName, "longFile", bck, m.smap)
)

// NOTE Dec 1/23: gs://nvdata-openimages started to return 403

files := map[string]string{
shortFileName: "https://raw.githubusercontent.com/NVIDIA/aistore/master/README.md",
longFileName: "https://storage.googleapis.com/nvdata-openimages/openimages-train-000001.tar",
// longFileName: "https://storage.googleapis.com/nvdata-openimages/openimages-train-000001.tar",
longFileName: "https://raw.githubusercontent.com/NVIDIA/aistore/master/docs/images/ais-s3-tf.gif",
}

clearDownloadList(t)
Expand Down
25 changes: 17 additions & 8 deletions ais/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/NVIDIA/aistore/cluster/meta"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/k8s"
"github.com/NVIDIA/aistore/cmn/nlog"
)
Expand Down Expand Up @@ -183,15 +182,25 @@ func _localIP(config *cmn.Config, addrList []*localIPv4Info) (ip net.IP, err err
return ip, nil
}

func multihome(configuredIPv4s string) (pub1, pub2 string) {
if !strings.Contains(configuredIPv4s, cmn.HostnameListSepa) {
return configuredIPv4s, ""
func multihome(configuredIPv4s string) (pub string, extra []string) {
if i := strings.IndexByte(configuredIPv4s, cmn.HostnameListSepa[0]); i <= 0 {
cos.ExitAssertLog(i < 0, "invalid format:", configuredIPv4s)
return configuredIPv4s, nil
}

// trim + validation
lst := strings.Split(configuredIPv4s, cmn.HostnameListSepa)
debug.Assert(len(lst) == 2, lst)
pub1, pub2 = strings.TrimSpace(lst[0]), strings.TrimSpace(lst[1])
nlog.Infof("multihome pub1: %s (%v), pub2: %s (%v)", pub1, net.ParseIP(pub1), pub2, net.ParseIP(pub2))
return
pub, extra = strings.TrimSpace(lst[0]), lst[1:]
for i := range extra {
extra[i] = strings.TrimSpace(extra[i])
cos.ExitAssertLog(len(extra[i]) > 0, "invalid format (empty value):", configuredIPv4s)
cos.ExitAssertLog(extra[i] != pub, "duplicated addr or hostname:", configuredIPv4s)
for j := 0; j < i; j++ {
cos.ExitAssertLog(extra[i] != extra[j], "duplicated addr or hostname:", configuredIPv4s)
}
}
nlog.Infof("multihome: %s and %v", pub, extra)
return pub, extra
}

// choose one of the local IPv4s if local config doesn't contain (explicitly) specified
Expand Down
2 changes: 1 addition & 1 deletion cluster/meta/hrw.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (smap *Smap) HrwMultiHome(uname string) (si *Snode, netName string, err err
if i == 0 {
return si, cmn.NetPublic, nil
}
return si, si.PubExtra[i-1].Hostname, nil
return si, si.PubExtra[i-1].URL, nil
}

func (smap *Smap) HrwHash2T(digest uint64) (si *Snode, err error) {
Expand Down
26 changes: 10 additions & 16 deletions cluster/meta/smap.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ func (d *Snode) String() string { return d.Name() }

func (d *Snode) SetName() {
name := d.StringEx()
if d.name != "" && d.name != name {
cos.AssertMsg(false, d.name+" vs. "+name)
}
debug.Assert(d.name == "" || d.name == name, name, d.name)
d.name = name
}

Expand Down Expand Up @@ -156,23 +154,19 @@ func (d *Snode) nameNets() string {
return fmt.Sprintf("%s(%s)", d.Name(), d.PubNet.URL)
}

func (d *Snode) URL(network string) string {
func (d *Snode) URL(network string) (u string) {
switch network {
case cmn.NetPublic:
return d.PubNet.URL
u = d.PubNet.URL
case cmn.NetIntraControl:
return d.ControlNet.URL
u = d.ControlNet.URL
case cmn.NetIntraData:
return d.DataNet.URL
default: // multi-home
for i := range d.PubExtra {
if d.PubExtra[i].Hostname == network {
return d.PubExtra[i].URL
}
}
u = d.DataNet.URL
default: // (exclusively via HrwMultiHome)
debug.Assert(strings.Contains(network, "://"), network) // "is URI" per rfc2396.txt
u = network
}
debug.Assert(false, "unknown network '"+network+"'")
return ""
return u
}

// TODO [feature]
Expand Down Expand Up @@ -294,7 +288,7 @@ func (ni *NetInfo) Init(proto, hostname, port string) {
ep := _ep(hostname, port)
ni.Hostname = hostname
ni.Port = port
ni.URL = fmt.Sprintf("%s://%s", proto, ep)
ni.URL = proto + "://" + ep // rfc2396.txt "Uniform Resource Identifiers (URI): Generic Syntax"
ni.tcpEndpoint = ep
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.21

// direct
require (
github.com/NVIDIA/aistore v1.3.22-0.20231201002937-e082683f6388
github.com/NVIDIA/aistore v1.3.22-0.20231201003409-3ecd430019b0
github.com/fatih/color v1.16.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo v1.16.5
Expand Down
4 changes: 2 additions & 2 deletions cmd/cli/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/aistore v1.3.22-0.20231201002937-e082683f6388 h1:vVelvWdh71DhS12hlnhE2UYqxjSMMKmgy9a87bAevN0=
github.com/NVIDIA/aistore v1.3.22-0.20231201002937-e082683f6388/go.mod h1:cOTgDt5fVCQOB+rnvYZgVFRF3dEzPqu8f22F3F+Yvtg=
github.com/NVIDIA/aistore v1.3.22-0.20231201003409-3ecd430019b0 h1:mgnlUcAFvCn4OUS5uYhgvPryCYluEQmOLnfXDyh4Te8=
github.com/NVIDIA/aistore v1.3.22-0.20231201003409-3ecd430019b0/go.mod h1:cOTgDt5fVCQOB+rnvYZgVFRF3dEzPqu8f22F3F+Yvtg=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
Expand Down
37 changes: 0 additions & 37 deletions cmn/cos/err.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package cos
import (
"context"
"errors"
"flag"
"fmt"
"net"
"net/http"
Expand All @@ -18,7 +17,6 @@ import (
"syscall"

"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/nlog"
)

type (
Expand Down Expand Up @@ -173,41 +171,6 @@ func (e *ErrSignal) ExitCode() int { return 128 + int(e.signal) }
func NewSignalError(s syscall.Signal) *ErrSignal { return &ErrSignal{signal: s} }
func (e *ErrSignal) Error() string { return fmt.Sprintf("Signal %d", e.signal) }

//
// Abnormal Termination
//

const fatalPrefix = "FATAL ERROR: "

func Exitf(f string, a ...any) {
msg := fmt.Sprintf(fatalPrefix+f, a...)
_exit(msg)
}

// +log
func ExitLogf(f string, a ...any) {
msg := fmt.Sprintf(fatalPrefix+f, a...)
if flag.Parsed() {
nlog.ErrorDepth(1, msg+"\n")
nlog.Flush(nlog.ActExit)
}
_exit(msg)
}

func ExitLog(a ...any) {
msg := fatalPrefix + fmt.Sprint(a...)
if flag.Parsed() {
nlog.ErrorDepth(1, msg+"\n")
nlog.Flush(nlog.ActExit)
}
_exit(msg)
}

func _exit(msg string) {
fmt.Fprintln(os.Stderr, msg)
os.Exit(1)
}

//
// url.Error
//
Expand Down
51 changes: 51 additions & 0 deletions cmn/cos/exit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Package cos provides common low-level types and utilities for all aistore projects.
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*/
package cos

import (
"flag"
"fmt"
"os"

"github.com/NVIDIA/aistore/cmn/nlog"
)

const fatalPrefix = "FATAL ERROR: "

// _exit writes msg, followed by a newline, to standard error and then
// terminates the process with exit status 1. It never returns.
func _exit(msg string) {
	const status = 1
	fmt.Fprintf(os.Stderr, "%s\n", msg)
	os.Exit(status)
}

func Exitf(f string, a ...any) {
msg := fmt.Sprintf(fatalPrefix+f, a...)
_exit(msg)
}

// ExitLogf is Exitf +log: it formats a fatal message (fatalPrefix followed by
// the printf-style format f applied to a), records it in the log as an error
// and flushes the log, and then terminates the process via _exit.
//
// Logging is attempted only once the command line has been parsed
// (flag.Parsed); before that, the message goes to standard error only.
func ExitLogf(f string, a ...any) {
	fatal := fmt.Sprintf(fatalPrefix+f, a...)
	if parsed := flag.Parsed(); parsed {
		// depth is counted from this function: keep the call right here
		nlog.ErrorDepth(1, fatal+"\n")
		nlog.Flush(nlog.ActExit)
	}
	_exit(fatal)
}

// ExitLog is the non-formatting counterpart of ExitLogf: the fatal message is
// fatalPrefix followed by fmt.Sprint(a...). The message is recorded in the log
// as an error (and the log is flushed) when the command line has already been
// parsed, and the process is then terminated via _exit.
func ExitLog(a ...any) {
	fatal := fatalPrefix + fmt.Sprint(a...)
	if parsed := flag.Parsed(); parsed {
		// depth is counted from this function: keep the call right here
		nlog.ErrorDepth(1, fatal+"\n")
		nlog.Flush(nlog.ActExit)
	}
	_exit(fatal)
}

// +condition
func ExitAssertLog(cond bool, a ...any) {
if !cond {
ExitLog(a...)
}
}

0 comments on commit 539e819

Please sign in to comment.