Skip to content

Commit

Permalink
Merge branch 'release-branch/1.76' into cpierre/coreweave-1.76
Browse files Browse the repository at this point in the history
  • Loading branch information
ChandonPierre committed Nov 10, 2024
2 parents e30d459 + 1edcf9d commit 896e96f
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 23 deletions.
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.76.2
1.76.6
6 changes: 5 additions & 1 deletion health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -1051,11 +1051,15 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
ArgDuration: d.Round(time.Second).String(),
})
}
} else {
} else if homeDERP != 0 {
t.setUnhealthyLocked(noDERPConnectionWarnable, Args{
ArgDERPRegionID: fmt.Sprint(homeDERP),
ArgDERPRegionName: t.derpRegionNameLocked(homeDERP),
})
} else {
// No DERP home determined yet. There's probably some
// other problem or things are just starting up.
t.setHealthyLocked(noDERPConnectionWarnable)
}

if !t.ipnWantRunning {
Expand Down
68 changes: 53 additions & 15 deletions net/netcheck/netcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,11 @@ type probePlan map[string][]probe
// sortRegions returns the regions of dm first sorted
// from fastest to slowest (based on the 'last' report),
// ending with regions that have no data.
func sortRegions(dm *tailcfg.DERPMap, last *Report) (prev []*tailcfg.DERPRegion) {
func sortRegions(dm *tailcfg.DERPMap, last *Report, preferredDERP int) (prev []*tailcfg.DERPRegion) {
prev = make([]*tailcfg.DERPRegion, 0, len(dm.Regions))
for _, reg := range dm.Regions {
if reg.Avoid {
// include an otherwise avoid region if it is the current preferred region
if reg.Avoid && reg.RegionID != preferredDERP {
continue
}
prev = append(prev, reg)
Expand All @@ -419,9 +420,19 @@ func sortRegions(dm *tailcfg.DERPMap, last *Report) (prev []*tailcfg.DERPRegion)
// a full report, all regions are scanned.)
const numIncrementalRegions = 3

// makeProbePlan generates the probe plan for a DERPMap, given the most
// recent report and whether IPv6 is configured on an interface.
func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report) (plan probePlan) {
// makeProbePlan generates the probe plan for a DERPMap, given the most recent
// report and the current home DERP. preferredDERP is passed independently of
// last (report) because last is currently nil'd to indicate a desire for a full
// netcheck.
//
// TODO(raggi,jwhited): refactor the callers and this function to be more clear
// about full vs. incremental netchecks, and remove the need for the history
// hiding. This was avoided in an incremental change due to exactly this kind of
// distant coupling.
// TODO(raggi): change from "preferred DERP" from a historical report to "home
// DERP" as in what DERP is the current home connection, this would further
// reduce flap events.
func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report, preferredDERP int) (plan probePlan) {
if last == nil || len(last.RegionLatency) == 0 {
return makeProbePlanInitial(dm, ifState)
}
Expand All @@ -432,9 +443,34 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report) (pl
had4 := len(last.RegionV4Latency) > 0
had6 := len(last.RegionV6Latency) > 0
hadBoth := have6if && had4 && had6
for ri, reg := range sortRegions(dm, last) {
if ri == numIncrementalRegions {
break
// #13969 ensure that the home region is always probed.
// If a netcheck has unstable latency, such as a user with large amounts of
// bufferbloat or a highly congested connection, there are cases where a full
// netcheck may observe a one-off high latency to the current home DERP. Prior
// to the forced inclusion of the home DERP, this would result in an
// incremental netcheck following such an event to cause a home DERP move, with
// restoration back to the home DERP on the next full netcheck ~5 minutes later
// - which is highly disruptive when it causes shifts in geo routed subnet
// routers. By always including the home DERP in the incremental netcheck, we
// ensure that the home DERP is always probed, even if it observed a recent
// poor latency sample. This inclusion enables the latency history checks in
// home DERP selection to still take effect.
// planContainsHome indicates whether the home DERP has been added to the probePlan,
// if there is no prior home, then there's no home to additionally include.
planContainsHome := preferredDERP == 0
for ri, reg := range sortRegions(dm, last, preferredDERP) {
regIsHome := reg.RegionID == preferredDERP
if ri >= numIncrementalRegions {
// planned at least numIncrementalRegions regions and that includes the
// last home region (or there was none), plan complete.
if planContainsHome {
break
}
// planned at least numIncrementalRegions regions, but not the home region,
// check if this is the home region, if not, skip it.
if !regIsHome {
continue
}
}
var p4, p6 []probe
do4 := have4if
Expand All @@ -445,7 +481,7 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report) (pl
tries := 1
isFastestTwo := ri < 2

if isFastestTwo {
if isFastestTwo || regIsHome {
tries = 2
} else if hadBoth {
// For dual stack machines, make the 3rd & slower nodes alternate
Expand All @@ -456,14 +492,15 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report) (pl
do4, do6 = false, true
}
}
if !isFastestTwo && !had6 {
if !regIsHome && !isFastestTwo && !had6 {
do6 = false
}

if reg.RegionID == last.PreferredDERP {
if regIsHome {
// But if we already had a DERP home, try extra hard to
// make sure it's there so we don't flip flop around.
tries = 4
planContainsHome = true
}

for try := 0; try < tries; try++ {
Expand Down Expand Up @@ -788,9 +825,10 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe
c.curState = rs
last := c.last

// Even if we're doing a non-incremental update, we may want to try our
// preferred DERP region for captive portal detection. Save that, if we
// have it.
// Extract preferredDERP from the last report, if available. This will be used
// in captive portal detection and DERP flapping suppression. Ideally this would
// be the current active home DERP rather than the last report preferred DERP,
// but only the latter is presently available.
var preferredDERP int
if last != nil {
preferredDERP = last.PreferredDERP
Expand Down Expand Up @@ -847,7 +885,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe

var plan probePlan
if opts == nil || !opts.OnlyTCP443 {
plan = makeProbePlan(dm, ifState, last)
plan = makeProbePlan(dm, ifState, last, preferredDERP)
}

// If we're doing a full probe, also check for a captive portal. We
Expand Down
42 changes: 40 additions & 2 deletions net/netcheck/netcheck_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -576,14 +576,52 @@ func TestMakeProbePlan(t *testing.T) {
"region-3-v4": []probe{p("3a", 4)},
},
},
{
// #13969: ensure that the prior/current home region is always included in
// probe plans, so that we don't flap between regions due to a single major
// netcheck having excluded the home region due to a spuriously high sample.
name: "ensure_home_region_inclusion",
dm: basicMap,
have6if: true,
last: &Report{
RegionLatency: map[int]time.Duration{
1: 50 * time.Millisecond,
2: 20 * time.Millisecond,
3: 30 * time.Millisecond,
4: 40 * time.Millisecond,
},
RegionV4Latency: map[int]time.Duration{
1: 50 * time.Millisecond,
2: 20 * time.Millisecond,
},
RegionV6Latency: map[int]time.Duration{
3: 30 * time.Millisecond,
4: 40 * time.Millisecond,
},
PreferredDERP: 1,
},
want: probePlan{
"region-1-v4": []probe{p("1a", 4), p("1a", 4, 60*ms), p("1a", 4, 220*ms), p("1a", 4, 330*ms)},
"region-1-v6": []probe{p("1a", 6), p("1a", 6, 60*ms), p("1a", 6, 220*ms), p("1a", 6, 330*ms)},
"region-2-v4": []probe{p("2a", 4), p("2b", 4, 24*ms)},
"region-2-v6": []probe{p("2a", 6), p("2b", 6, 24*ms)},
"region-3-v4": []probe{p("3a", 4), p("3b", 4, 36*ms)},
"region-3-v6": []probe{p("3a", 6), p("3b", 6, 36*ms)},
"region-4-v4": []probe{p("4a", 4)},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ifState := &netmon.State{
HaveV6: tt.have6if,
HaveV4: !tt.no4,
}
got := makeProbePlan(tt.dm, ifState, tt.last)
preferredDERP := 0
if tt.last != nil {
preferredDERP = tt.last.PreferredDERP
}
got := makeProbePlan(tt.dm, ifState, tt.last, preferredDERP)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("unexpected plan; got:\n%v\nwant:\n%v\n", got, tt.want)
}
Expand Down Expand Up @@ -756,7 +794,7 @@ func TestSortRegions(t *testing.T) {
report.RegionLatency[3] = time.Second * time.Duration(6)
report.RegionLatency[4] = time.Second * time.Duration(0)
report.RegionLatency[5] = time.Second * time.Duration(2)
sortedMap := sortRegions(unsortedMap, report)
sortedMap := sortRegions(unsortedMap, report, 0)

// Sorting by latency this should result in rid: 5, 2, 1, 3
// rid 4 with latency 0 should be at the end
Expand Down
8 changes: 7 additions & 1 deletion net/sockstats/sockstats_tsgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,13 @@ func setNetMon(netMon *netmon.Monitor) {
if ifName == "" {
return
}
ifIndex := state.Interface[ifName].Index
// DefaultRouteInterface and Interface are gathered at different points in time.
// Check for existence first, to avoid a nil pointer dereference.
iface, ok := state.Interface[ifName]
if !ok {
return
}
ifIndex := iface.Index
sockStats.mu.Lock()
defer sockStats.mu.Unlock()
// Ignore changes to unknown interfaces -- it would require
Expand Down
11 changes: 8 additions & 3 deletions wgengine/magicsock/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
} else {
connectedToControl = c.health.GetInPollNetMap()
}
c.mu.Lock()
myDerp := c.myDerp
c.mu.Unlock()
if !connectedToControl {
c.mu.Lock()
myDerp := c.myDerp
c.mu.Unlock()
if myDerp != 0 {
metricDERPHomeNoChangeNoControl.Add(1)
return myDerp
Expand All @@ -178,6 +178,11 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
// one.
preferredDERP = c.pickDERPFallback()
}
if preferredDERP != myDerp {
c.logf(
"magicsock: home DERP changing from derp-%d [%dms] to derp-%d [%dms]",
c.myDerp, report.RegionLatency[myDerp].Milliseconds(), preferredDERP, report.RegionLatency[preferredDERP].Milliseconds())
}
if !c.setNearestDERP(preferredDERP) {
preferredDERP = 0
}
Expand Down

0 comments on commit 896e96f

Please sign in to comment.