// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	"container/heap"
	"math"
	"sort"
	"strings"
	"time"
)

// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}

// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int

const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep
	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)

// MutatorUtilizationV2 returns a set of mutator utilization functions
// for the given v2 trace, passed as a slice of events. Each function will
// always end with 0 utilization. The bounds of each function are implicit
// in the first and last event; outside of these bounds each function is
// undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilizationV2(events []Event, flags UtilFlags) [][]MutatorUtil {
	// Set up a bunch of analysis state.
	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series is the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	type procsCount struct {
		// time at which procs changed.
		time int64
		// n is the number of procs at that point.
		n int
	}
	out := [][]MutatorUtil{}
	stw := 0
	ps := []perP{}
	inGC := make(map[GoID]bool)
	states := make(map[GoID]GoState)
	bgMark := make(map[GoID]bool)
	procs := []procsCount{}
	seenSync := false

	// Helpers.
	handleSTW := func(r Range) bool {
		return flags&UtilSTW != 0 && isGCSTW(r)
	}
	handleMarkAssist := func(r Range) bool {
		return flags&UtilAssist != 0 && isGCMarkAssist(r)
	}
	handleSweep := func(r Range) bool {
		return flags&UtilSweep != 0 && isGCSweep(r)
	}

	// Iterate through the trace, tracking mutator utilization.
	var lastEv *Event
	for i := range events {
		ev := &events[i]
		lastEv = ev

		// Process the event.
		switch ev.Kind() {
		case EventSync:
			seenSync = true
		case EventMetric:
			m := ev.Metric()
			if m.Name != "/sched/gomaxprocs:threads" {
				break
			}
			gomaxprocs := int(m.Value.Uint64())
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
				}
				ps = append(ps, perP{series: series})
			}
			if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
				procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
			}
		}
		if len(ps) == 0 {
			// We can't start doing any analysis until we see what GOMAXPROCS is.
			// It will show up very early in the trace, but we need to be robust to
			// something else being emitted beforehand.
			continue
		}

		switch ev.Kind() {
		case EventRangeActive:
			if seenSync {
				// If we've seen a sync, then we can be sure we're not finding out about
				// something late; we have complete information after that point, and these
				// active events will just be redundant.
				break
			}
			// This range is active back to the start of the trace. We're failing to account
			// for this since we just found out about it now. Fix up the mutator utilization.
			//
			// N.B. A trace can't start during a STW, so we don't handle it here.
			r := ev.Range()
			switch {
			case handleMarkAssist(r):
				if !states[ev.Goroutine()].Executing() {
					// If the goroutine isn't executing, then the fact that it was in mark
					// assist doesn't actually count.
					break
				}
				// This G has been in a mark assist *and running on its P* since the start
				// of the trace.
				fallthrough
			case handleSweep(r):
				// This P has been in sweep (or mark assist, from above) since the start of the trace.
				//
				// We don't need to do anything if UtilPerProc is set. If we get an event like
				// this for a running P, it must show up the first time a P is mentioned. Therefore,
				// this P won't actually have any MutatorUtils on its list yet.
				//
				// However, if UtilPerProc isn't set, then we probably have data from other procs
				// and from previous events. We need to fix that up.
				if flags&UtilPerProc != 0 {
					break
				}
				// Subtract out 1/gomaxprocs mutator utilization for all time periods
				// from the beginning of the trace until now.
				mi, pi := 0, 0
				for mi < len(out[0]) {
					if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
						pi++
						continue
					}
					out[0][mi].Util -= float64(1) / float64(procs[pi].n)
					if out[0][mi].Util < 0 {
						out[0][mi].Util = 0
					}
					mi++
				}
			}
			// After accounting for the portion we missed, this just acts like the
			// beginning of a new range.
			fallthrough
		case EventRangeBegin:
			r := ev.Range()
			if handleSTW(r) {
				stw++
			} else if handleSweep(r) {
				ps[ev.Proc()].gc++
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc++
				if g := r.Scope.Goroutine(); g != NoGoroutine {
					inGC[g] = true
				}
			}
		case EventRangeEnd:
			r := ev.Range()
			if handleSTW(r) {
				stw--
			} else if handleSweep(r) {
				ps[ev.Proc()].gc--
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc--
				if g := r.Scope.Goroutine(); g != NoGoroutine {
					delete(inGC, g)
				}
			}
		case EventStateTransition:
			st := ev.StateTransition()
			if st.Resource.Kind != ResourceGoroutine {
				break
			}
			old, new := st.Goroutine()
			g := st.Resource.Goroutine()
			if inGC[g] || bgMark[g] {
				if !old.Executing() && new.Executing() {
					// Started running while doing GC things.
					ps[ev.Proc()].gc++
				} else if old.Executing() && !new.Executing() {
					// Stopped running while doing GC things.
					ps[ev.Proc()].gc--
				}
			}
			states[g] = new
		case EventLabel:
			l := ev.Label()
			if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
					bgMark[ev.Goroutine()] = true
					ps[ev.Proc()].gc++
				}
			}
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
			}
		}
	}

	// No events in the stream.
	if lastEv == nil {
		return nil
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{int64(lastEv.Time()), 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}

func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
	if len(util) > 0 {
		if mu.Util == util[len(util)-1].Util {
			// No change.
			return util
		}
		if mu.Time == util[len(util)-1].Time {
			// Take the lowest utilization at a time stamp.
			if mu.Util < util[len(util)-1].Util {
				util[len(util)-1] = mu
			}
			return util
		}
	}
	return append(util, mu)
}

// totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization,
// which is also a float64.
type totalUtil float64

func totalUtilOf(meanUtil float64, dur int64) totalUtil {
	return totalUtil(meanUtil * float64(dur))
}

// mean returns the mean utilization over dur.
func (u totalUtil) mean(dur time.Duration) float64 {
	return float64(u) / float64(dur)
}

// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	series []mmuSeries
}

type mmuSeries struct {
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}

type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}

// NewMMUCurve returns an MMU curve for the given mutator utilization
// function.
func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
	series := make([]mmuSeries, len(utils))
	for i, util := range utils {
		series[i] = newMMUSeries(util)
	}
	return &MMUCurve{series}
}

// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests.
var bandsPerSeries = 1000

func newMMUSeries(util []MutatorUtil) mmuSeries {
	// Compute cumulative sum.
	sums := make([]totalUtil, len(util))
	var prev MutatorUtil
	var sum totalUtil
	for j, u := range util {
		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
		sums[j] = sum
		prev = u
	}

	// Divide the utilization curve up into equal size
	// non-overlapping "bands" and compute a summary for each of
	// these bands.
	//
	// Compute the duration of each band.
	numBands := bandsPerSeries
	if numBands > len(util) {
		// There's no point in having lots of bands if there
		// aren't many events.
		numBands = len(util)
	}
	dur := util[len(util)-1].Time - util[0].Time
	bandDur := (dur + int64(numBands) - 1) / int64(numBands)
	if bandDur < 1 {
		bandDur = 1
	}
	// Compute the bands. There are numBands+1 bands in order to
	// record the final cumulative sum.
	bands := make([]mmuBand, numBands+1)
	s := mmuSeries{util, sums, bands, bandDur}
	leftSum := integrator{&s, 0}
	for i := range bands {
		startTime, endTime := s.bandTime(i)
		cumUtil := leftSum.advance(startTime)
		predIdx := leftSum.pos
		minUtil := 1.0
		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
			minUtil = math.Min(minUtil, util[i].Util)
		}
		bands[i] = mmuBand{minUtil, cumUtil, leftSum}
	}

	return s
}

func (s *mmuSeries) bandTime(i int) (start, end int64) {
	start = int64(i)*s.bandDur + s.util[0].Time
	end = start + s.bandDur
	return
}

type bandUtil struct {
	// Utilization series index
	series int
	// Band index
	i int
	// Lower bound of mutator utilization for all windows
	// with a left edge in this band.
	utilBound float64
}

type bandUtilHeap []bandUtil

func (h bandUtilHeap) Len() int {
	return len(h)
}

func (h bandUtilHeap) Less(i, j int) bool {
	return h[i].utilBound < h[j].utilBound
}

func (h bandUtilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *bandUtilHeap) Push(x any) {
	*h = append(*h, x.(bandUtil))
}

func (h *bandUtilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// UtilWindow is a specific window at Time.
type UtilWindow struct {
	Time int64
	// MutatorUtil is the mean mutator utilization in this window.
	MutatorUtil float64
}

type utilHeap []UtilWindow

func (h utilHeap) Len() int {
	return len(h)
}

func (h utilHeap) Less(i, j int) bool {
	if h[i].MutatorUtil != h[j].MutatorUtil {
		return h[i].MutatorUtil > h[j].MutatorUtil
	}
	return h[i].Time > h[j].Time
}

func (h utilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *utilHeap) Push(x any) {
	*h = append(*h, x.(UtilWindow))
}

func (h *utilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function.
	lastTime int64
	lastMU   float64
}

// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	acc.lastTime = math.MaxInt64
}

// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// If the minimum has reached zero, it can't go any
		// lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		if acc.lastTime != math.MaxInt64 {
			// Update distribution.
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}

// MMU returns the minimum mutator utilization for the given time
// window. This is the minimum utilization for all windows of this
// duration across the execution. The returned value is in the range
// [0, 1].
func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
	acc := accumulator{mmu: 1.0, bound: 1.0}
	c.mmu(window, &acc)
	return acc.mmu
}

// Examples returns n specific examples of the lowest mutator
// utilization for the given window size. The returned windows will be
// disjoint (otherwise there would be a huge number of
// mostly-overlapping windows at the single lowest point). There are
// no guarantees on which set of disjoint windows this returns.
func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
	acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
	c.mmu(window, &acc)
	sort.Sort(sort.Reverse(acc.wHeap))
	return ([]UtilWindow)(acc.wHeap)
}

// MUD returns mutator utilization distribution quantiles for the
// given window size.
//
// The mutator utilization distribution is the distribution of mean
// mutator utilization across all windows of the given window size in
// the trace.
//
// The minimum mutator utilization is the minimum (0th percentile) of
// this distribution. (However, if only the minimum is desired, it's
// more efficient to use the MMU method.)
func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
	if len(quantiles) == 0 {
		return []float64{}
	}

	// Each unrefined band contributes a known total mass to the
	// distribution (bandDur except at the end), but in an unknown
	// way. However, we know that all the mass it contributes must
	// be at or above its worst-case mean mutator utilization.
	//
	// Hence, we refine bands until the highest desired
	// distribution quantile is less than the next worst-case mean
	// mutator utilization. At this point, all further
	// contributions to the distribution must be beyond the
	// desired quantile and hence cannot affect it.
	//
	// First, find the highest desired distribution quantile.
	maxQ := quantiles[0]
	for _, q := range quantiles {
		if q > maxQ {
			maxQ = q
		}
	}
	// The distribution's mass is in units of time (it's not
	// normalized because this would make it more annoying to
	// account for future contributions of unrefined bands). The
	// total final mass will be the duration of the trace itself
	// minus the window size. Using this, we can compute the mass
	// corresponding to quantile maxQ.
	var duration int64
	for _, s := range c.series {
		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
		if duration1 >= int64(window) {
			duration += duration1 - int64(window)
		}
	}
	qMass := float64(duration) * maxQ

	// Accumulate the MUD until we have precise information for
	// everything to the left of qMass.
	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
	acc.mud.setTrackMass(qMass)
	c.mmu(window, &acc)

	// Evaluate the quantiles on the accumulated MUD.
	out := make([]float64, len(quantiles))
	for i := range out {
		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
		if math.IsNaN(mu) {
			// There are a few legitimate ways this can
			// happen:
			//
			// 1. If the window is the full trace
			// duration, then the windowed MU function is
			// only defined at a single point, so the MU
			// distribution is not well-defined.
			//
			// 2. If there are no events, then the MU
			// distribution has no mass.
			//
			// Either way, all of the quantiles will have
			// converged toward the MMU at this point.
			mu = acc.mmu
		}
		out[i] = mu
	}
	return out
}

func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
	if window <= 0 {
		acc.mmu = 0
		return
	}

	var bandU bandUtilHeap
	windows := make([]time.Duration, len(c.series))
	for i, s := range c.series {
		windows[i] = window
		if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
			windows[i] = max
		}

		bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
		if bandU == nil {
			bandU = bandU1
		} else {
			bandU = append(bandU, bandU1...)
		}
	}

	// Process bands from lowest utilization bound to highest.
	heap.Init(&bandU)

	// Refine each band into a precise window and MMU until
	// refining the next lowest band can no longer affect the MMU
	// or windows.
	for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
		i := bandU[0].series
		c.series[i].bandMMU(bandU[0].i, windows[i], acc)
		heap.Pop(&bandU)
	}
}

func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
	// For each band, compute the worst-possible total mutator
	// utilization for all windows that start in that band.

	// minBands is the minimum number of bands a window can span
	// and maxBands is the maximum number of bands a window can
	// span in any alignment.
	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
	if window > 1 && maxBands < 2 {
		panic("maxBands < 2")
	}
	tailDur := int64(window) % c.bandDur
	nUtil := len(c.bands) - maxBands + 1
	if nUtil < 0 {
		nUtil = 0
	}
	bandU := make([]bandUtil, nUtil)
	for i := range bandU {
		// To compute the worst-case MU, we assume the minimum
		// for any bands that are only partially overlapped by
		// some window and the mean for any bands that are
		// completely covered by all windows.
		var util totalUtil

		// Find the lowest and second lowest of the partial
		// bands.
		l := c.bands[i].minUtil
		r1 := c.bands[i+minBands-1].minUtil
		r2 := c.bands[i+maxBands-1].minUtil
		minBand := math.Min(l, math.Min(r1, r2))
		// Assume the worst window maximally overlaps the
		// worst minimum and then the rest overlaps the second
		// worst minimum.
		if minBands == 1 {
			util += totalUtilOf(minBand, int64(window))
		} else {
			util += totalUtilOf(minBand, c.bandDur)
			midBand := 0.0
			switch {
			case minBand == l:
				midBand = math.Min(r1, r2)
			case minBand == r1:
				midBand = math.Min(l, r2)
			case minBand == r2:
				midBand = math.Min(l, r1)
			}
			util += totalUtilOf(midBand, tailDur)
		}

		// Add the total mean MU of bands that are completely
		// overlapped by all windows.
		if minBands > 2 {
			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
		}

		bandU[i] = bandUtil{series, i, util.mean(window)}
	}

	return bandU
}

// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
	util := c.util

	// We think of the mutator utilization over time as the
	// box-filtered utilization function, which we call the
	// "windowed mutator utilization function". The resulting
	// function is continuous and piecewise linear (unless
	// window==0, which we handle elsewhere), where the boundaries
	// between segments occur when either edge of the window
	// encounters a change in the instantaneous mutator
	// utilization function. Hence, the minimum of this function
	// will always occur when one of the edges of the window
	// aligns with a utilization change, so these are the only
	// points we need to consider.
	//
	// We compute the mutator utilization function incrementally
	// by tracking the integral from t=0 to the left edge of the
	// window and to the right edge of the window.
	left := c.bands[bandIdx].integrator
	right := left
	time, endTime := c.bandTime(bandIdx)
	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
		endTime = utilEnd
	}
	acc.resetTime()
	for {
		// Advance edges to time and time+window.
		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
		if acc.addMU(time, mu, window) {
			break
		}
		if time == endTime {
			break
		}

		// The maximum slope of the windowed mutator
		// utilization function is 1/window, so we can always
		// advance the time by at least (mu - mmu) * window
		// without dropping below mmu.
		minTime := time + int64((mu-acc.bound)*float64(window))

		// Advance the window to the next time where either
		// the left or right edge of the window encounters a
		// change in the utilization curve.
		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
			time = t1
		} else {
			time = t2
		}
		if time < minTime {
			time = minTime
		}
		if time >= endTime {
			// For MMUs we could stop here, but for MUDs
			// it's important that we span the entire
			// band.
			time = endTime
		}
	}
}

// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}

// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
func (in *integrator) advance(time int64) totalUtil {
	util, pos := in.u.util, in.pos
	// Advance pos until pos+1 is time's strict successor (making
	// pos time's non-strict predecessor).
	//
	// Very often, this will be nearby, so we optimize that case,
	// but it may be arbitrarily far away, so we handle that
	// efficiently, too.
	const maxSeq = 8
	if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
		// Nearby. Use a linear scan.
		for pos+1 < len(util) && util[pos+1].Time <= time {
			pos++
		}
	} else {
		// Far. Binary search for time's strict successor.
		l, r := pos, len(util)
		for l < r {
			h := int(uint(l+r) >> 1)
			if util[h].Time <= time {
				l = h + 1
			} else {
				r = h
			}
		}
		pos = l - 1 // Non-strict predecessor.
	}
	in.pos = pos
	var partial totalUtil
	if time != util[pos].Time {
		partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
	}
	return in.u.sums[pos] + partial
}

// next returns the smallest time t' > time of a change in the
// utilization function.
func (in *integrator) next(time int64) int64 {
	for _, u := range in.u.util[in.pos:] {
		if u.Time > time {
			return u.Time
		}
	}
	return 1<<63 - 1
}

func isGCSTW(r Range) bool {
	return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC")
}

func isGCMarkAssist(r Range) bool {
	return r.Name == "GC mark assist"
}

func isGCSweep(r Range) bool {
	return r.Name == "GC incremental sweep"
}
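For orientation, here is a minimal usage sketch of the API above. It is not part of the file; it assumes code living alongside this (internal) package, the package's NewReader and ReadEvent trace-decoding API, and extra "fmt" and "io" imports. The helper name readMMU is illustrative only.

func readMMU(r io.Reader) error {
	tr, err := NewReader(r)
	if err != nil {
		return err
	}
	// Collect all events up front; MutatorUtilizationV2 takes a slice.
	var events []Event
	for {
		ev, err := tr.ReadEvent()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		events = append(events, ev)
	}
	// Account for STW pauses, mark assists, background workers, and
	// sweeping in a single combined utilization function.
	util := MutatorUtilizationV2(events, UtilSTW|UtilAssist|UtilBackground|UtilSweep)
	curve := NewMMUCurve(util)
	fmt.Printf("MMU(10ms) = %.3f\n", curve.MMU(10*time.Millisecond))
	// The three worst disjoint 10ms windows, worst first.
	for _, w := range curve.Examples(10*time.Millisecond, 3) {
		fmt.Printf("  window at t=%dns: %.3f\n", w.Time, w.MutatorUtil)
	}
	return nil
}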