5 Commits

Author SHA1 Message Date
eca2b35057 Fix crash when deleting an actor 2025-09-17 20:55:33 -04:00
d34af2c4ee Add Cleanupable interface
The Cleanup method is called on actors when they exit
2025-09-17 20:45:01 -04:00
e21cd9ed11 The cron actor now respects --fast-timing 2025-09-17 17:03:27 -04:00
70dc9702bd Add a --fast-timing option to make things happen sooner
The smaller time scale is better for debugging
2025-09-17 14:54:12 -04:00
613e21597b Add a --crash-on-error flag to crash whenever an actor returns err 2025-09-17 11:36:12 -04:00
4 changed files with 92 additions and 15 deletions

View File

@@ -127,3 +127,10 @@ type RunShutdownable interface {
// should be shut down.
Shutdown(ctx context.Context) error
}
// Cleanupable is any object that must be cleaned up after it has stopped for
// good. Actors which implement this interface will be cleaned up after they
// are deleted from the environment.
type Cleanupable interface {
// Cleanup releases whatever the actor still holds after it has exited.
// The environment calls it with a context that carries a timeout, so
// implementations should return promptly once ctx is done. A non-nil
// error is logged by the environment, and causes a panic when the
// --crash-on-error flag is set.
Cleanup(ctx context.Context) error
}

View File

@@ -7,6 +7,7 @@ import "context"
// tasks on time intervals.
type cron struct {
trimFunc func() bool
fastTiming bool
timing struct {
trimInterval time.Duration
}
@@ -28,6 +29,9 @@ func (this *cron) Configure (config Config) error {
}
this.timing.trimInterval = value
}
if this.fastTiming {
this.timing.trimInterval = time.Second * 10
}
return nil
}

View File

@@ -19,6 +19,7 @@ const defaultRestartInitialInterval = 8 * time.Second
const defaultRestartInitialIncrement = 8 * time.Second
const defaultRestartInitialMaximum = 1 * time.Hour
const defaultResetTimeout = 8 * time.Minute
// defaultCleanupTimeout is the fallback passed to defaul, alongside the
// configured cleanup timeout, when building the context for an actor's
// Cleanup call.
const defaultCleanupTimeout = 1 * time.Minute
const defaultTrimInterval = 1 * time.Minute
const defaultTrimTimeout = 1 * time.Minute
// defaultShutdownTimeout is the fallback passed to defaul, alongside the
// configured shutdown timeout, when phase80Shutdown builds its deadline
// context.
const defaultShutdownTimeout = 8 * time.Minute
@@ -33,6 +34,7 @@ type environment struct {
done context.CancelCauseFunc
group sync.WaitGroup
conf MutableConfig
cron *cron
// flags stores information from built-in flags.
flags struct {
@@ -42,6 +44,8 @@ type environment struct {
configFile string
verbose bool
crash bool
crashOnError bool
fastTiming bool
}
// running stores whether the environment is currently running.
@@ -59,6 +63,7 @@ type environment struct {
restartIntervalIncrement atomicDuration
restartIntervalMaximum atomicDuration
resetTimeout atomicDuration
cleanupTimeout atomicDuration
trimTimeout atomicDuration
shutdownTimeout atomicDuration
}
@@ -79,10 +84,11 @@ func (this *environment) Run(name, description string, actors ...Actor) {
this.name = name
this.description = description
this.actors = usync.NewRWMonitor(&actorSets { })
this.addToSets(actors...)
this.addToSets(&cron {
this.cron = &cron {
trimFunc: this.phase70_5Trimming,
})
}
this.addToSets(actors...)
this.addToSets(this.cron)
if !this.phase10FlagParsing() { os.Exit(2) }
if !this.phase13PidFileCreation() { os.Exit(1) }
@@ -111,7 +117,12 @@ func (this *environment) Add(ctx context.Context, actors ...Actor) error {
}
}
err := this.initializeActors(ctx, initializable...)
if err != nil { return err }
if err != nil {
if this.flags.crashOnError {
panic(fmt.Sprint(err))
}
return err
}
for _, actor := range actors {
if actor, ok := actor.(Configurable); ok {
err := actor.Configure(this.conf)
@@ -140,7 +151,9 @@ func (this *environment) Del(ctx context.Context, actors ...Actor) error {
if info.stopped != nil {
channels = append(channels, info.stopped)
}
info.done()
if info.done != nil {
info.done()
}
}
for _, channel := range channels {
if channel == nil { continue }
@@ -229,11 +242,30 @@ func (this *environment) start(actor Actor) {
// counter will be decremented. Note that this function never increments the
// wait group counter, so start should usually be used instead.
func (this *environment) run(actor Actor) {
typ := actor.Type()
// clean up when done
defer this.group.Done()
defer func() {
this.group.Done()
this.delFromSets(actor)
if actor, ok := actor.(Cleanupable); ok {
ctx, done := context.WithTimeout(
context.Background(),
defaul(
this.timing.cleanupTimeout.Load(),
defaultCleanupTimeout))
defer done()
err := actor.Cleanup(ctx)
if err != nil {
log.Printf("XXX [%s] failed to cleanup: %v", typ, err)
if this.flags.crashOnError {
panic(fmt.Sprint(err))
}
}
}
}()
// logging
typ := actor.Type()
if this.Verb() { log.Printf("(i) [%s] running", typ) }
var stopErr error
var exited bool
@@ -246,6 +278,9 @@ func (this *environment) run(actor Actor) {
}
} else {
log.Printf("!!! [%s] stopped with error: %v", typ, stopErr)
if this.flags.crashOnError {
panic(fmt.Sprint(stopErr))
}
}
}()
@@ -311,11 +346,17 @@ func (this *environment) runRunnable(ctx context.Context, actor Runnable) (stopE
} else {
// failure
log.Printf("XXX [%s] failed: %v", typ, err)
if this.flags.crashOnError {
panic(fmt.Sprint(err))
}
}
// restart logic
if time.Since(lastStart) < restartThreshold {
log.Printf("!!! [%s] failed too soon, restarting in %v", typ, restartInterval)
if this.flags.crashOnError {
panic("failed too soon")
}
timer := time.NewTimer(restartInterval)
select {
case <- timer.C:
@@ -341,6 +382,9 @@ func (this *environment) runRunnable(ctx context.Context, actor Runnable) (stopE
err := actor.Reset(ctx)
if err != nil {
log.Printf("XXX [%s] failed to reset", typ)
if this.flags.crashOnError {
panic("failed to reset")
}
}
}()
if this.Verb() { log.Printf(".// [%s] reset", typ) }
@@ -400,6 +444,7 @@ func (this *environment) applyConfig() error {
}
return nil
}
// TODO: trim interval
err := parseDuration("init-timeout", &this.timing.initTimeout)
if err != nil { return err }
err = parseDuration("restart-threshold", &this.timing.restartThreshold)
@@ -412,10 +457,17 @@ func (this *environment) applyConfig() error {
if err != nil { return err }
err = parseDuration("reset-timeout", &this.timing.resetTimeout)
if err != nil { return err }
err = parseDuration("cleanup-timeout", &this.timing.cleanupTimeout)
if err != nil { return err }
err = parseDuration("trim-timeout", &this.timing.trimTimeout)
if err != nil { return err }
err = parseDuration("shutdown-timeout", &this.timing.shutdownTimeout)
if err != nil { return err }
if this.flags.fastTiming {
this.timing.shutdownTimeout.Store(time.Second * 10)
}
return nil
}

View File

@@ -19,13 +19,15 @@ func (this *environment) phase10FlagParsing() bool {
name: this.name,
description: this.description,
}
flagHelp := set.Flag('h', "help", "Display usage information and exit", nil)
flagPidFile := set.Flag('p', "pid-file", "Write the PID to the specified file", cli.ValString)
flagUser := set.Flag('u', "user", "The user:group to run as", cli.ValString)
flagLogDirectory := set.Flag('l', "log-directory", "Write logs to the specified directory", cli.ValString)
flagConfigFile := set.Flag('c', "config-file", "Use this configuration file", cli.ValString)
flagVerbose := set.Flag('v', "verbose", "Enable verbose output/logging", nil)
flagCrash := set.Flag(0, "crash", "Crash when an actor panics", nil)
flagHelp := set.Flag('h', "help", "Display usage information and exit", nil)
flagPidFile := set.Flag('p', "pid-file", "Write the PID to the specified file", cli.ValString)
flagUser := set.Flag('u', "user", "The user:group to run as", cli.ValString)
flagLogDirectory := set.Flag('l', "log-directory", "Write logs to the specified directory", cli.ValString)
flagConfigFile := set.Flag('c', "config-file", "Use this configuration file", cli.ValString)
flagVerbose := set.Flag('v', "verbose", "(debug) Enable verbose output/logging", nil)
flagCrash := set.Flag(0, "crash", "(debug) Crash when an actor panics", nil)
flagCrashOnError := set.Flag(0, "crash-on-error", "(debug) Crash when an actor experiences any error", nil)
flagFastTiming := set.Flag(0, "fast-timing", "(debug) Make timed things happen faster/more often", nil)
// ask actors to add flags
actors, done := this.actors.RBorrow()
@@ -65,6 +67,14 @@ func (this *environment) phase10FlagParsing() bool {
if _, ok := flagCrash.First(); ok {
this.flags.crash = true
}
if _, ok := flagCrashOnError.First(); ok {
this.flags.crash = true
this.flags.crashOnError = true
}
if _, ok := flagFastTiming.First(); ok {
this.flags.fastTiming = true
this.cron.fastTiming = true
}
return true
}
@@ -259,6 +269,9 @@ func (this *environment) phase70_5Trimming() bool {
}()
if err := this.trimActors(this.ctx, trimmable...); err != nil {
log.Println(".// (70.5) failed to trim:", err)
if this.flags.crashOnError {
panic(err)
}
return false
}
if this.Verb() { log.Println(".// (70.5) trimmed") }
@@ -266,6 +279,7 @@ func (this *environment) phase70_5Trimming() bool {
}
func (this *environment) phase80Shutdown() bool {
logActors(All())
ctx, done := context.WithTimeout(
context.Background(),
defaul(this.timing.shutdownTimeout.Load(), defaultShutdownTimeout))
@@ -274,7 +288,7 @@ func (this *environment) phase80Shutdown() bool {
<- ctx.Done()
if errors.Is(context.Cause(ctx), context.DeadlineExceeded) {
log.Println("XXX (80) shutdown timeout expired, performing emergency halt")
if Verb() {
if Verb() || this.flags.crashOnError {
dumpBuffer := make([]byte, 8192)
runtime.Stack(dumpBuffer, true)
log.Printf("XXX (80) stack trace of all goroutines:\n%s", dumpBuffer)