Skip to content
Open
28 changes: 28 additions & 0 deletions agent/agentcontainers/acmock/acmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

181 changes: 165 additions & 16 deletions agent/agentcontainers/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/agent/usershell"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/httpapi/httperror"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/provisioner"
Expand Down Expand Up @@ -743,11 +744,14 @@ func (api *API) Routes() http.Handler {
// /-route was dropped. We can drop the /devcontainers prefix here too.
r.Route("/devcontainers/{devcontainer}", func(r chi.Router) {
r.Post("/recreate", api.handleDevcontainerRecreate)
r.Delete("/", api.handleDevcontainerDelete)
})

return r
}

// broadcastUpdatesLocked sends the current state to any listening clients.
// This method assumes that api.mu is held.
func (api *API) broadcastUpdatesLocked() {
// Broadcast state changes to WebSocket listeners.
for _, ch := range api.updateChans {
Expand Down Expand Up @@ -1019,6 +1023,12 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code
case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting:
continue // This state is handled by the recreation routine.

case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStopping:
continue // This state is handled by the stopping routine.

case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusDeleting:
continue // This state is handled by the delete routine.

case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusError && (dc.Container == nil || dc.Container.CreatedAt.Before(api.recreateErrorTimes[dc.WorkspaceFolder])):
continue // The devcontainer needs to be recreated.

Expand Down Expand Up @@ -1224,6 +1234,155 @@ func (api *API) getContainers() (codersdk.WorkspaceAgentListContainersResponse,
}, nil
}

// devcontainerByIDLocked attempts to find a devcontainer by its ID.
// This method assumes that api.mu is held.
func (api *API) devcontainerByIDLocked(devcontainerID string) (codersdk.WorkspaceAgentDevcontainer, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With methods like this it's nice to add a comment that screams at you to make sure you hold the mutex when you call it.

Also, it would make sense to use this method in handleDevcontainerRecreate().

for _, knownDC := range api.knownDevcontainers {
if knownDC.ID.String() == devcontainerID {
return knownDC, nil
}
}

return codersdk.WorkspaceAgentDevcontainer{}, httperror.NewResponseError(http.StatusNotFound, codersdk.Response{
Message: "Devcontainer not found.",
Detail: fmt.Sprintf("Could not find devcontainer with ID: %q", devcontainerID),
})
}

func (api *API) handleDevcontainerDelete(w http.ResponseWriter, r *http.Request) {
var (
ctx = r.Context()
devcontainerID = chi.URLParam(r, "devcontainer")
)

if devcontainerID == "" {
httpapi.Write(ctx, w, http.StatusBadRequest, codersdk.Response{
Message: "Missing devcontainer ID",
Detail: "Devcontainer ID is required to delete a devcontainer.",
})
return
}

api.mu.Lock()

dc, err := api.devcontainerByIDLocked(devcontainerID)
if err != nil {
api.mu.Unlock()
httperror.WriteResponseError(ctx, w, err)
return
}

// NOTE(DanielleMaywood):
// We currently do not support canceling the startup of a dev container.
if dc.Status.Transitioning() {
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusConflict, codersdk.Response{
Message: "Unable to delete transitioning devcontainer",
Detail: fmt.Sprintf("Devcontainer %q is currently %s and cannot be deleted.", dc.Name, dc.Status),
})
return
}

var (
containerID string
subAgentID uuid.UUID
)
if dc.Container != nil {
containerID = dc.Container.ID
}
if proc, hasSubAgent := api.injectedSubAgentProcs[dc.WorkspaceFolder]; hasSubAgent && proc.agent.ID != uuid.Nil {
subAgentID = proc.agent.ID
proc.stop()
}

dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopping
dc.Error = ""
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.broadcastUpdatesLocked()
api.mu.Unlock()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We didn't update devcontainer state, re-injection can happen at this point.

Also, why do we unlock just to re-acquire lock 3 lines below?

Suggestion: Drop this Unlock and both next Lock's

Alternatively: Move stopping here, the no-container case will just transition from stopping very quickly which probably fine?


// Stop and remove the container if it exists.
if containerID != "" {
if err := api.ccli.Stop(ctx, containerID); err != nil {
api.logger.Error(ctx, "unable to stop container", slog.Error(err))

api.mu.Lock()
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusError
dc.Error = err.Error()
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.broadcastUpdatesLocked()
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: "An error occurred stopping the container",
Detail: err.Error(),
})
return
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If following above advice, Lock here.

}

api.mu.Lock()
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusDeleting
dc.Error = ""
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.broadcastUpdatesLocked()
api.mu.Unlock()

if containerID != "" {
if err := api.ccli.Remove(ctx, containerID); err != nil {
api.logger.Error(ctx, "unable to remove container", slog.Error(err))

api.mu.Lock()
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusError
dc.Error = err.Error()
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.broadcastUpdatesLocked()
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: "An error occurred removing the container",
Detail: err.Error(),
})
return
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error paths seem to be lacking state handling for the devcontainer. Should probably update dc.Error, perhaps others?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have gone ahead and implemented some better error handling

}

// Delete the subagent if it exists.
if subAgentID != uuid.Nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we run this cleanup even if stop/remove fails?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I can't see a reason for not cleaning it up

client := *api.subAgentClient.Load()
if err := client.Delete(ctx, subAgentID); err != nil {
api.logger.Error(ctx, "unable to delete agent", slog.Error(err))

api.mu.Lock()
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusError
dc.Error = err.Error()
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.broadcastUpdatesLocked()
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: "An error occurred deleting the agent",
Detail: err.Error(),
})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These failures leave the API in a weird state, we've entered a terminal state (stopping), and can't re-invoke delete even if the errors are ephemeral. The container will become immortal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've slightly reworked this. On error they now enter WorkspaceAgentDevcontainerStatusError with the Error field filled in.

return
}
}

api.mu.Lock()
delete(api.devcontainerNames, dc.Name)
delete(api.knownDevcontainers, dc.WorkspaceFolder)
delete(api.devcontainerLogSourceIDs, dc.WorkspaceFolder)
delete(api.recreateSuccessTimes, dc.WorkspaceFolder)
delete(api.recreateErrorTimes, dc.WorkspaceFolder)
delete(api.usingWorkspaceFolderName, dc.WorkspaceFolder)
delete(api.injectedSubAgentProcs, dc.WorkspaceFolder)
api.broadcastUpdatesLocked()
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusNoContent, nil)
}

// handleDevcontainerRecreate handles the HTTP request to recreate a
// devcontainer by referencing the container.
func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Request) {
Expand All @@ -1240,28 +1399,18 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques

api.mu.Lock()

var dc codersdk.WorkspaceAgentDevcontainer
for _, knownDC := range api.knownDevcontainers {
if knownDC.ID.String() == devcontainerID {
dc = knownDC
break
}
}
if dc.ID == uuid.Nil {
dc, err := api.devcontainerByIDLocked(devcontainerID)
if err != nil {
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusNotFound, codersdk.Response{
Message: "Devcontainer not found.",
Detail: fmt.Sprintf("Could not find devcontainer with ID: %q", devcontainerID),
})
httperror.WriteResponseError(ctx, w, err)
return
}
if dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting {
if dc.Status.Transitioning() {
api.mu.Unlock()

httpapi.Write(ctx, w, http.StatusConflict, codersdk.Response{
Message: "Devcontainer recreation already in progress",
Detail: fmt.Sprintf("Recreation for devcontainer %q is already underway.", dc.Name),
Message: "Unable to recreate transitioning devcontainer",
Detail: fmt.Sprintf("Devcontainer %q is currently %s and cannot be restarted.", dc.Name, dc.Status),
})
return
}
Expand Down
Loading
Loading