From 557a4308bccdfcdfd1da0244e5581e2ce94b3f81 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Thu, 4 Dec 2025 07:35:11 +0000 Subject: [PATCH 1/9] wip: add a scaletest load generator for bridge --- cli/exp_scaletest.go | 2 + cli/exp_scaletest_bridge.go | 197 ++++++++++++ cli/exp_scaletest_llmmock.go | 107 +++++++ scaletest/bridge/config.go | 33 ++ scaletest/bridge/metrics.go | 32 ++ scaletest/bridge/run.go | 94 ++++++ scaletest/llmmock/server.go | 529 +++++++++++++++++++++++++++++++ scaletest/llmmock/server_test.go | 473 +++++++++++++++++++++++++++ scaletest/llmmock/types.go | 47 +++ 9 files changed, 1514 insertions(+) create mode 100644 cli/exp_scaletest_bridge.go create mode 100644 cli/exp_scaletest_llmmock.go create mode 100644 scaletest/bridge/config.go create mode 100644 scaletest/bridge/metrics.go create mode 100644 scaletest/bridge/run.go create mode 100644 scaletest/llmmock/server.go create mode 100644 scaletest/llmmock/server_test.go create mode 100644 scaletest/llmmock/types.go diff --git a/cli/exp_scaletest.go b/cli/exp_scaletest.go index 419b1955477b9..cf79ec7ebcaaa 100644 --- a/cli/exp_scaletest.go +++ b/cli/exp_scaletest.go @@ -67,6 +67,8 @@ func (r *RootCmd) scaletestCmd() *serpent.Command { r.scaletestTaskStatus(), r.scaletestSMTP(), r.scaletestPrebuilds(), + r.scaletestBridge(), + r.scaletestLLMMock(), }, } diff --git a/cli/exp_scaletest_bridge.go b/cli/exp_scaletest_bridge.go new file mode 100644 index 0000000000000..cbfa9769c9d0c --- /dev/null +++ b/cli/exp_scaletest_bridge.go @@ -0,0 +1,197 @@ +//go:build !slim + +package cli + +import ( + "context" + "fmt" + "net/http" + "os/signal" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "golang.org/x/xerrors" + + "cdr.dev/slog" + + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/scaletest/bridge" + "github.com/coder/coder/v2/scaletest/createusers" + "github.com/coder/coder/v2/scaletest/harness" + "github.com/coder/serpent" +) + +func (r *RootCmd) scaletestBridge() *serpent.Command { + var ( + userCount int64 + noCleanup bool + tracingFlags = &scaletestTracingFlags{} + + // This test requires unlimited concurrency. + timeoutStrategy = &timeoutFlags{} + cleanupStrategy = newScaletestCleanupStrategy() + output = &scaletestOutputFlags{} + prometheusFlags = &scaletestPrometheusFlags{} + ) + + cmd := &serpent.Command{ + Use: "bridge", + Short: "Generate load on the AI Bridge service.", + Handler: func(inv *serpent.Invocation) error { + ctx := inv.Context() + client, err := r.InitClient(inv) + if err != nil { + return err + } + + notifyCtx, stop := signal.NotifyContext(ctx, StopSignals...) 
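+ // Deferring stop restores default signal handling once the handler returns.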
+ defer stop() + ctx = notifyCtx + + me, err := requireAdmin(ctx, client) + if err != nil { + return err + } + + client.HTTPClient = &http.Client{ + Transport: &codersdk.HeaderTransport{ + Transport: http.DefaultTransport, + Header: map[string][]string{ + codersdk.BypassRatelimitHeader: {"true"}, + }, + }, + } + + if userCount <= 0 { + return xerrors.Errorf("--user-count must be greater than 0") + } + + outputs, err := output.parse() + if err != nil { + return xerrors.Errorf("could not parse --output flags") + } + + tracerProvider, closeTracing, tracingEnabled, err := tracingFlags.provider(ctx) + if err != nil { + return xerrors.Errorf("create tracer provider: %w", err) + } + tracer := tracerProvider.Tracer(scaletestTracerName) + + reg := prometheus.NewRegistry() + metrics := bridge.NewMetrics(reg) + + logger := inv.Logger + prometheusSrvClose := ServeHandler(ctx, logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), prometheusFlags.Address, "prometheus") + defer prometheusSrvClose() + + defer func() { + _, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...") + if err := closeTracing(ctx); err != nil { + _, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err) + } + // Wait for prometheus metrics to be scraped + _, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", prometheusFlags.Wait) + <-time.After(prometheusFlags.Wait) + }() + + _, _ = fmt.Fprintln(inv.Stderr, "Creating users...") + + configs := make([]bridge.Config, 0, userCount) + for range userCount { + config := bridge.Config{ + User: createusers.Config{ + OrganizationID: me.OrganizationIDs[0], + }, + Metrics: metrics, + } + if err := config.Validate(); err != nil { + return xerrors.Errorf("validate config: %w", err) + } + configs = append(configs, config) + } + + th := harness.NewTestHarness(timeoutStrategy.wrapStrategy(harness.ConcurrentExecutionStrategy{}), cleanupStrategy.toStrategy()) + + for i, config := range configs { + id := strconv.Itoa(i) + name := fmt.Sprintf("bridge-%s", id) + var runner harness.Runnable = bridge.NewRunner(client, config) + if tracingEnabled { + runner = &runnableTraceWrapper{ + tracer: tracer, + spanName: name, + runner: runner, + } + } + + th.AddRun(name, id, runner) + } + + _, _ = fmt.Fprintln(inv.Stderr, "Running bridge scaletest...") + testCtx, testCancel := timeoutStrategy.toContext(ctx) + defer testCancel() + err = th.Run(testCtx) + if err != nil { + return xerrors.Errorf("run test harness (harness failure, not a test failure): %w", err) + } + + // If the command was interrupted, skip stats. 
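+ // notifyCtx is canceled when an interrupt signal arrives (or the parent context ends),
+ // so a non-nil error here means the run did not finish and partial stats would be misleading.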
+ if notifyCtx.Err() != nil { + return notifyCtx.Err() + } + + res := th.Results() + + for _, o := range outputs { + err = o.write(res, inv.Stdout) + if err != nil { + return xerrors.Errorf("write output %q to %q: %w", o.format, o.path, err) + } + } + + if !noCleanup { + _, _ = fmt.Fprintln(inv.Stderr, "\nCleaning up...") + cleanupCtx, cleanupCancel := cleanupStrategy.toContext(ctx) + defer cleanupCancel() + err = th.Cleanup(cleanupCtx) + if err != nil { + return xerrors.Errorf("cleanup tests: %w", err) + } + } + + if res.TotalFail > 0 { + return xerrors.New("load test failed, see above for more details") + } + + return nil + }, + } + + cmd.Options = serpent.OptionSet{ + { + Flag: "user-count", + FlagShorthand: "c", + Env: "CODER_SCALETEST_BRIDGE_USER_COUNT", + Description: "Required: Total number of users to create.", + Value: serpent.Int64Of(&userCount), + Required: true, + }, + { + Flag: "no-cleanup", + Env: "CODER_SCALETEST_NO_CLEANUP", + Description: "Do not clean up resources after the test completes.", + Value: serpent.BoolOf(&noCleanup), + }, + } + + tracingFlags.attach(&cmd.Options) + timeoutStrategy.attach(&cmd.Options) + cleanupStrategy.attach(&cmd.Options) + output.attach(&cmd.Options) + prometheusFlags.attach(&cmd.Options) + return cmd +} + + diff --git a/cli/exp_scaletest_llmmock.go b/cli/exp_scaletest_llmmock.go new file mode 100644 index 0000000000000..4f5fcf4fe57e2 --- /dev/null +++ b/cli/exp_scaletest_llmmock.go @@ -0,0 +1,107 @@ +//go:build !slim + +package cli + +import ( + "fmt" + "os/signal" + "time" + + "golang.org/x/xerrors" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/sloghuman" + "github.com/coder/coder/v2/scaletest/llmmock" + "github.com/coder/serpent" +) + +func (*RootCmd) scaletestLLMMock() *serpent.Command { + var ( + hostAddress string + apiPort int64 + purgeAtCount int64 + ) + cmd := &serpent.Command{ + Use: "llm-mock", + Short: "Start a mock LLM API server for testing", + Long: `Start a mock LLM API server that simulates OpenAI and Anthropic APIs with an HTTP API +server that can be used to query intercepted requests and purge stored data.`, + Handler: func(inv *serpent.Invocation) error { + ctx := inv.Context() + notifyCtx, stop := signal.NotifyContext(ctx, StopSignals...) 
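+ // The signal-aware context lets the status loop below exit cleanly on interrupt.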
+ defer stop() + ctx = notifyCtx + + logger := slog.Make(sloghuman.Sink(inv.Stderr)).Leveled(slog.LevelInfo) + config := llmmock.Config{ + HostAddress: hostAddress, + APIPort: int(apiPort), + Logger: logger, + } + srv := new(llmmock.Server) + + if err := srv.Start(ctx, config); err != nil { + return xerrors.Errorf("start mock LLM server: %w", err) + } + defer func() { + _ = srv.Stop() + }() + + _, _ = fmt.Fprintf(inv.Stdout, "Mock LLM API server started on %s\n", srv.APIAddress()) + _, _ = fmt.Fprintf(inv.Stdout, " OpenAI endpoint: %s/v1/chat/completions\n", srv.APIAddress()) + _, _ = fmt.Fprintf(inv.Stdout, " Anthropic endpoint: %s/v1/messages\n", srv.APIAddress()) + _, _ = fmt.Fprintf(inv.Stdout, " Query API: %s/api/requests\n", srv.APIAddress()) + if purgeAtCount > 0 { + _, _ = fmt.Fprintf(inv.Stdout, " Auto-purge when request count reaches %d\n", purgeAtCount) + } + + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + _, _ = fmt.Fprintf(inv.Stdout, "\nTotal requests received since last purge: %d\n", srv.RequestCount()) + return nil + case <-ticker.C: + count := srv.RequestCount() + if count > 0 { + _, _ = fmt.Fprintf(inv.Stdout, "Requests received: %d\n", count) + } + + if purgeAtCount > 0 && int64(count) >= purgeAtCount { + _, _ = fmt.Fprintf(inv.Stdout, "Request count (%d) reached threshold (%d). Purging...\n", count, purgeAtCount) + srv.Purge() + continue + } + } + } + }, + } + + cmd.Options = []serpent.Option{ + { + Flag: "host-address", + Env: "CODER_SCALETEST_LLM_MOCK_HOST_ADDRESS", + Default: "localhost", + Description: "Host address to bind the mock LLM API server.", + Value: serpent.StringOf(&hostAddress), + }, + { + Flag: "api-port", + Env: "CODER_SCALETEST_LLM_MOCK_API_PORT", + Description: "Port for the HTTP API server. Uses a random port if not specified.", + Value: serpent.Int64Of(&apiPort), + }, + { + Flag: "purge-at-count", + Env: "CODER_SCALETEST_LLM_MOCK_PURGE_AT_COUNT", + Default: "100000", + Description: "Maximum number of requests to keep before auto-purging. Set to 0 to disable.", + Value: serpent.Int64Of(&purgeAtCount), + }, + } + + return cmd +} + diff --git a/scaletest/bridge/config.go b/scaletest/bridge/config.go new file mode 100644 index 0000000000000..255c57ced186c --- /dev/null +++ b/scaletest/bridge/config.go @@ -0,0 +1,33 @@ +package bridge + +import ( + "golang.org/x/xerrors" + + "github.com/google/uuid" + + "github.com/coder/coder/v2/scaletest/createusers" +) + +type Config struct { + // User is the configuration for the user to create. + User createusers.Config `json:"user"` + + Metrics *Metrics `json:"-"` +} + +func (c Config) Validate() error { + // The runner always needs an org; ensure we propagate it into the user config. 
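+ // The CLI supplies this from the calling admin's first organization; fail fast if it is missing.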
+ if c.User.OrganizationID == uuid.Nil { + return xerrors.New("user organization_id must be set") + } + + if err := c.User.Validate(); err != nil { + return xerrors.Errorf("user config: %w", err) + } + + if c.Metrics == nil { + return xerrors.New("metrics must be set") + } + + return nil +} diff --git a/scaletest/bridge/metrics.go b/scaletest/bridge/metrics.go new file mode 100644 index 0000000000000..978de1e825a34 --- /dev/null +++ b/scaletest/bridge/metrics.go @@ -0,0 +1,32 @@ +package bridge + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type Metrics struct { + bridgeErrors *prometheus.CounterVec +} + +func NewMetrics(reg prometheus.Registerer) *Metrics { + if reg == nil { + reg = prometheus.DefaultRegisterer + } + + errors := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_errors_total", + Help: "Total number of bridge errors", + }, []string{"action"}) + + reg.MustRegister(errors) + + return &Metrics{ + bridgeErrors: errors, + } +} + +func (m *Metrics) AddError(action string) { + m.bridgeErrors.WithLabelValues(action).Inc() +} diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go new file mode 100644 index 0000000000000..b520c7951fe95 --- /dev/null +++ b/scaletest/bridge/run.go @@ -0,0 +1,94 @@ +package bridge + +import ( + "context" + "fmt" + "io" + + "golang.org/x/xerrors" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/sloghuman" + + "github.com/coder/coder/v2/coderd/tracing" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/scaletest/createusers" + "github.com/coder/coder/v2/scaletest/harness" + "github.com/coder/coder/v2/scaletest/loadtestutil" + "github.com/coder/quartz" +) + +type Runner struct { + client *codersdk.Client + cfg Config + + createUserRunner *createusers.Runner + + clock quartz.Clock +} + +func NewRunner(client *codersdk.Client, cfg Config) *Runner { + return &Runner{ + client: client, + cfg: cfg, + clock: quartz.NewReal(), + } +} + +func (r *Runner) WithClock(clock quartz.Clock) *Runner { + r.clock = clock + return r +} + +var ( + _ harness.Runnable = &Runner{} + _ harness.Cleanable = &Runner{} + _ harness.Collectable = &Runner{} +) + +func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + + logs = loadtestutil.NewSyncWriter(logs) + logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug) + r.client.SetLogger(logger) + r.client.SetLogBodies(true) + + r.createUserRunner = createusers.NewRunner(r.client, r.cfg.User) + newUserAndToken, err := r.createUserRunner.RunReturningUser(ctx, id, logs) + if err != nil { + r.cfg.Metrics.AddError("create_user") + return xerrors.Errorf("create user: %w", err) + } + newUser := newUserAndToken.User + newUserClient := codersdk.New(r.client.URL, + codersdk.WithSessionToken(newUserAndToken.SessionToken), + codersdk.WithLogger(logger), + codersdk.WithLogBodies()) + + logger.Info(ctx, "runner user created", slog.F("username", newUser.Username), slog.F("user_id", newUser.ID.String())) + + logger.Info(ctx, "bridge runner is ready") + + _ = newUserClient + // TODO: Implement bridge load generation logic here + + return nil +} + +func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error { + if r.createUserRunner != nil { + _, _ = fmt.Fprintln(logs, "Cleaning up user...") + if err := r.createUserRunner.Cleanup(ctx, id, logs); err != nil { + return xerrors.Errorf("cleanup user: %w", err) + } + } + + return nil +} + +func (r 
*Runner) GetMetrics() map[string]any { + // TODO: Return actual metrics when bridge load generation is implemented + return map[string]any{} +} diff --git a/scaletest/llmmock/server.go b/scaletest/llmmock/server.go new file mode 100644 index 0000000000000..e1b25bc62e42f --- /dev/null +++ b/scaletest/llmmock/server.go @@ -0,0 +1,529 @@ +package llmmock + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "slices" + "strings" + "sync" + "time" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog" +) + +// Server wraps the LLM mock server and provides an HTTP API to retrieve requests. +type Server struct { + httpServer *http.Server + httpListener net.Listener + logger slog.Logger + + hostAddress string + apiPort int + + // Storage for intercepted requests + records []RequestRecord + recordsMu sync.RWMutex +} + +type Config struct { + HostAddress string + APIPort int + Logger slog.Logger +} + +type openAIRequest struct { + Model string `json:"model"` + Messages []openAIMessage `json:"messages"` + Stream bool `json:"stream,omitempty"` + Extra map[string]interface{} `json:"-"` +} + +type openAIMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type openAIResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []struct { + Index int `json:"index"` + Message openAIMessage `json:"message"` + FinishReason string `json:"finish_reason"` + } `json:"choices"` + Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } `json:"usage"` +} + +type anthropicRequest struct { + Model string `json:"model"` + Messages []anthropicMessage `json:"messages"` + Stream bool `json:"stream,omitempty"` + MaxTokens int `json:"max_tokens"` + Extra map[string]interface{} `json:"-"` +} + +type anthropicMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type anthropicResponse struct { + ID string `json:"id"` + Type string `json:"type"` + Role string `json:"role"` + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + Model string `json:"model"` + StopReason string `json:"stop_reason"` + StopSequence *string `json:"stop_sequence"` + Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + } `json:"usage"` +} + +func (s *Server) Start(ctx context.Context, cfg Config) error { + s.hostAddress = cfg.HostAddress + s.apiPort = cfg.APIPort + s.logger = cfg.Logger + + if err := s.startAPIServer(ctx); err != nil { + return xerrors.Errorf("start API server: %w", err) + } + + return nil +} + +func (s *Server) Stop() error { + if s.httpServer != nil { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := s.httpServer.Shutdown(shutdownCtx); err != nil { + return xerrors.Errorf("shutdown HTTP server: %w", err) + } + } + return nil +} + +func (s *Server) APIAddress() string { + return fmt.Sprintf("http://%s:%d", s.hostAddress, s.apiPort) +} + +func (s *Server) RequestCount() int { + s.recordsMu.RLock() + defer s.recordsMu.RUnlock() + return len(s.records) +} + +func (s *Server) Purge() { + s.recordsMu.Lock() + defer s.recordsMu.Unlock() + s.records = s.records[:0] +} + +func (s *Server) startAPIServer(ctx context.Context) error { + mux := http.NewServeMux() + + // LLM API endpoints + 
mux.HandleFunc("POST /v1/chat/completions", s.handleOpenAI) + mux.HandleFunc("POST /v1/messages", s.handleAnthropic) + + // Query API endpoints + mux.HandleFunc("GET /api/requests", s.handleGetRequests) + mux.HandleFunc("POST /api/purge", s.handlePurge) + + s.httpServer = &http.Server{ + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } + + listener, err := net.Listen("tcp", fmt.Sprintf("%s:%d", s.hostAddress, s.apiPort)) + if err != nil { + return xerrors.Errorf("listen on %s:%d: %w", s.hostAddress, s.apiPort, err) + } + s.httpListener = listener + + tcpAddr, valid := listener.Addr().(*net.TCPAddr) + if !valid { + err := listener.Close() + if err != nil { + s.logger.Error(ctx, "failed to close listener", slog.Error(err)) + } + return xerrors.Errorf("listener returned invalid address: %T", listener.Addr()) + } + s.apiPort = tcpAddr.Port + + go func() { + if err := s.httpServer.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) { + s.logger.Error(ctx, "http API server error", slog.Error(err)) + } + }() + + return nil +} + +func (s *Server) handleOpenAI(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + requestID := uuid.New() + now := time.Now() + + // Read request body + bodyBytes, err := io.ReadAll(r.Body) + if err != nil { + s.logger.Error(ctx, "failed to read request body", slog.Error(err)) + http.Error(w, "failed to read request body", http.StatusBadRequest) + return + } + + // Parse request + var req openAIRequest + if err := json.Unmarshal(bodyBytes, &req); err != nil { + s.logger.Error(ctx, "failed to parse OpenAI request", slog.Error(err)) + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + // Extract user ID from Authorization header if present + userID := s.extractUserID(r) + + // Store request + requestSummary := RequestSummary{ + ID: requestID, + Timestamp: now, + Provider: ProviderOpenAI, + Model: req.Model, + UserID: userID, + Stream: req.Stream, + RequestBody: string(bodyBytes), + } + + // Generate mock response + var resp openAIResponse + resp.ID = fmt.Sprintf("chatcmpl-%s", requestID.String()[:8]) + resp.Object = "chat.completion" + resp.Created = now.Unix() + resp.Model = req.Model + resp.Choices = []struct { + Index int `json:"index"` + Message openAIMessage `json:"message"` + FinishReason string `json:"finish_reason"` + }{ + { + Index: 0, + Message: openAIMessage{ + Role: "assistant", + Content: "This is a mock response from OpenAI.", + }, + FinishReason: "stop", + }, + } + resp.Usage.PromptTokens = 10 + resp.Usage.CompletionTokens = 5 + resp.Usage.TotalTokens = 15 + + responseBody, _ := json.Marshal(resp) + responseTime := time.Now() + + // Store response + responseSummary := ResponseSummary{ + RequestID: requestID, + Timestamp: responseTime, + Status: http.StatusOK, + Stream: req.Stream, + FinishReason: "stop", + PromptTokens: resp.Usage.PromptTokens, + OutputTokens: resp.Usage.CompletionTokens, + TotalTokens: resp.Usage.TotalTokens, + ResponseBody: string(responseBody), + } + + s.recordsMu.Lock() + s.records = append(s.records, RequestRecord{ + Request: requestSummary, + Response: &responseSummary, + }) + s.recordsMu.Unlock() + + // Send response + if req.Stream { + s.sendOpenAIStream(w, resp) + } else { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(responseBody) + } +} + +func (s *Server) handleAnthropic(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + requestID := uuid.New() + now := time.Now() + + // Read request body + 
bodyBytes, err := io.ReadAll(r.Body) + if err != nil { + s.logger.Error(ctx, "failed to read request body", slog.Error(err)) + http.Error(w, "failed to read request body", http.StatusBadRequest) + return + } + + // Parse request + var req anthropicRequest + if err := json.Unmarshal(bodyBytes, &req); err != nil { + s.logger.Error(ctx, "failed to parse Anthropic request", slog.Error(err)) + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + // Extract user ID from Authorization header if present + userID := s.extractUserID(r) + + // Store request + requestSummary := RequestSummary{ + ID: requestID, + Timestamp: now, + Provider: ProviderAnthropic, + Model: req.Model, + UserID: userID, + Stream: req.Stream, + RequestBody: string(bodyBytes), + } + + // Generate mock response + var resp anthropicResponse + resp.ID = fmt.Sprintf("msg_%s", requestID.String()[:8]) + resp.Type = "message" + resp.Role = "assistant" + resp.Content = []struct { + Type string `json:"type"` + Text string `json:"text"` + }{ + { + Type: "text", + Text: "This is a mock response from Anthropic.", + }, + } + resp.Model = req.Model + resp.StopReason = "end_turn" + resp.Usage.InputTokens = 10 + resp.Usage.OutputTokens = 5 + + responseBody, _ := json.Marshal(resp) + responseTime := time.Now() + + // Store response + responseSummary := ResponseSummary{ + RequestID: requestID, + Timestamp: responseTime, + Status: http.StatusOK, + Stream: req.Stream, + FinishReason: resp.StopReason, + PromptTokens: resp.Usage.InputTokens, + OutputTokens: resp.Usage.OutputTokens, + TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens, + ResponseBody: string(responseBody), + } + + s.recordsMu.Lock() + s.records = append(s.records, RequestRecord{ + Request: requestSummary, + Response: &responseSummary, + }) + s.recordsMu.Unlock() + + // Send response + if req.Stream { + s.sendAnthropicStream(w, resp) + } else { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("anthropic-version", "2023-06-01") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(responseBody) + } +} + +func (s *Server) sendOpenAIStream(w http.ResponseWriter, resp openAIResponse) { + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.WriteHeader(http.StatusOK) + + // Send initial chunk + chunk := map[string]interface{}{ + "id": resp.ID, + "object": "chat.completion.chunk", + "created": resp.Created, + "model": resp.Model, + "choices": []map[string]interface{}{ + { + "index": 0, + "delta": map[string]interface{}{ + "role": "assistant", + "content": resp.Choices[0].Message.Content, + }, + "finish_reason": nil, + }, + }, + } + chunkBytes, _ := json.Marshal(chunk) + _, _ = fmt.Fprintf(w, "data: %s\n\n", chunkBytes) + + // Send final chunk + finalChunk := map[string]interface{}{ + "id": resp.ID, + "object": "chat.completion.chunk", + "created": resp.Created, + "model": resp.Model, + "choices": []map[string]interface{}{ + { + "index": 0, + "delta": map[string]interface{}{}, + "finish_reason": resp.Choices[0].FinishReason, + }, + }, + } + finalChunkBytes, _ := json.Marshal(finalChunk) + _, _ = fmt.Fprintf(w, "data: %s\n\n", finalChunkBytes) + _, _ = fmt.Fprintf(w, "data: [DONE]\n\n") +} + +func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicResponse) { + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + 
w.Header().Set("anthropic-version", "2023-06-01") + w.WriteHeader(http.StatusOK) + + // Send message_start event + startEvent := map[string]interface{}{ + "type": "message_start", + "message": map[string]interface{}{ + "id": resp.ID, + "type": resp.Type, + "role": resp.Role, + "model": resp.Model, + }, + } + startBytes, _ := json.Marshal(startEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", startBytes) + + // Send content_block_start event + contentStartEvent := map[string]interface{}{ + "type": "content_block_start", + "index": 0, + "content_block": map[string]interface{}{ + "type": "text", + "text": resp.Content[0].Text, + }, + } + contentStartBytes, _ := json.Marshal(contentStartEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", contentStartBytes) + + // Send content_block_delta event + deltaEvent := map[string]interface{}{ + "type": "content_block_delta", + "index": 0, + "delta": map[string]interface{}{ + "type": "text_delta", + "text": resp.Content[0].Text, + }, + } + deltaBytes, _ := json.Marshal(deltaEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", deltaBytes) + + // Send content_block_stop event + contentStopEvent := map[string]interface{}{ + "type": "content_block_stop", + "index": 0, + } + contentStopBytes, _ := json.Marshal(contentStopEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", contentStopBytes) + + // Send message_delta event + deltaMsgEvent := map[string]interface{}{ + "type": "message_delta", + "delta": map[string]interface{}{ + "stop_reason": resp.StopReason, + "stop_sequence": resp.StopSequence, + }, + "usage": resp.Usage, + } + deltaMsgBytes, _ := json.Marshal(deltaMsgEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", deltaMsgBytes) + + // Send message_stop event + stopEvent := map[string]interface{}{ + "type": "message_stop", + } + stopBytes, _ := json.Marshal(stopEvent) + _, _ = fmt.Fprintf(w, "data: %s\n\n", stopBytes) +} + +func (s *Server) handleGetRequests(w http.ResponseWriter, r *http.Request) { + s.recordsMu.RLock() + records := slices.Clone(s.records) + s.recordsMu.RUnlock() + + // Apply filters + userID := r.URL.Query().Get("user_id") + providerStr := r.URL.Query().Get("provider") + + var filtered []RequestRecord + for _, record := range records { + if userID != "" && record.Request.UserID != userID { + continue + } + if providerStr != "" && string(record.Request.Provider) != providerStr { + continue + } + filtered = append(filtered, record) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(filtered); err != nil { + s.logger.Warn(r.Context(), "failed to encode JSON response", slog.Error(err)) + } +} + +func (s *Server) handlePurge(w http.ResponseWriter, _ *http.Request) { + s.Purge() + w.WriteHeader(http.StatusOK) +} + +func (s *Server) extractUserID(r *http.Request) string { + // Try to extract user ID from Authorization header + auth := r.Header.Get("Authorization") + if auth == "" { + return "" + } + + // For now, just return a simple identifier + // In a real scenario, this might parse a JWT or API key + // For scale tests, we can use the token itself or extract from it + if strings.HasPrefix(auth, "Bearer ") { + token := strings.TrimPrefix(auth, "Bearer ") + // Use first 8 chars as a simple identifier + if len(token) > 8 { + return token[:8] + } + return token + } + + return "" +} diff --git a/scaletest/llmmock/server_test.go b/scaletest/llmmock/server_test.go new file mode 100644 index 0000000000000..5107ca0c7a8d2 --- /dev/null +++ b/scaletest/llmmock/server_test.go @@ -0,0 +1,473 @@ +package llmmock_test + +import ( + 
"bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/v2/scaletest/llmmock" + "github.com/coder/coder/v2/testutil" +) + +func TestServer_StartStop(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + require.NotEmpty(t, srv.APIAddress()) + + err = srv.Stop() + require.NoError(t, err) +} + +func TestServer_OpenAIRequest(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + reqBody := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + { + "role": "user", + "content": "Hello, world!", + }, + }, + "stream": false, + } + bodyBytes, err := json.Marshal(reqBody) + require.NoError(t, err) + + url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer test-token-12345") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Equal(t, "application/json", resp.Header.Get("Content-Type")) + + var openAIResp map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&openAIResp) + require.NoError(t, err) + require.Equal(t, "chat.completion", openAIResp["object"]) + + require.Eventually(t, func() bool { + return srv.RequestCount() == 1 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Query stored requests + apiURL := fmt.Sprintf("%s/api/requests", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.Equal(t, llmmock.ProviderOpenAI, records[0].Request.Provider) + require.Equal(t, "gpt-4", records[0].Request.Model) + require.Equal(t, false, records[0].Request.Stream) + require.NotNil(t, records[0].Response) + require.Equal(t, "stop", records[0].Response.FinishReason) +} + +func TestServer_AnthropicRequest(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + reqBody := map[string]interface{}{ + "model": "claude-3-opus-20240229", + "messages": []map[string]interface{}{ + { + "role": "user", + "content": "Hello, world!", + }, + }, + "max_tokens": 1024, + "stream": false, + } + bodyBytes, err := json.Marshal(reqBody) + require.NoError(t, err) + + url := fmt.Sprintf("%s/v1/messages", srv.APIAddress()) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + 
req.Header.Set("Authorization", "Bearer test-token-67890") + req.Header.Set("anthropic-version", "2023-06-01") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Equal(t, "application/json", resp.Header.Get("Content-Type")) + + var anthropicResp map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&anthropicResp) + require.NoError(t, err) + require.Equal(t, "message", anthropicResp["type"]) + + require.Eventually(t, func() bool { + return srv.RequestCount() == 1 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Query stored requests + apiURL := fmt.Sprintf("%s/api/requests?provider=anthropic", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.Equal(t, llmmock.ProviderAnthropic, records[0].Request.Provider) + require.Equal(t, "claude-3-opus-20240229", records[0].Request.Model) + require.Equal(t, false, records[0].Request.Stream) + require.NotNil(t, records[0].Response) + require.Equal(t, "end_turn", records[0].Response.FinishReason) +} + +func TestServer_OpenAIStreaming(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + reqBody := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + { + "role": "user", + "content": "Hello!", + }, + }, + "stream": true, + } + bodyBytes, err := json.Marshal(reqBody) + require.NoError(t, err) + + url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Equal(t, "text/event-stream", resp.Header.Get("Content-Type")) + + // Read streaming response + buf := make([]byte, 4096) + n, err := resp.Body.Read(buf) + require.NoError(t, err) + content := string(buf[:n]) + require.Contains(t, content, "data:") + require.Contains(t, content, "chat.completion.chunk") + + require.Eventually(t, func() bool { + return srv.RequestCount() == 1 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Verify stored request has stream flag + apiURL := fmt.Sprintf("%s/api/requests", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.Equal(t, true, records[0].Request.Stream) + require.Equal(t, true, records[0].Response.Stream) +} + +func TestServer_AnthropicStreaming(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + 
APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + reqBody := map[string]interface{}{ + "model": "claude-3-opus-20240229", + "messages": []map[string]interface{}{ + { + "role": "user", + "content": "Hello!", + }, + }, + "max_tokens": 1024, + "stream": true, + } + bodyBytes, err := json.Marshal(reqBody) + require.NoError(t, err) + + url := fmt.Sprintf("%s/v1/messages", srv.APIAddress()) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("anthropic-version", "2023-06-01") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Equal(t, "text/event-stream", resp.Header.Get("Content-Type")) + + // Read streaming response + buf := make([]byte, 4096) + n, err := resp.Body.Read(buf) + require.NoError(t, err) + content := string(buf[:n]) + require.Contains(t, content, "data:") + require.Contains(t, content, "message_start") + + require.Eventually(t, func() bool { + return srv.RequestCount() == 1 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Verify stored request has stream flag + apiURL := fmt.Sprintf("%s/api/requests?provider=anthropic", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.Equal(t, true, records[0].Request.Stream) + require.Equal(t, true, records[0].Response.Stream) +} + +func TestServer_FilterByUserID(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + // Send request with user token 1 + reqBody1 := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + {"role": "user", "content": "Hello"}, + }, + } + bodyBytes1, _ := json.Marshal(reqBody1) + url1 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req1, _ := http.NewRequestWithContext(ctx, http.MethodPost, url1, bytes.NewReader(bodyBytes1)) + req1.Header.Set("Content-Type", "application/json") + req1.Header.Set("Authorization", "Bearer user-token-12345") + _, _ = http.DefaultClient.Do(req1) + + // Send request with user token 2 + reqBody2 := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + {"role": "user", "content": "World"}, + }, + } + bodyBytes2, _ := json.Marshal(reqBody2) + url2 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req2, _ := http.NewRequestWithContext(ctx, http.MethodPost, url2, bytes.NewReader(bodyBytes2)) + req2.Header.Set("Content-Type", "application/json") + req2.Header.Set("Authorization", "Bearer user-token-67890") + _, _ = http.DefaultClient.Do(req2) + + require.Eventually(t, func() bool { + return srv.RequestCount() == 2 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Filter by user_id (first 8 chars of token) + apiURL := fmt.Sprintf("%s/api/requests?user_id=user-tok", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + 
require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.True(t, strings.HasPrefix(records[0].Request.UserID, "user-tok")) +} + +func TestServer_FilterByProvider(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + // Send OpenAI request + reqBody1 := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + {"role": "user", "content": "Hello"}, + }, + } + bodyBytes1, _ := json.Marshal(reqBody1) + url1 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req1, _ := http.NewRequestWithContext(ctx, http.MethodPost, url1, bytes.NewReader(bodyBytes1)) + req1.Header.Set("Content-Type", "application/json") + _, _ = http.DefaultClient.Do(req1) + + // Send Anthropic request + reqBody2 := map[string]interface{}{ + "model": "claude-3-opus-20240229", + "messages": []map[string]interface{}{ + {"role": "user", "content": "World"}, + }, + "max_tokens": 1024, + } + bodyBytes2, _ := json.Marshal(reqBody2) + url2 := fmt.Sprintf("%s/v1/messages", srv.APIAddress()) + req2, _ := http.NewRequestWithContext(ctx, http.MethodPost, url2, bytes.NewReader(bodyBytes2)) + req2.Header.Set("Content-Type", "application/json") + req2.Header.Set("anthropic-version", "2023-06-01") + _, _ = http.DefaultClient.Do(req2) + + require.Eventually(t, func() bool { + return srv.RequestCount() == 2 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Filter by provider + apiURL := fmt.Sprintf("%s/api/requests?provider=openai", srv.APIAddress()) + apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) + require.NoError(t, err) + + apiResp, err := http.DefaultClient.Do(apiReq) + require.NoError(t, err) + defer apiResp.Body.Close() + + var records []llmmock.RequestRecord + err = json.NewDecoder(apiResp.Body).Decode(&records) + require.NoError(t, err) + require.Len(t, records, 1) + require.Equal(t, llmmock.ProviderOpenAI, records[0].Request.Provider) +} + +func TestServer_Purge(t *testing.T) { + t.Parallel() + + ctx := context.Background() + srv := new(llmmock.Server) + err := srv.Start(ctx, llmmock.Config{ + HostAddress: "127.0.0.1", + APIPort: 0, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err) + defer srv.Stop() + + // Send a request + reqBody := map[string]interface{}{ + "model": "gpt-4", + "messages": []map[string]interface{}{ + {"role": "user", "content": "Hello"}, + }, + } + bodyBytes, _ := json.Marshal(reqBody) + url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress()) + req, _ := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + _, _ = http.DefaultClient.Do(req) + + require.Eventually(t, func() bool { + return srv.RequestCount() == 1 + }, testutil.WaitShort, testutil.IntervalMedium) + + // Purge + purgeURL := fmt.Sprintf("%s/api/purge", srv.APIAddress()) + purgeReq, err := http.NewRequestWithContext(ctx, http.MethodPost, purgeURL, nil) + require.NoError(t, err) + + purgeResp, err := http.DefaultClient.Do(purgeReq) + require.NoError(t, err) + defer purgeResp.Body.Close() + require.Equal(t, http.StatusOK, purgeResp.StatusCode) + + 
require.Equal(t, 0, srv.RequestCount()) +} diff --git a/scaletest/llmmock/types.go b/scaletest/llmmock/types.go new file mode 100644 index 0000000000000..f1f4e1772153a --- /dev/null +++ b/scaletest/llmmock/types.go @@ -0,0 +1,47 @@ +package llmmock + +import ( + "time" + + "github.com/google/uuid" +) + +// Provider represents the LLM provider type. +type Provider string + +const ( + ProviderOpenAI Provider = "openai" + ProviderAnthropic Provider = "anthropic" +) + +// RequestSummary contains metadata about an intercepted LLM API request. +type RequestSummary struct { + ID uuid.UUID `json:"id"` + Timestamp time.Time `json:"timestamp"` + Provider Provider `json:"provider"` + Model string `json:"model"` + UserID string `json:"user_id,omitempty"` + Stream bool `json:"stream"` + // Request body as JSON string for reference + RequestBody string `json:"request_body,omitempty"` +} + +// ResponseSummary contains metadata about an LLM API response. +type ResponseSummary struct { + RequestID uuid.UUID `json:"request_id"` + Timestamp time.Time `json:"timestamp"` + Status int `json:"status"` + Stream bool `json:"stream"` + FinishReason string `json:"finish_reason,omitempty"` // OpenAI: finish_reason, Anthropic: stop_reason + PromptTokens int `json:"prompt_tokens,omitempty"` + OutputTokens int `json:"output_tokens,omitempty"` // OpenAI: completion_tokens, Anthropic: output_tokens + TotalTokens int `json:"total_tokens,omitempty"` + // Response body as JSON string for reference (non-streaming) or first chunk (streaming) + ResponseBody string `json:"response_body,omitempty"` +} + +// RequestRecord combines request and response information. +type RequestRecord struct { + Request RequestSummary `json:"request"` + Response *ResponseSummary `json:"response,omitempty"` +} From 7f1bc1ab5d38bb8d23ff7b7bf2a21921a897ff26 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Fri, 5 Dec 2025 13:05:23 +0000 Subject: [PATCH 2/9] chore,llm(bridge): add a load generator and mock upstream for bridge scaletesting --- cli/exp_scaletest_bridge.go | 103 ++++++++++++--- cli/exp_scaletest_llmmock.go | 1 - scaletest/bridge/config.go | 40 +++++- scaletest/bridge/metrics.go | 46 ++++++- scaletest/bridge/run.go | 236 +++++++++++++++++++++++++++++++---- 5 files changed, 380 insertions(+), 46 deletions(-) diff --git a/cli/exp_scaletest_bridge.go b/cli/exp_scaletest_bridge.go index cbfa9769c9d0c..e5cb17875d49b 100644 --- a/cli/exp_scaletest_bridge.go +++ b/cli/exp_scaletest_bridge.go @@ -3,7 +3,6 @@ package cli import ( - "context" "fmt" "net/http" "os/signal" @@ -14,8 +13,6 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/xerrors" - "cdr.dev/slog" - "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/scaletest/bridge" "github.com/coder/coder/v2/scaletest/createusers" @@ -25,8 +22,13 @@ import ( func (r *RootCmd) scaletestBridge() *serpent.Command { var ( - userCount int64 - noCleanup bool + userCount int64 + noCleanup bool + directURL string + directToken string + requestCount int64 + model string + stream bool tracingFlags = &scaletestTracingFlags{} // This test requires unlimited concurrency. 
@@ -50,9 +52,14 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { defer stop() ctx = notifyCtx - me, err := requireAdmin(ctx, client) - if err != nil { - return err + var me codersdk.User + if directURL == "" { + // Full mode requires admin access + var err error + me, err = requireAdmin(ctx, client) + if err != nil { + return err + } } client.HTTPClient = &http.Client{ @@ -64,10 +71,23 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { }, } + // Validate: user count is always required (controls concurrency) if userCount <= 0 { return xerrors.Errorf("--user-count must be greater than 0") } + // Set defaults + if requestCount <= 0 { + requestCount = 1 + } + if model == "" { + model = "gpt-4" + } + + // userCount always controls the number of runners (concurrency) + // Each runner makes requestCount requests + runnerCount := userCount + outputs, err := output.parse() if err != nil { return xerrors.Errorf("could not parse --output flags") @@ -96,16 +116,35 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { <-time.After(prometheusFlags.Wait) }() - _, _ = fmt.Fprintln(inv.Stderr, "Creating users...") + if directURL == "" { + _, _ = fmt.Fprintln(inv.Stderr, "Creating users...") + } else { + _, _ = fmt.Fprintf(inv.Stderr, "Direct mode: making requests to %s\n", directURL) + } - configs := make([]bridge.Config, 0, userCount) - for range userCount { + configs := make([]bridge.Config, 0, runnerCount) + for range runnerCount { config := bridge.Config{ - User: createusers.Config{ + Metrics: metrics, + RequestCount: int(requestCount), + Model: model, + Stream: stream, + } + + if directURL != "" { + // Direct mode + config.DirectURL = directURL + config.DirectToken = directToken + } else { + // Full mode + if len(me.OrganizationIDs) == 0 { + return xerrors.Errorf("admin user must have at least one organization") + } + config.User = createusers.Config{ OrganizationID: me.OrganizationIDs[0], - }, - Metrics: metrics, + } } + if err := config.Validate(); err != nil { return xerrors.Errorf("validate config: %w", err) } @@ -174,10 +213,42 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { Flag: "user-count", FlagShorthand: "c", Env: "CODER_SCALETEST_BRIDGE_USER_COUNT", - Description: "Required: Total number of users to create.", + Description: "Required: Number of concurrent runners (in full mode, each creates a user).", Value: serpent.Int64Of(&userCount), Required: true, }, + { + Flag: "direct-url", + Env: "CODER_SCALETEST_BRIDGE_DIRECT_URL", + Description: "URL to make requests to directly (enables direct mode, conflicts with --user-count).", + Value: serpent.StringOf(&directURL), + }, + { + Flag: "direct-token", + Env: "CODER_SCALETEST_BRIDGE_DIRECT_TOKEN", + Description: "Bearer token for direct mode (optional, uses client token if not set).", + Value: serpent.StringOf(&directToken), + }, + { + Flag: "request-count", + Env: "CODER_SCALETEST_BRIDGE_REQUEST_COUNT", + Default: "1", + Description: "Number of requests to make per runner.", + Value: serpent.Int64Of(&requestCount), + }, + { + Flag: "model", + Env: "CODER_SCALETEST_BRIDGE_MODEL", + Default: "gpt-4", + Description: "Model to use for requests.", + Value: serpent.StringOf(&model), + }, + { + Flag: "stream", + Env: "CODER_SCALETEST_BRIDGE_STREAM", + Description: "Enable streaming requests.", + Value: serpent.BoolOf(&stream), + }, { Flag: "no-cleanup", Env: "CODER_SCALETEST_NO_CLEANUP", @@ -193,5 +264,3 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { prometheusFlags.attach(&cmd.Options) return cmd } - - diff 
--git a/cli/exp_scaletest_llmmock.go b/cli/exp_scaletest_llmmock.go index 4f5fcf4fe57e2..ae14d3d4fcc81 100644 --- a/cli/exp_scaletest_llmmock.go +++ b/cli/exp_scaletest_llmmock.go @@ -104,4 +104,3 @@ server that can be used to query intercepted requests and purge stored data.`, return cmd } - diff --git a/scaletest/bridge/config.go b/scaletest/bridge/config.go index 255c57ced186c..be1eea6e7036c 100644 --- a/scaletest/bridge/config.go +++ b/scaletest/bridge/config.go @@ -10,13 +10,46 @@ import ( type Config struct { // User is the configuration for the user to create. + // Required in full mode (when DirectURL is not set). User createusers.Config `json:"user"` + // DirectURL is the URL to make requests to directly. + // If set, enables direct mode and skips user creation. + DirectURL string `json:"direct_url"` + + // DirectToken is the Bearer token for direct mode. + // If not set in direct mode, uses the client's token. + DirectToken string `json:"direct_token"` + + // RequestCount is the number of requests to make per runner. + RequestCount int `json:"request_count"` + + // Model is the model to use for requests. + Model string `json:"model"` + + // Stream indicates whether to use streaming requests. + Stream bool `json:"stream"` + Metrics *Metrics `json:"-"` } func (c Config) Validate() error { - // The runner always needs an org; ensure we propagate it into the user config. + if c.Metrics == nil { + return xerrors.New("metrics must be set") + } + + // In direct mode, DirectURL must be set. + if c.DirectURL != "" { + if c.RequestCount <= 0 { + return xerrors.New("request_count must be greater than 0") + } + if c.Model == "" { + return xerrors.New("model must be set") + } + return nil + } + + // In full mode, User config is required. if c.User.OrganizationID == uuid.Nil { return xerrors.New("user organization_id must be set") } @@ -25,8 +58,9 @@ func (c Config) Validate() error { return xerrors.Errorf("user config: %w", err) } - if c.Metrics == nil { - return xerrors.New("metrics must be set") + // Validate full mode has reasonable values (defaults will be set in CLI if not provided). 
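+ // A zero request count is tolerated here because both the CLI and the runner default it to a single request.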
+ if c.RequestCount < 0 { + return xerrors.New("request_count must be non-negative") } return nil diff --git a/scaletest/bridge/metrics.go b/scaletest/bridge/metrics.go index 978de1e825a34..25a35f3e52bb4 100644 --- a/scaletest/bridge/metrics.go +++ b/scaletest/bridge/metrics.go @@ -5,7 +5,10 @@ import ( ) type Metrics struct { - bridgeErrors *prometheus.CounterVec + bridgeErrors *prometheus.CounterVec + bridgeRequests *prometheus.CounterVec + bridgeDuration prometheus.Histogram + bridgeTokensTotal *prometheus.CounterVec } func NewMetrics(reg prometheus.Registerer) *Metrics { @@ -20,13 +23,50 @@ func NewMetrics(reg prometheus.Registerer) *Metrics { Help: "Total number of bridge errors", }, []string{"action"}) - reg.MustRegister(errors) + requests := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_requests_total", + Help: "Total number of bridge requests", + }, []string{"status"}) + + duration := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_request_duration_seconds", + Help: "Duration of bridge requests in seconds", + Buckets: prometheus.DefBuckets, + }) + + tokens := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_response_tokens_total", + Help: "Total number of tokens in bridge responses", + }, []string{"type"}) + + reg.MustRegister(errors, requests, duration, tokens) return &Metrics{ - bridgeErrors: errors, + bridgeErrors: errors, + bridgeRequests: requests, + bridgeDuration: duration, + bridgeTokensTotal: tokens, } } func (m *Metrics) AddError(action string) { m.bridgeErrors.WithLabelValues(action).Inc() } + +func (m *Metrics) AddRequest(status string) { + m.bridgeRequests.WithLabelValues(status).Inc() +} + +func (m *Metrics) ObserveDuration(duration float64) { + m.bridgeDuration.Observe(duration) +} + +func (m *Metrics) AddTokens(tokenType string, count int64) { + m.bridgeTokensTotal.WithLabelValues(tokenType).Add(float64(count)) +} diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go index b520c7951fe95..c36bdf477a70f 100644 --- a/scaletest/bridge/run.go +++ b/scaletest/bridge/run.go @@ -1,9 +1,13 @@ package bridge import ( + "bytes" "context" + "encoding/json" "fmt" "io" + "net/http" + "time" "golang.org/x/xerrors" @@ -24,14 +28,23 @@ type Runner struct { createUserRunner *createusers.Runner - clock quartz.Clock + clock quartz.Clock + httpClient *http.Client + + // Metrics tracking + requestCount int64 + successCount int64 + failureCount int64 + totalDuration time.Duration + totalTokens int64 } func NewRunner(client *codersdk.Client, cfg Config) *Runner { return &Runner{ - client: client, - cfg: cfg, - clock: quartz.NewReal(), + client: client, + cfg: cfg, + clock: quartz.NewReal(), + httpClient: &http.Client{Timeout: 30 * time.Second}, } } @@ -52,33 +65,201 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { logs = loadtestutil.NewSyncWriter(logs) logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug) - r.client.SetLogger(logger) - r.client.SetLogBodies(true) - r.createUserRunner = createusers.NewRunner(r.client, r.cfg.User) - newUserAndToken, err := r.createUserRunner.RunReturningUser(ctx, id, logs) + var token string + var requestURL string + + // Determine mode: direct or full + if r.cfg.DirectURL != "" { + // Direct mode: skip user creation + requestURL = r.cfg.DirectURL + if r.cfg.DirectToken != "" { + token = r.cfg.DirectToken + } 
else if r.client.SessionToken() != "" { + token = r.client.SessionToken() + } + logger.Info(ctx, "bridge runner in direct mode", slog.F("url", requestURL)) + } else { + // Full mode: create user and use AI Bridge endpoint + r.client.SetLogger(logger) + r.client.SetLogBodies(true) + + r.createUserRunner = createusers.NewRunner(r.client, r.cfg.User) + newUserAndToken, err := r.createUserRunner.RunReturningUser(ctx, id, logs) + if err != nil { + r.cfg.Metrics.AddError("create_user") + return xerrors.Errorf("create user: %w", err) + } + newUser := newUserAndToken.User + token = newUserAndToken.SessionToken + + logger.Info(ctx, "runner user created", slog.F("username", newUser.Username), slog.F("user_id", newUser.ID.String())) + + // Construct AI Bridge URL + requestURL = fmt.Sprintf("%s/api/v2/aibridge/openai/v1/chat/completions", r.client.URL) + } + + // Set defaults if not provided + requestCount := r.cfg.RequestCount + if requestCount <= 0 { + requestCount = 1 + } + model := r.cfg.Model + if model == "" { + model = "gpt-4" + } + + logger.Info(ctx, "bridge runner is ready", + slog.F("request_count", requestCount), + slog.F("model", model), + slog.F("stream", r.cfg.Stream), + ) + + // Make requests + for i := 0; i < requestCount; i++ { + if err := r.makeRequest(ctx, logger, requestURL, token, model, i); err != nil { + logger.Warn(ctx, "request failed", slog.F("request_num", i+1), slog.Error(err)) + r.cfg.Metrics.AddError("request") + r.failureCount++ + r.cfg.Metrics.AddRequest("failure") + // Continue making requests even if one fails + continue + } + r.successCount++ + r.cfg.Metrics.AddRequest("success") + r.requestCount++ + } + + logger.Info(ctx, "bridge runner completed", + slog.F("total_requests", r.requestCount), + slog.F("success", r.successCount), + slog.F("failure", r.failureCount), + ) + + return nil +} + +func (r *Runner) makeRequest(ctx context.Context, logger slog.Logger, url, token, model string, requestNum int) error { + start := r.clock.Now() + + // Prepare request body + reqBody := map[string]interface{}{ + "model": model, + "messages": []map[string]string{ + { + "role": "user", + "content": fmt.Sprintf("Hello, this is test request #%d from the bridge load generator.", requestNum+1), + }, + }, + "stream": r.cfg.Stream, + } + + bodyBytes, err := json.Marshal(reqBody) if err != nil { - r.cfg.Metrics.AddError("create_user") - return xerrors.Errorf("create user: %w", err) + return xerrors.Errorf("marshal request body: %w", err) } - newUser := newUserAndToken.User - newUserClient := codersdk.New(r.client.URL, - codersdk.WithSessionToken(newUserAndToken.SessionToken), - codersdk.WithLogger(logger), - codersdk.WithLogBodies()) - logger.Info(ctx, "runner user created", slog.F("username", newUser.Username), slog.F("user_id", newUser.ID.String())) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + if err != nil { + return xerrors.Errorf("create request: %w", err) + } - logger.Info(ctx, "bridge runner is ready") + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } - _ = newUserClient - // TODO: Implement bridge load generation logic here + logger.Debug(ctx, "making bridge request", + slog.F("url", url), + slog.F("request_num", requestNum+1), + slog.F("model", model), + ) + + resp, err := r.httpClient.Do(req) + if err != nil { + return xerrors.Errorf("execute request: %w", err) + } + defer resp.Body.Close() + + duration := r.clock.Since(start) + r.totalDuration += 
duration + r.cfg.Metrics.ObserveDuration(duration.Seconds()) + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return xerrors.Errorf("request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Handle response + if r.cfg.Stream { + return r.handleStreamingResponse(ctx, logger, resp) + } + + return r.handleNonStreamingResponse(ctx, logger, resp) +} + +func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { + var response struct { + ID string `json:"id"` + Model string `json:"model"` + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` + Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } `json:"usage"` + } + + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return xerrors.Errorf("decode response: %w", err) + } + + if len(response.Choices) > 0 { + logger.Debug(ctx, "received response", + slog.F("response_id", response.ID), + slog.F("content_length", len(response.Choices[0].Message.Content)), + ) + } + + // Track token usage if available + if response.Usage.TotalTokens > 0 { + r.totalTokens += int64(response.Usage.TotalTokens) + r.cfg.Metrics.AddTokens("input", int64(response.Usage.PromptTokens)) + r.cfg.Metrics.AddTokens("output", int64(response.Usage.CompletionTokens)) + } return nil } +func (r *Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { + // For streaming, we just read until the stream ends + // The mock server sends a simple stream format + buf := make([]byte, 4096) + totalRead := 0 + for { + n, err := resp.Body.Read(buf) + if n > 0 { + totalRead += n + } + if err == io.EOF { + break + } + if err != nil { + return xerrors.Errorf("read stream: %w", err) + } + } + + logger.Debug(ctx, "received streaming response", slog.F("bytes_read", totalRead)) + return nil +} + func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error { - if r.createUserRunner != nil { + // Only cleanup user in full mode + if r.cfg.DirectURL == "" && r.createUserRunner != nil { _, _ = fmt.Fprintln(logs, "Cleaning up user...") if err := r.createUserRunner.Cleanup(ctx, id, logs); err != nil { return xerrors.Errorf("cleanup user: %w", err) @@ -89,6 +270,17 @@ func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error { } func (r *Runner) GetMetrics() map[string]any { - // TODO: Return actual metrics when bridge load generation is implemented - return map[string]any{} + avgDuration := time.Duration(0) + if r.requestCount > 0 { + avgDuration = r.totalDuration / time.Duration(r.requestCount) + } + + return map[string]any{ + "request_count": r.requestCount, + "success_count": r.successCount, + "failure_count": r.failureCount, + "total_duration": r.totalDuration.String(), + "avg_duration": avgDuration.String(), + "total_tokens": r.totalTokens, + } } From cebc226e378feca62697a080313e737ec2916793 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Mon, 8 Dec 2025 14:27:37 +0000 Subject: [PATCH 3/9] wip: allow the bridge load generator to authenticate with coder --- cli/exp_scaletest_bridge.go | 45 ++++++++++++++++++++++---------- scaletest/bridge/config.go | 52 ++++++++++++++++++++++++------------- scaletest/bridge/run.go | 15 ++++++----- 3 files changed, 73 insertions(+), 39 deletions(-) diff --git a/cli/exp_scaletest_bridge.go 
b/cli/exp_scaletest_bridge.go index e5cb17875d49b..27ddcb90dac2c 100644 --- a/cli/exp_scaletest_bridge.go +++ b/cli/exp_scaletest_bridge.go @@ -24,7 +24,8 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { var ( userCount int64 noCleanup bool - directURL string + mode string + upstreamURL string directToken string requestCount int64 model string @@ -52,14 +53,22 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { defer stop() ctx = notifyCtx + // Validate mode + if mode != "bridge" && mode != "direct" { + return xerrors.Errorf("--mode must be either 'bridge' or 'direct', got %q", mode) + } + var me codersdk.User - if directURL == "" { - // Full mode requires admin access + if mode == "bridge" { + // Bridge mode requires admin access to create users var err error me, err = requireAdmin(ctx, client) if err != nil { return err } + } else if upstreamURL == "" { + // Direct mode requires upstream URL + return xerrors.Errorf("--upstream-url must be set when using --mode direct") } client.HTTPClient = &http.Client{ @@ -116,27 +125,28 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { <-time.After(prometheusFlags.Wait) }() - if directURL == "" { - _, _ = fmt.Fprintln(inv.Stderr, "Creating users...") + if mode == "bridge" { + _, _ = fmt.Fprintln(inv.Stderr, "Bridge mode: creating users and making requests through AI Bridge...") } else { - _, _ = fmt.Fprintf(inv.Stderr, "Direct mode: making requests to %s\n", directURL) + _, _ = fmt.Fprintf(inv.Stderr, "Direct mode: making requests directly to %s\n", upstreamURL) } configs := make([]bridge.Config, 0, runnerCount) for range runnerCount { config := bridge.Config{ + Mode: bridge.RequestMode(mode), Metrics: metrics, RequestCount: int(requestCount), Model: model, Stream: stream, } - if directURL != "" { + if mode == "direct" { // Direct mode - config.DirectURL = directURL + config.UpstreamURL = upstreamURL config.DirectToken = directToken } else { - // Full mode + // Bridge mode if len(me.OrganizationIDs) == 0 { return xerrors.Errorf("admin user must have at least one organization") } @@ -213,15 +223,22 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { Flag: "user-count", FlagShorthand: "c", Env: "CODER_SCALETEST_BRIDGE_USER_COUNT", - Description: "Required: Number of concurrent runners (in full mode, each creates a user).", + Description: "Required: Number of concurrent runners (in bridge mode, each creates a user).", Value: serpent.Int64Of(&userCount), Required: true, }, { - Flag: "direct-url", - Env: "CODER_SCALETEST_BRIDGE_DIRECT_URL", - Description: "URL to make requests to directly (enables direct mode, conflicts with --user-count).", - Value: serpent.StringOf(&directURL), + Flag: "mode", + Env: "CODER_SCALETEST_BRIDGE_MODE", + Default: "direct", + Description: "Request mode: 'bridge' (create users and use AI Bridge) or 'direct' (make requests directly to upstream-url).", + Value: serpent.StringOf(&mode), + }, + { + Flag: "upstream-url", + Env: "CODER_SCALETEST_BRIDGE_UPSTREAM_URL", + Description: "URL to make requests to directly (required in direct mode, e.g., http://localhost:8080/v1/chat/completions).", + Value: serpent.StringOf(&upstreamURL), }, { Flag: "direct-token", diff --git a/scaletest/bridge/config.go b/scaletest/bridge/config.go index be1eea6e7036c..cc47362e18d6d 100644 --- a/scaletest/bridge/config.go +++ b/scaletest/bridge/config.go @@ -8,14 +8,26 @@ import ( "github.com/coder/coder/v2/scaletest/createusers" ) +type RequestMode string + +const ( + RequestModeBridge RequestMode = "bridge" + 
RequestModeDirect RequestMode = "direct" +) + type Config struct { + // Mode determines how requests are made. + // "bridge": Create users in Coder and use their session tokens to make requests through AI Bridge. + // "direct": Make requests directly to UpstreamURL without user creation. + Mode RequestMode `json:"mode"` + // User is the configuration for the user to create. - // Required in full mode (when DirectURL is not set). + // Required in bridge mode. User createusers.Config `json:"user"` - // DirectURL is the URL to make requests to directly. - // If set, enables direct mode and skips user creation. - DirectURL string `json:"direct_url"` + // UpstreamURL is the URL to make requests to directly. + // Only used in direct mode. + UpstreamURL string `json:"upstream_url"` // DirectToken is the Bearer token for direct mode. // If not set in direct mode, uses the client's token. @@ -38,30 +50,34 @@ func (c Config) Validate() error { return xerrors.New("metrics must be set") } - // In direct mode, DirectURL must be set. - if c.DirectURL != "" { - if c.RequestCount <= 0 { - return xerrors.New("request_count must be greater than 0") - } - if c.Model == "" { - return xerrors.New("model must be set") + // Validate mode + if c.Mode != RequestModeBridge && c.Mode != RequestModeDirect { + return xerrors.New("mode must be either 'bridge' or 'direct'") + } + + if c.RequestCount <= 0 { + return xerrors.New("request_count must be greater than 0") + } + if c.Model == "" { + return xerrors.New("model must be set") + } + + if c.Mode == RequestModeDirect { + // In direct mode, UpstreamURL must be set. + if c.UpstreamURL == "" { + return xerrors.New("upstream_url must be set in direct mode") } return nil } - // In full mode, User config is required. + // In bridge mode, User config is required. if c.User.OrganizationID == uuid.Nil { - return xerrors.New("user organization_id must be set") + return xerrors.New("user organization_id must be set in bridge mode") } if err := c.User.Validate(); err != nil { return xerrors.Errorf("user config: %w", err) } - // Validate full mode has reasonable values (defaults will be set in CLI if not provided). 
- if c.RequestCount < 0 { - return xerrors.New("request_count must be non-negative") - } - return nil } diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go index c36bdf477a70f..e1232d4a4e0d9 100644 --- a/scaletest/bridge/run.go +++ b/scaletest/bridge/run.go @@ -69,10 +69,10 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { var token string var requestURL string - // Determine mode: direct or full - if r.cfg.DirectURL != "" { - // Direct mode: skip user creation - requestURL = r.cfg.DirectURL + // Determine mode: direct or bridge + if r.cfg.Mode == RequestModeDirect { + // Direct mode: skip user creation, use upstream URL directly + requestURL = r.cfg.UpstreamURL if r.cfg.DirectToken != "" { token = r.cfg.DirectToken } else if r.client.SessionToken() != "" { @@ -80,7 +80,7 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { } logger.Info(ctx, "bridge runner in direct mode", slog.F("url", requestURL)) } else { - // Full mode: create user and use AI Bridge endpoint + // Bridge mode: create user and use AI Bridge endpoint r.client.SetLogger(logger) r.client.SetLogBodies(true) @@ -97,6 +97,7 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { // Construct AI Bridge URL requestURL = fmt.Sprintf("%s/api/v2/aibridge/openai/v1/chat/completions", r.client.URL) + logger.Info(ctx, "bridge runner in bridge mode", slog.F("url", requestURL)) } // Set defaults if not provided @@ -258,8 +259,8 @@ func (r *Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger } func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error { - // Only cleanup user in full mode - if r.cfg.DirectURL == "" && r.createUserRunner != nil { + // Only cleanup user in bridge mode + if r.cfg.Mode == RequestModeBridge && r.createUserRunner != nil { _, _ = fmt.Fprintln(logs, "Cleaning up user...") if err := r.createUserRunner.Cleanup(ctx, id, logs); err != nil { return xerrors.Errorf("cleanup user: %w", err) From adabff5f38c96b71d7db50dc0027eccf05e57fef Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Mon, 15 Dec 2025 16:27:29 +0000 Subject: [PATCH 4/9] chore: add scale test load generator and mock llm server for bridge --- cli/exp_scaletest_bridge.go | 117 +++++----- cli/exp_scaletest_llmmock.go | 112 +++++---- scaletest/bridge/config.go | 12 + scaletest/bridge/run.go | 259 ++++++++++++++++++--- scaletest/llmmock/server.go | 436 +++++++++++++++++------------------ 5 files changed, 575 insertions(+), 361 deletions(-) diff --git a/cli/exp_scaletest_bridge.go b/cli/exp_scaletest_bridge.go index 27ddcb90dac2c..b45da22cc1432 100644 --- a/cli/exp_scaletest_bridge.go +++ b/cli/exp_scaletest_bridge.go @@ -7,10 +7,8 @@ import ( "net/http" "os/signal" "strconv" - "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/xerrors" "github.com/coder/coder/v2/codersdk" @@ -22,26 +20,39 @@ import ( func (r *RootCmd) scaletestBridge() *serpent.Command { var ( - userCount int64 - noCleanup bool - mode string - upstreamURL string - directToken string - requestCount int64 - model string - stream bool - tracingFlags = &scaletestTracingFlags{} - - // This test requires unlimited concurrency. 
+ userCount int64 + noCleanup bool + mode string + upstreamURL string + directToken string + provider string + requestCount int64 + model string + stream bool + requestPayloadSize int64 + timeoutStrategy = &timeoutFlags{} cleanupStrategy = newScaletestCleanupStrategy() output = &scaletestOutputFlags{} - prometheusFlags = &scaletestPrometheusFlags{} ) cmd := &serpent.Command{ Use: "bridge", Short: "Generate load on the AI Bridge service.", + Long: `Generate load on the AI Bridge service by making requests to OpenAI or Anthropic APIs. + +Examples: + # Test OpenAI API through bridge + coder scaletest bridge --mode bridge --provider openai --user-count 10 --request-count 5 + + # Test Anthropic API through bridge + coder scaletest bridge --mode bridge --provider anthropic --user-count 10 --request-count 5 + + # Test directly against mock server + coder scaletest bridge --mode direct --provider openai --upstream-url http://localhost:8080/v1/chat/completions + +The load generator builds conversation history over time, with each request including +all previous messages in the conversation.`, Handler: func(inv *serpent.Invocation) error { ctx := inv.Context() client, err := r.InitClient(inv) @@ -53,21 +64,18 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { defer stop() ctx = notifyCtx - // Validate mode if mode != "bridge" && mode != "direct" { return xerrors.Errorf("--mode must be either 'bridge' or 'direct', got %q", mode) } var me codersdk.User if mode == "bridge" { - // Bridge mode requires admin access to create users var err error me, err = requireAdmin(ctx, client) if err != nil { return err } } else if upstreamURL == "" { - // Direct mode requires upstream URL return xerrors.Errorf("--upstream-url must be set when using --mode direct") } @@ -80,21 +88,24 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { }, } - // Validate: user count is always required (controls concurrency) if userCount <= 0 { return xerrors.Errorf("--user-count must be greater than 0") } - // Set defaults if requestCount <= 0 { requestCount = 1 } + if provider == "" { + provider = "openai" + } if model == "" { - model = "gpt-4" + if provider == "anthropic" { + model = "claude-3-opus-20240229" + } else { + model = "gpt-4" + } } - // userCount always controls the number of runners (concurrency) - // Each runner makes requestCount requests runnerCount := userCount outputs, err := output.parse() @@ -102,29 +113,9 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { return xerrors.Errorf("could not parse --output flags") } - tracerProvider, closeTracing, tracingEnabled, err := tracingFlags.provider(ctx) - if err != nil { - return xerrors.Errorf("create tracer provider: %w", err) - } - tracer := tracerProvider.Tracer(scaletestTracerName) - reg := prometheus.NewRegistry() metrics := bridge.NewMetrics(reg) - logger := inv.Logger - prometheusSrvClose := ServeHandler(ctx, logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), prometheusFlags.Address, "prometheus") - defer prometheusSrvClose() - - defer func() { - _, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...") - if err := closeTracing(ctx); err != nil { - _, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err) - } - // Wait for prometheus metrics to be scraped - _, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", prometheusFlags.Wait) - <-time.After(prometheusFlags.Wait) - }() - if mode == "bridge" { _, _ = fmt.Fprintln(inv.Stderr, "Bridge mode: creating users and making requests through AI Bridge...") 
} else { @@ -134,19 +125,19 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { configs := make([]bridge.Config, 0, runnerCount) for range runnerCount { config := bridge.Config{ - Mode: bridge.RequestMode(mode), - Metrics: metrics, - RequestCount: int(requestCount), - Model: model, - Stream: stream, + Mode: bridge.RequestMode(mode), + Metrics: metrics, + Provider: provider, + RequestCount: int(requestCount), + Model: model, + Stream: stream, + RequestPayloadSize: int(requestPayloadSize), } if mode == "direct" { - // Direct mode config.UpstreamURL = upstreamURL config.DirectToken = directToken } else { - // Bridge mode if len(me.OrganizationIDs) == 0 { return xerrors.Errorf("admin user must have at least one organization") } @@ -167,14 +158,6 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { id := strconv.Itoa(i) name := fmt.Sprintf("bridge-%s", id) var runner harness.Runnable = bridge.NewRunner(client, config) - if tracingEnabled { - runner = &runnableTraceWrapper{ - tracer: tracer, - spanName: name, - runner: runner, - } - } - th.AddRun(name, id, runner) } @@ -223,7 +206,7 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { Flag: "user-count", FlagShorthand: "c", Env: "CODER_SCALETEST_BRIDGE_USER_COUNT", - Description: "Required: Number of concurrent runners (in bridge mode, each creates a user).", + Description: "Required: Number of concurrent runners (in bridge mode, each creates a coder user).", Value: serpent.Int64Of(&userCount), Required: true, }, @@ -246,11 +229,18 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { Description: "Bearer token for direct mode (optional, uses client token if not set).", Value: serpent.StringOf(&directToken), }, + { + Flag: "provider", + Env: "CODER_SCALETEST_BRIDGE_PROVIDER", + Default: "openai", + Description: "API provider to use: 'openai' or 'anthropic'.", + Value: serpent.StringOf(&provider), + }, { Flag: "request-count", Env: "CODER_SCALETEST_BRIDGE_REQUEST_COUNT", Default: "1", - Description: "Number of requests to make per runner.", + Description: "Number of sequential requests to make per runner.", Value: serpent.Int64Of(&requestCount), }, { @@ -266,6 +256,13 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { Description: "Enable streaming requests.", Value: serpent.BoolOf(&stream), }, + { + Flag: "request-payload-size", + Env: "CODER_SCALETEST_BRIDGE_REQUEST_PAYLOAD_SIZE", + Default: "0", + Description: "Size in bytes of the request payload (user message content). 
If 0, uses default message content.", + Value: serpent.Int64Of(&requestPayloadSize), + }, { Flag: "no-cleanup", Env: "CODER_SCALETEST_NO_CLEANUP", @@ -274,10 +271,8 @@ func (r *RootCmd) scaletestBridge() *serpent.Command { }, } - tracingFlags.attach(&cmd.Options) timeoutStrategy.attach(&cmd.Options) cleanupStrategy.attach(&cmd.Options) output.attach(&cmd.Options) - prometheusFlags.attach(&cmd.Options) return cmd } diff --git a/cli/exp_scaletest_llmmock.go b/cli/exp_scaletest_llmmock.go index ae14d3d4fcc81..2d03a08938ebb 100644 --- a/cli/exp_scaletest_llmmock.go +++ b/cli/exp_scaletest_llmmock.go @@ -4,6 +4,7 @@ package cli import ( "fmt" + "net/http" "os/signal" "time" @@ -17,26 +18,40 @@ import ( func (*RootCmd) scaletestLLMMock() *serpent.Command { var ( - hostAddress string - apiPort int64 - purgeAtCount int64 + address string + artificialLatency time.Duration + responsePayloadSize int64 + + pprofEnable bool + pprofAddress string + + traceEnable bool ) cmd := &serpent.Command{ Use: "llm-mock", Short: "Start a mock LLM API server for testing", - Long: `Start a mock LLM API server that simulates OpenAI and Anthropic APIs with an HTTP API -server that can be used to query intercepted requests and purge stored data.`, + Long: `Start a mock LLM API server that simulates OpenAI and Anthropic APIs`, Handler: func(inv *serpent.Invocation) error { - ctx := inv.Context() - notifyCtx, stop := signal.NotifyContext(ctx, StopSignals...) + ctx, stop := signal.NotifyContext(inv.Context(), StopSignals...) defer stop() - ctx = notifyCtx logger := slog.Make(sloghuman.Sink(inv.Stderr)).Leveled(slog.LevelInfo) + + if pprofEnable { + _ = http.DefaultServeMux + closePprof := ServeHandler(ctx, logger, nil, pprofAddress, "pprof") + defer closePprof() + logger.Info(ctx, "pprof server started", slog.F("address", pprofAddress)) + } + config := llmmock.Config{ - HostAddress: hostAddress, - APIPort: int(apiPort), - Logger: logger, + Address: address, + Logger: logger, + ArtificialLatency: artificialLatency, + ResponsePayloadSize: int(responsePayloadSize), + PprofEnable: pprofEnable, + PprofAddress: pprofAddress, + TraceEnable: traceEnable, } srv := new(llmmock.Server) @@ -50,55 +65,54 @@ server that can be used to query intercepted requests and purge stored data.`, _, _ = fmt.Fprintf(inv.Stdout, "Mock LLM API server started on %s\n", srv.APIAddress()) _, _ = fmt.Fprintf(inv.Stdout, " OpenAI endpoint: %s/v1/chat/completions\n", srv.APIAddress()) _, _ = fmt.Fprintf(inv.Stdout, " Anthropic endpoint: %s/v1/messages\n", srv.APIAddress()) - _, _ = fmt.Fprintf(inv.Stdout, " Query API: %s/api/requests\n", srv.APIAddress()) - if purgeAtCount > 0 { - _, _ = fmt.Fprintf(inv.Stdout, " Auto-purge when request count reaches %d\n", purgeAtCount) - } - - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - _, _ = fmt.Fprintf(inv.Stdout, "\nTotal requests received since last purge: %d\n", srv.RequestCount()) - return nil - case <-ticker.C: - count := srv.RequestCount() - if count > 0 { - _, _ = fmt.Fprintf(inv.Stdout, "Requests received: %d\n", count) - } - if purgeAtCount > 0 && int64(count) >= purgeAtCount { - _, _ = fmt.Fprintf(inv.Stdout, "Request count (%d) reached threshold (%d). 
Purging...\n", count, purgeAtCount) - srv.Purge() - continue - } - } - } + <-ctx.Done() + return nil }, } cmd.Options = []serpent.Option{ { - Flag: "host-address", - Env: "CODER_SCALETEST_LLM_MOCK_HOST_ADDRESS", + Flag: "address", + Env: "CODER_SCALETEST_LLM_MOCK_ADDRESS", Default: "localhost", - Description: "Host address to bind the mock LLM API server.", - Value: serpent.StringOf(&hostAddress), + Description: "Address to bind the mock LLM API server. Can include a port (e.g., 'localhost:8080' or ':8080'). Uses a random port if no port is specified.", + Value: serpent.StringOf(&address), + }, + { + Flag: "artificial-latency", + Env: "CODER_SCALETEST_LLM_MOCK_ARTIFICIAL_LATENCY", + Default: "0s", + Description: "Artificial latency to add to each response (e.g., 100ms, 1s). Simulates slow upstream processing.", + Value: serpent.DurationOf(&artificialLatency), + }, + { + Flag: "response-payload-size", + Env: "CODER_SCALETEST_LLM_MOCK_RESPONSE_PAYLOAD_SIZE", + Default: "0", + Description: "Size in bytes of the response payload. If 0, uses default context-aware responses.", + Value: serpent.Int64Of(&responsePayloadSize), + }, + { + Flag: "pprof-enable", + Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ENABLE", + Default: "false", + Description: "Serve pprof metrics on the address defined by pprof-address.", + Value: serpent.BoolOf(&pprofEnable), }, { - Flag: "api-port", - Env: "CODER_SCALETEST_LLM_MOCK_API_PORT", - Description: "Port for the HTTP API server. Uses a random port if not specified.", - Value: serpent.Int64Of(&apiPort), + Flag: "pprof-address", + Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ADDRESS", + Default: "127.0.0.1:6060", + Description: "The bind address to serve pprof.", + Value: serpent.StringOf(&pprofAddress), }, { - Flag: "purge-at-count", - Env: "CODER_SCALETEST_LLM_MOCK_PURGE_AT_COUNT", - Default: "100000", - Description: "Maximum number of requests to keep before auto-purging. Set to 0 to disable.", - Value: serpent.Int64Of(&purgeAtCount), + Flag: "trace-enable", + Env: "CODER_SCALETEST_LLM_MOCK_TRACE_ENABLE", + Default: "false", + Description: "Whether application tracing data is collected. It exports to a backend configured by environment variables. See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md.", + Value: serpent.BoolOf(&traceEnable), }, } diff --git a/scaletest/bridge/config.go b/scaletest/bridge/config.go index cc47362e18d6d..ac7f7460a3aea 100644 --- a/scaletest/bridge/config.go +++ b/scaletest/bridge/config.go @@ -33,6 +33,9 @@ type Config struct { // If not set in direct mode, uses the client's token. DirectToken string `json:"direct_token"` + // Provider is the API provider to use: "openai" or "anthropic". + Provider string `json:"provider"` + // RequestCount is the number of requests to make per runner. RequestCount int `json:"request_count"` @@ -42,6 +45,10 @@ type Config struct { // Stream indicates whether to use streaming requests. Stream bool `json:"stream"` + // RequestPayloadSize is the size in bytes of the request payload (user message content). + // If 0, uses default message content. + RequestPayloadSize int `json:"request_payload_size"` + Metrics *Metrics `json:"-"` } @@ -62,6 +69,11 @@ func (c Config) Validate() error { return xerrors.New("model must be set") } + // Validate provider + if c.Provider != "openai" && c.Provider != "anthropic" { + return xerrors.New("provider must be either 'openai' or 'anthropic'") + } + if c.Mode == RequestModeDirect { // In direct mode, UpstreamURL must be set. 
if c.UpstreamURL == "" { diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go index e1232d4a4e0d9..8404be86885cd 100644 --- a/scaletest/bridge/run.go +++ b/scaletest/bridge/run.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/http" + "strings" "time" "golang.org/x/xerrors" @@ -14,6 +15,12 @@ import ( "cdr.dev/slog" "cdr.dev/slog/sloggers/sloghuman" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel/attribute" + semconv "go.opentelemetry.io/otel/semconv/v1.14.0" + "go.opentelemetry.io/otel/semconv/v1.14.0/httpconv" + "go.opentelemetry.io/otel/trace" + "github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/scaletest/createusers" @@ -22,6 +29,55 @@ import ( "github.com/coder/quartz" ) +type tracingContextKey struct{} +type tracingContext struct { + provider string + model string + stream bool + requestNum int + mode RequestMode +} + +type tracingTransport struct { + cfg Config + underlying http.RoundTripper +} + +func newTracingTransport(cfg Config, underlying http.RoundTripper) *tracingTransport { + if underlying == nil { + underlying = http.DefaultTransport + } + return &tracingTransport{ + cfg: cfg, + underlying: otelhttp.NewTransport(underlying), + } +} + +func (t *tracingTransport) RoundTrip(req *http.Request) (*http.Response, error) { + aibridgeCtx, hasAIBridgeCtx := req.Context().Value(tracingContextKey{}).(tracingContext) + + resp, err := t.underlying.RoundTrip(req) + + if hasAIBridgeCtx { + ctx := req.Context() + if resp != nil && resp.Request != nil { + ctx = resp.Request.Context() + } + span := trace.SpanFromContext(ctx) + if span.IsRecording() { + span.SetAttributes( + attribute.String("aibridge.provider", aibridgeCtx.provider), + attribute.String("aibridge.model", aibridgeCtx.model), + attribute.Bool("aibridge.stream", aibridgeCtx.stream), + attribute.Int("aibridge.request_num", aibridgeCtx.requestNum), + attribute.String("aibridge.mode", string(aibridgeCtx.mode)), + ) + } + } + + return resp, err +} + type Runner struct { client *codersdk.Client cfg Config @@ -31,7 +87,6 @@ type Runner struct { clock quartz.Clock httpClient *http.Client - // Metrics tracking requestCount int64 successCount int64 failureCount int64 @@ -41,10 +96,13 @@ type Runner struct { func NewRunner(client *codersdk.Client, cfg Config) *Runner { return &Runner{ - client: client, - cfg: cfg, - clock: quartz.NewReal(), - httpClient: &http.Client{Timeout: 30 * time.Second}, + client: client, + cfg: cfg, + clock: quartz.NewReal(), + httpClient: &http.Client{ + Timeout: 30 * time.Second, + Transport: newTracingTransport(cfg, http.DefaultTransport), + }, } } @@ -69,7 +127,6 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { var token string var requestURL string - // Determine mode: direct or bridge if r.cfg.Mode == RequestModeDirect { // Direct mode: skip user creation, use upstream URL directly requestURL = r.cfg.UpstreamURL @@ -95,12 +152,15 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { logger.Info(ctx, "runner user created", slog.F("username", newUser.Username), slog.F("user_id", newUser.ID.String())) - // Construct AI Bridge URL - requestURL = fmt.Sprintf("%s/api/v2/aibridge/openai/v1/chat/completions", r.client.URL) - logger.Info(ctx, "bridge runner in bridge mode", slog.F("url", requestURL)) + // Construct AI Bridge URL based on provider + if r.cfg.Provider == "anthropic" { + requestURL = fmt.Sprintf("%s/api/v2/aibridge/anthropic/v1/messages", 
r.client.URL) + } else { + requestURL = fmt.Sprintf("%s/api/v2/aibridge/openai/v1/chat/completions", r.client.URL) + } + logger.Info(ctx, "bridge runner in bridge mode", slog.F("url", requestURL), slog.F("provider", r.cfg.Provider)) } - // Set defaults if not provided requestCount := r.cfg.RequestCount if requestCount <= 0 { requestCount = 1 @@ -116,13 +176,17 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { slog.F("stream", r.cfg.Stream), ) - // Make requests for i := 0; i < requestCount; i++ { if err := r.makeRequest(ctx, logger, requestURL, token, model, i); err != nil { - logger.Warn(ctx, "request failed", slog.F("request_num", i+1), slog.Error(err)) + logger.Warn(ctx, "bridge request failed", + slog.F("request_num", i+1), + slog.F("error_type", "request_failed"), + slog.Error(err), + ) r.cfg.Metrics.AddError("request") - r.failureCount++ r.cfg.Metrics.AddRequest("failure") + r.failureCount++ + // Continue making requests even if one fails continue } @@ -137,22 +201,67 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { slog.F("failure", r.failureCount), ) + // Fail the run if any request failed + if r.failureCount > 0 { + return xerrors.Errorf("bridge runner failed: %d out of %d requests failed", r.failureCount, requestCount) + } + return nil } func (r *Runner) makeRequest(ctx context.Context, logger slog.Logger, url, token, model string, requestNum int) error { start := r.clock.Now() - // Prepare request body - reqBody := map[string]interface{}{ - "model": model, - "messages": []map[string]string{ - { - "role": "user", - "content": fmt.Sprintf("Hello, this is test request #%d from the bridge load generator.", requestNum+1), - }, - }, - "stream": r.cfg.Stream, + ctx = context.WithValue(ctx, tracingContextKey{}, tracingContext{ + provider: r.cfg.Provider, + model: model, + stream: r.cfg.Stream, + requestNum: requestNum + 1, + mode: r.cfg.Mode, + }) + + var content string + if r.cfg.RequestPayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, r.cfg.RequestPayloadSize) + content = repeated[:r.cfg.RequestPayloadSize] + } else { + content = fmt.Sprintf("Hello, this is test request #%d from the bridge load generator.", requestNum+1) + } + + newUserMessage := map[string]string{ + "role": "user", + "content": content, + } + messages := make([]map[string]string, 0) + messages = append(messages, newUserMessage) + + var reqBody map[string]interface{} + if r.cfg.Provider == "anthropic" { + anthropicMessages := make([]map[string]interface{}, 0, len(messages)) + for _, msg := range messages { + anthropicMessages = append(anthropicMessages, map[string]interface{}{ + "role": msg["role"], + "content": []map[string]string{ + { + "type": "text", + "text": msg["content"], + }, + }, + }) + } + reqBody = map[string]interface{}{ + "model": model, + "messages": anthropicMessages, + "max_tokens": 1024, + "stream": r.cfg.Stream, + } + } else { + reqBody = map[string]interface{}{ + "model": model, + "messages": messages, + "stream": r.cfg.Stream, + } } bodyBytes, err := json.Marshal(reqBody) @@ -178,28 +287,55 @@ func (r *Runner) makeRequest(ctx context.Context, logger slog.Logger, url, token resp, err := r.httpClient.Do(req) if err != nil { + span := trace.SpanFromContext(req.Context()) + if span.IsRecording() { + span.RecordError(err) + } + logger.Warn(ctx, "request failed during execution", + slog.F("request_num", requestNum+1), + slog.Error(err), + ) return xerrors.Errorf("execute request: %w", err) } defer resp.Body.Close() + 
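+	// Annotate whatever span is active on the request context with the HTTP status
+	// so non-2xx responses are visible in the exported traces.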
span := trace.SpanFromContext(req.Context()) + if span.IsRecording() { + span.SetAttributes(semconv.HTTPStatusCodeKey.Int(resp.StatusCode)) + span.SetStatus(httpconv.ClientStatus(resp.StatusCode)) + } + duration := r.clock.Since(start) r.totalDuration += duration r.cfg.Metrics.ObserveDuration(duration.Seconds()) if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) - return xerrors.Errorf("request failed with status %d: %s", resp.StatusCode, string(body)) + err := xerrors.Errorf("request failed with status %d: %s", resp.StatusCode, string(body)) + span.RecordError(err) + return err } - // Handle response if r.cfg.Stream { - return r.handleStreamingResponse(ctx, logger, resp) + err := r.handleStreamingResponse(ctx, logger, resp) + if err != nil { + span.RecordError(err) + return err + } + return nil } - return r.handleNonStreamingResponse(ctx, logger, resp) + return r.handleNonStreamingResponse(ctx, logger, resp, requestNum) } -func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { +func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response, requestNum int) error { + if r.cfg.Provider == "anthropic" { + return r.handleAnthropicResponse(ctx, logger, resp, requestNum) + } + return r.handleOpenAIResponse(ctx, logger, resp, requestNum) +} + +func (r *Runner) handleOpenAIResponse(ctx context.Context, logger slog.Logger, resp *http.Response, _ int) error { var response struct { ID string `json:"id"` Model string `json:"model"` @@ -219,14 +355,15 @@ func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Log return xerrors.Errorf("decode response: %w", err) } + var assistantContent string if len(response.Choices) > 0 { + assistantContent = response.Choices[0].Message.Content logger.Debug(ctx, "received response", slog.F("response_id", response.ID), - slog.F("content_length", len(response.Choices[0].Message.Content)), + slog.F("content_length", len(assistantContent)), ) } - // Track token usage if available if response.Usage.TotalTokens > 0 { r.totalTokens += int64(response.Usage.TotalTokens) r.cfg.Metrics.AddTokens("input", int64(response.Usage.PromptTokens)) @@ -236,12 +373,56 @@ func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Log return nil } -func (r *Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { - // For streaming, we just read until the stream ends - // The mock server sends a simple stream format +func (r *Runner) handleAnthropicResponse(ctx context.Context, logger slog.Logger, resp *http.Response, _ int) error { + var response struct { + ID string `json:"id"` + Model string `json:"model"` + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + } `json:"usage"` + } + + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return xerrors.Errorf("decode response: %w", err) + } + + var assistantContent string + if len(response.Content) > 0 { + assistantContent = response.Content[0].Text + logger.Debug(ctx, "received response", + slog.F("response_id", response.ID), + slog.F("content_length", len(assistantContent)), + ) + } + + totalTokens := response.Usage.InputTokens + response.Usage.OutputTokens + if totalTokens > 0 { + r.totalTokens += int64(totalTokens) + r.cfg.Metrics.AddTokens("input", 
int64(response.Usage.InputTokens)) + r.cfg.Metrics.AddTokens("output", int64(response.Usage.OutputTokens)) + } + + return nil +} + +func (*Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { buf := make([]byte, 4096) totalRead := 0 for { + // Check for context cancellation before each read + if ctx.Err() != nil { + logger.Warn(ctx, "streaming response canceled", + slog.F("bytes_read", totalRead), + slog.Error(ctx.Err()), + ) + return xerrors.Errorf("stream canceled: %w", ctx.Err()) + } + n, err := resp.Body.Read(buf) if n > 0 { totalRead += n @@ -250,6 +431,18 @@ func (r *Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger break } if err != nil { + // Check if error is due to context cancellation + if xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) { + logger.Warn(ctx, "streaming response read canceled", + slog.F("bytes_read", totalRead), + slog.Error(err), + ) + return xerrors.Errorf("stream read canceled: %w", err) + } + logger.Warn(ctx, "streaming response read error", + slog.F("bytes_read", totalRead), + slog.Error(err), + ) return xerrors.Errorf("read stream: %w", err) } } diff --git a/scaletest/llmmock/server.go b/scaletest/llmmock/server.go index e1b25bc62e42f..d3ced5f844b1a 100644 --- a/scaletest/llmmock/server.go +++ b/scaletest/llmmock/server.go @@ -5,18 +5,24 @@ import ( "encoding/json" "errors" "fmt" - "io" "net" "net/http" - "slices" "strings" - "sync" "time" "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" + semconv "go.opentelemetry.io/otel/semconv/v1.14.0" + "go.opentelemetry.io/otel/semconv/v1.14.0/httpconv" + "go.opentelemetry.io/otel/semconv/v1.14.0/netconv" + "go.opentelemetry.io/otel/trace" "golang.org/x/xerrors" "cdr.dev/slog" + + "github.com/coder/coder/v2/coderd/pproflabel" + "github.com/coder/coder/v2/coderd/tracing" ) // Server wraps the LLM mock server and provides an HTTP API to retrieve requests. 
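For reference, a minimal sketch (not part of the patch) of driving the mock server from Go rather than through `coder exp scaletest llm-mock`, assuming the llmmock.Config and Server API introduced in this change. The Config fields, the Start/Stop/APIAddress methods, and the /v1/chat/completions path come from the patch; the listen address and request body below are illustrative only.

package main

import (
	"bytes"
	"context"
	"fmt"
	"net/http"
	"os"
	"time"

	"cdr.dev/slog"
	"cdr.dev/slog/sloggers/sloghuman"

	"github.com/coder/coder/v2/scaletest/llmmock"
)

func main() {
	ctx := context.Background()
	logger := slog.Make(sloghuman.Sink(os.Stderr)).Leveled(slog.LevelInfo)

	srv := new(llmmock.Server)
	if err := srv.Start(ctx, llmmock.Config{
		Address:             "localhost:8080",       // assumed free port
		Logger:              logger,
		ArtificialLatency:   100 * time.Millisecond, // simulate a slow upstream
		ResponsePayloadSize: 0,                      // 0 => default canned response
	}); err != nil {
		panic(err)
	}
	defer func() { _ = srv.Stop() }()

	// Exercise the OpenAI-compatible endpoint exposed by the mock.
	body := bytes.NewBufferString(`{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`)
	resp, err := http.Post(srv.APIAddress()+"/v1/chat/completions", "application/json", body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("mock responded with status", resp.StatusCode)
}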
@@ -25,25 +31,29 @@ type Server struct { httpListener net.Listener logger slog.Logger - hostAddress string - apiPort int + address string + artificialLatency time.Duration + responsePayloadSize int - // Storage for intercepted requests - records []RequestRecord - recordsMu sync.RWMutex + tracerProvider trace.TracerProvider + closeTracing func(context.Context) error } type Config struct { - HostAddress string - APIPort int - Logger slog.Logger + Address string + Logger slog.Logger + ArtificialLatency time.Duration + ResponsePayloadSize int + + PprofEnable bool + PprofAddress string + + TraceEnable bool } -type openAIRequest struct { - Model string `json:"model"` - Messages []openAIMessage `json:"messages"` - Stream bool `json:"stream,omitempty"` - Extra map[string]interface{} `json:"-"` +type llmRequest struct { + Model string `json:"model"` + Stream bool `json:"stream,omitempty"` } type openAIMessage struct { @@ -68,19 +78,6 @@ type openAIResponse struct { } `json:"usage"` } -type anthropicRequest struct { - Model string `json:"model"` - Messages []anthropicMessage `json:"messages"` - Stream bool `json:"stream,omitempty"` - MaxTokens int `json:"max_tokens"` - Extra map[string]interface{} `json:"-"` -} - -type anthropicMessage struct { - Role string `json:"role"` - Content string `json:"content"` -} - type anthropicResponse struct { ID string `json:"id"` Type string `json:"type"` @@ -99,9 +96,29 @@ type anthropicResponse struct { } func (s *Server) Start(ctx context.Context, cfg Config) error { - s.hostAddress = cfg.HostAddress - s.apiPort = cfg.APIPort + s.address = cfg.Address s.logger = cfg.Logger + s.artificialLatency = cfg.ArtificialLatency + s.responsePayloadSize = cfg.ResponsePayloadSize + + if cfg.TraceEnable { + otel.SetTextMapPropagator( + propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + ), + ) + + tracerProvider, closeTracing, err := tracing.TracerProvider(ctx, "llm-mock", tracing.TracerOpts{ + Default: cfg.TraceEnable, + }) + if err != nil { + s.logger.Warn(ctx, "failed to initialize tracing", slog.Error(err)) + } else { + s.tracerProvider = tracerProvider + s.closeTracing = closeTracing + } + } if err := s.startAPIServer(ctx); err != nil { return xerrors.Errorf("start API server: %w", err) @@ -118,107 +135,91 @@ func (s *Server) Stop() error { return xerrors.Errorf("shutdown HTTP server: %w", err) } } + if s.closeTracing != nil { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := s.closeTracing(shutdownCtx); err != nil { + s.logger.Warn(shutdownCtx, "failed to close tracing", slog.Error(err)) + } + } return nil } func (s *Server) APIAddress() string { - return fmt.Sprintf("http://%s:%d", s.hostAddress, s.apiPort) -} - -func (s *Server) RequestCount() int { - s.recordsMu.RLock() - defer s.recordsMu.RUnlock() - return len(s.records) -} - -func (s *Server) Purge() { - s.recordsMu.Lock() - defer s.recordsMu.Unlock() - s.records = s.records[:0] + return fmt.Sprintf("http://%s", s.address) } func (s *Server) startAPIServer(ctx context.Context) error { mux := http.NewServeMux() - // LLM API endpoints mux.HandleFunc("POST /v1/chat/completions", s.handleOpenAI) mux.HandleFunc("POST /v1/messages", s.handleAnthropic) - // Query API endpoints - mux.HandleFunc("GET /api/requests", s.handleGetRequests) - mux.HandleFunc("POST /api/purge", s.handlePurge) + var handler http.Handler = mux + if s.tracerProvider != nil { + handler = s.tracingMiddleware(handler) + } s.httpServer = 
&http.Server{ - Handler: mux, + Handler: handler, ReadHeaderTimeout: 10 * time.Second, } - listener, err := net.Listen("tcp", fmt.Sprintf("%s:%d", s.hostAddress, s.apiPort)) + listener, err := net.Listen("tcp", fmt.Sprintf(s.address)) if err != nil { - return xerrors.Errorf("listen on %s:%d: %w", s.hostAddress, s.apiPort, err) + return xerrors.Errorf("listen on %s: %w", s.address, err) } s.httpListener = listener - tcpAddr, valid := listener.Addr().(*net.TCPAddr) - if !valid { - err := listener.Close() - if err != nil { - s.logger.Error(ctx, "failed to close listener", slog.Error(err)) - } - return xerrors.Errorf("listener returned invalid address: %T", listener.Addr()) - } - s.apiPort = tcpAddr.Port - - go func() { + pproflabel.Go(ctx, pproflabel.Service("llm-mock"), func(ctx context.Context) { if err := s.httpServer.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) { s.logger.Error(ctx, "http API server error", slog.Error(err)) } - }() + }) return nil } func (s *Server) handleOpenAI(w http.ResponseWriter, r *http.Request) { + pproflabel.Do(r.Context(), pproflabel.Service("llm-mock"), func(ctx context.Context) { + s.handleOpenAIWithLabels(w, r.WithContext(ctx)) + }) +} + +func (s *Server) handleOpenAIWithLabels(w http.ResponseWriter, r *http.Request) { + s.logger.Debug(r.Context(), "handling OpenAI request") + defer s.logger.Debug(r.Context(), "handled OpenAI request") + ctx := r.Context() requestID := uuid.New() now := time.Now() - // Read request body - bodyBytes, err := io.ReadAll(r.Body) - if err != nil { - s.logger.Error(ctx, "failed to read request body", slog.Error(err)) - http.Error(w, "failed to read request body", http.StatusBadRequest) - return - } - - // Parse request - var req openAIRequest - if err := json.Unmarshal(bodyBytes, &req); err != nil { + var req llmRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { s.logger.Error(ctx, "failed to parse OpenAI request", slog.Error(err)) http.Error(w, "invalid request body", http.StatusBadRequest) return } - // Extract user ID from Authorization header if present - userID := s.extractUserID(r) - - // Store request - requestSummary := RequestSummary{ - ID: requestID, - Timestamp: now, - Provider: ProviderOpenAI, - Model: req.Model, - UserID: userID, - Stream: req.Stream, - RequestBody: string(bodyBytes), + if s.artificialLatency > 0 { + time.Sleep(s.artificialLatency) } - // Generate mock response var resp openAIResponse resp.ID = fmt.Sprintf("chatcmpl-%s", requestID.String()[:8]) resp.Object = "chat.completion" resp.Created = now.Unix() resp.Model = req.Model + + var responseContent string + if s.responsePayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, s.responsePayloadSize) + responseContent = repeated[:s.responsePayloadSize] + } else { + responseContent = "This is a mock response from OpenAI." 
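+		// Used when --response-payload-size is 0; otherwise the padded "x" body above
+		// controls the exact response size.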
+ } + resp.Choices = []struct { Index int `json:"index"` Message openAIMessage `json:"message"` @@ -228,95 +229,77 @@ func (s *Server) handleOpenAI(w http.ResponseWriter, r *http.Request) { Index: 0, Message: openAIMessage{ Role: "assistant", - Content: "This is a mock response from OpenAI.", + Content: responseContent, }, FinishReason: "stop", }, } + resp.Usage.PromptTokens = 10 resp.Usage.CompletionTokens = 5 resp.Usage.TotalTokens = 15 responseBody, _ := json.Marshal(resp) - responseTime := time.Now() - - // Store response - responseSummary := ResponseSummary{ - RequestID: requestID, - Timestamp: responseTime, - Status: http.StatusOK, - Stream: req.Stream, - FinishReason: "stop", - PromptTokens: resp.Usage.PromptTokens, - OutputTokens: resp.Usage.CompletionTokens, - TotalTokens: resp.Usage.TotalTokens, - ResponseBody: string(responseBody), - } - - s.recordsMu.Lock() - s.records = append(s.records, RequestRecord{ - Request: requestSummary, - Response: &responseSummary, - }) - s.recordsMu.Unlock() - // Send response if req.Stream { - s.sendOpenAIStream(w, resp) + s.sendOpenAIStream(ctx, w, resp) } else { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - _, _ = w.Write(responseBody) + if _, err := w.Write(responseBody); err != nil { + s.logger.Error(ctx, "failed to write OpenAI response", + slog.F("request_id", requestID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + } } } func (s *Server) handleAnthropic(w http.ResponseWriter, r *http.Request) { + pproflabel.Do(r.Context(), pproflabel.Service("llm-mock"), func(ctx context.Context) { + s.handleAnthropicWithLabels(w, r.WithContext(ctx)) + }) +} + +func (s *Server) handleAnthropicWithLabels(w http.ResponseWriter, r *http.Request) { ctx := r.Context() requestID := uuid.New() - now := time.Now() - // Read request body - bodyBytes, err := io.ReadAll(r.Body) - if err != nil { - s.logger.Error(ctx, "failed to read request body", slog.Error(err)) - http.Error(w, "failed to read request body", http.StatusBadRequest) - return - } + var req llmRequest - // Parse request - var req anthropicRequest - if err := json.Unmarshal(bodyBytes, &req); err != nil { - s.logger.Error(ctx, "failed to parse Anthropic request", slog.Error(err)) + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.logger.Error(ctx, "failed to parse LLM request", slog.Error(err)) http.Error(w, "invalid request body", http.StatusBadRequest) return } - // Extract user ID from Authorization header if present - userID := s.extractUserID(r) - - // Store request - requestSummary := RequestSummary{ - ID: requestID, - Timestamp: now, - Provider: ProviderAnthropic, - Model: req.Model, - UserID: userID, - Stream: req.Stream, - RequestBody: string(bodyBytes), + if s.artificialLatency > 0 { + time.Sleep(s.artificialLatency) } - // Generate mock response var resp anthropicResponse resp.ID = fmt.Sprintf("msg_%s", requestID.String()[:8]) resp.Type = "message" resp.Role = "assistant" + + var responseText string + if s.responsePayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, s.responsePayloadSize) + responseText = repeated[:s.responsePayloadSize] + } else { + responseText = "This is a mock response from Anthropic." 
+ } + resp.Content = []struct { Type string `json:"type"` Text string `json:"text"` }{ { Type: "text", - Text: "This is a mock response from Anthropic.", + Text: responseText, }, } resp.Model = req.Model @@ -325,45 +308,44 @@ func (s *Server) handleAnthropic(w http.ResponseWriter, r *http.Request) { resp.Usage.OutputTokens = 5 responseBody, _ := json.Marshal(resp) - responseTime := time.Now() - - // Store response - responseSummary := ResponseSummary{ - RequestID: requestID, - Timestamp: responseTime, - Status: http.StatusOK, - Stream: req.Stream, - FinishReason: resp.StopReason, - PromptTokens: resp.Usage.InputTokens, - OutputTokens: resp.Usage.OutputTokens, - TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens, - ResponseBody: string(responseBody), - } - - s.recordsMu.Lock() - s.records = append(s.records, RequestRecord{ - Request: requestSummary, - Response: &responseSummary, - }) - s.recordsMu.Unlock() - // Send response if req.Stream { - s.sendAnthropicStream(w, resp) + s.sendAnthropicStream(ctx, w, resp) } else { w.Header().Set("Content-Type", "application/json") w.Header().Set("anthropic-version", "2023-06-01") w.WriteHeader(http.StatusOK) - _, _ = w.Write(responseBody) + if _, err := w.Write(responseBody); err != nil { + s.logger.Error(ctx, "failed to write Anthropic response", + slog.F("request_id", requestID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + } } } -func (s *Server) sendOpenAIStream(w http.ResponseWriter, resp openAIResponse) { +func (s *Server) sendOpenAIStream(ctx context.Context, w http.ResponseWriter, resp openAIResponse) { w.Header().Set("Content-Type", "text/event-stream") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") w.WriteHeader(http.StatusOK) + // Helper function to write with error checking + writeChunk := func(data string) bool { + if _, err := fmt.Fprintf(w, "%s", data); err != nil { + s.logger.Error(ctx, "failed to write OpenAI stream chunk", + slog.F("response_id", resp.ID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + return false + } + return true + } + // Send initial chunk chunk := map[string]interface{}{ "id": resp.ID, @@ -382,7 +364,9 @@ func (s *Server) sendOpenAIStream(w http.ResponseWriter, resp openAIResponse) { }, } chunkBytes, _ := json.Marshal(chunk) - _, _ = fmt.Fprintf(w, "data: %s\n\n", chunkBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", chunkBytes)) { + return + } // Send final chunk finalChunk := map[string]interface{}{ @@ -399,18 +383,32 @@ func (s *Server) sendOpenAIStream(w http.ResponseWriter, resp openAIResponse) { }, } finalChunkBytes, _ := json.Marshal(finalChunk) - _, _ = fmt.Fprintf(w, "data: %s\n\n", finalChunkBytes) - _, _ = fmt.Fprintf(w, "data: [DONE]\n\n") + if !writeChunk(fmt.Sprintf("data: %s\n\n", finalChunkBytes)) { + return + } + writeChunk("data: [DONE]\n\n") } -func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicResponse) { +func (s *Server) sendAnthropicStream(ctx context.Context, w http.ResponseWriter, resp anthropicResponse) { w.Header().Set("Content-Type", "text/event-stream") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") w.Header().Set("anthropic-version", "2023-06-01") w.WriteHeader(http.StatusOK) - // Send message_start event + writeChunk := func(data string) bool { + if _, err := fmt.Fprintf(w, "%s", data); err != nil { + s.logger.Error(ctx, "failed to write Anthropic 
stream chunk", + slog.F("response_id", resp.ID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + return false + } + return true + } + startEvent := map[string]interface{}{ "type": "message_start", "message": map[string]interface{}{ @@ -421,7 +419,9 @@ func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicRespon }, } startBytes, _ := json.Marshal(startEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", startBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", startBytes)) { + return + } // Send content_block_start event contentStartEvent := map[string]interface{}{ @@ -433,7 +433,9 @@ func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicRespon }, } contentStartBytes, _ := json.Marshal(contentStartEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", contentStartBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", contentStartBytes)) { + return + } // Send content_block_delta event deltaEvent := map[string]interface{}{ @@ -445,7 +447,9 @@ func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicRespon }, } deltaBytes, _ := json.Marshal(deltaEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", deltaBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", deltaBytes)) { + return + } // Send content_block_stop event contentStopEvent := map[string]interface{}{ @@ -453,7 +457,9 @@ func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicRespon "index": 0, } contentStopBytes, _ := json.Marshal(contentStopEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", contentStopBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", contentStopBytes)) { + return + } // Send message_delta event deltaMsgEvent := map[string]interface{}{ @@ -465,65 +471,59 @@ func (s *Server) sendAnthropicStream(w http.ResponseWriter, resp anthropicRespon "usage": resp.Usage, } deltaMsgBytes, _ := json.Marshal(deltaMsgEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", deltaMsgBytes) + if !writeChunk(fmt.Sprintf("data: %s\n\n", deltaMsgBytes)) { + return + } // Send message_stop event stopEvent := map[string]interface{}{ "type": "message_stop", } stopBytes, _ := json.Marshal(stopEvent) - _, _ = fmt.Fprintf(w, "data: %s\n\n", stopBytes) + writeChunk(fmt.Sprintf("data: %s\n\n", stopBytes)) } -func (s *Server) handleGetRequests(w http.ResponseWriter, r *http.Request) { - s.recordsMu.RLock() - records := slices.Clone(s.records) - s.recordsMu.RUnlock() +func (s *Server) tracingMiddleware(next http.Handler) http.Handler { + tracer := s.tracerProvider.Tracer("llm-mock") - // Apply filters - userID := r.URL.Query().Get("user_id") - providerStr := r.URL.Query().Get("provider") + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + // Wrap response writer with StatusWriter for tracing + sw := &tracing.StatusWriter{ResponseWriter: rw} - var filtered []RequestRecord - for _, record := range records { - if userID != "" && record.Request.UserID != userID { - continue - } - if providerStr != "" && string(record.Request.Provider) != providerStr { - continue - } - filtered = append(filtered, record) - } + // Extract trace context from headers + propagator := otel.GetTextMapPropagator() + hc := propagation.HeaderCarrier(r.Header) + ctx := propagator.Extract(r.Context(), hc) - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(filtered); err != nil { - s.logger.Warn(r.Context(), "failed to encode JSON response", slog.Error(err)) - } -} + // Start span with initial name (will be updated after 
handler) + ctx, span := tracer.Start(ctx, fmt.Sprintf("%s %s", r.Method, r.RequestURI)) + defer span.End() + r = r.WithContext(ctx) -func (s *Server) handlePurge(w http.ResponseWriter, _ *http.Request) { - s.Purge() - w.WriteHeader(http.StatusOK) -} + // Inject trace context into response headers + if span.SpanContext().HasTraceID() && span.SpanContext().HasSpanID() { + rw.Header().Set("X-Trace-ID", span.SpanContext().TraceID().String()) + rw.Header().Set("X-Span-ID", span.SpanContext().SpanID().String()) -func (s *Server) extractUserID(r *http.Request) string { - // Try to extract user ID from Authorization header - auth := r.Header.Get("Authorization") - if auth == "" { - return "" - } - - // For now, just return a simple identifier - // In a real scenario, this might parse a JWT or API key - // For scale tests, we can use the token itself or extract from it - if strings.HasPrefix(auth, "Bearer ") { - token := strings.TrimPrefix(auth, "Bearer ") - // Use first 8 chars as a simple identifier - if len(token) > 8 { - return token[:8] + hc := propagation.HeaderCarrier(rw.Header()) + propagator.Inject(ctx, hc) } - return token - } - return "" + // Execute the handler + next.ServeHTTP(sw, r) + + // Update span with final route and response information + route := r.URL.Path + span.SetName(fmt.Sprintf("%s %s", r.Method, route)) + span.SetAttributes(netconv.Transport("tcp")) + span.SetAttributes(httpconv.ServerRequest("llm-mock", r)...) + span.SetAttributes(semconv.HTTPRouteKey.String(route)) + + status := sw.Status + if status == 0 { + status = http.StatusOK + } + span.SetAttributes(semconv.HTTPStatusCodeKey.Int(status)) + span.SetStatus(httpconv.ServerStatus(status)) + }) } From 27490adf0e995c0e32ac2dcce84c9792f1f8b375 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Tue, 16 Dec 2025 10:04:14 +0000 Subject: [PATCH 5/9] chore: add observability for bridge scaletests --- .../local-observability/alloy/config.alloy | 91 + .../local-observability/docker-compose.yml | 132 ++ .../provisioning/dashboards/bridge.json | 1991 +++++++++++++++++ .../provisioning/dashboards/dashboard.yml | 13 + .../provisioning/datasources/prometheus.yml | 31 + .../provisioning/datasources/tempo.yml | 38 + .../local-observability/logs/.gitignore | 1 + .../bridge/local-observability/loki/loki.yml | 26 + .../prometheus/prometheus.yml | 40 + .../local-observability/promtail/promtail.yml | 36 + .../local-observability/pyroscope/agent.yml | 12 + .../pyroscope/pyroscope.yml | 31 + .../local-observability/tempo/tempo.yml | 69 + 13 files changed, 2511 insertions(+) create mode 100644 scaletest/bridge/local-observability/alloy/config.alloy create mode 100644 scaletest/bridge/local-observability/docker-compose.yml create mode 100644 scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json create mode 100644 scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml create mode 100644 scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml create mode 100644 scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml create mode 100644 scaletest/bridge/local-observability/logs/.gitignore create mode 100644 scaletest/bridge/local-observability/loki/loki.yml create mode 100644 scaletest/bridge/local-observability/prometheus/prometheus.yml create mode 100644 scaletest/bridge/local-observability/promtail/promtail.yml create mode 100644 scaletest/bridge/local-observability/pyroscope/agent.yml create mode 100644 
scaletest/bridge/local-observability/pyroscope/pyroscope.yml create mode 100644 scaletest/bridge/local-observability/tempo/tempo.yml diff --git a/scaletest/bridge/local-observability/alloy/config.alloy b/scaletest/bridge/local-observability/alloy/config.alloy new file mode 100644 index 0000000000000..f02fd5a9434c3 --- /dev/null +++ b/scaletest/bridge/local-observability/alloy/config.alloy @@ -0,0 +1,91 @@ +// Grafana Alloy configuration to scrape pprof from develop.sh and forward to Pyroscope +// The develop.sh server exposes pprof at /api/v2/debug/pprof/ instead of /debug/pprof/ + +pyroscope.scrape "coderd" { + targets = [ + { + "__address__" = "host.docker.internal:3000", + "service_name" = "coderd", + }, + ] + + authorization { + credentials = "OUEi29lgBh-ovSmaMtFxosBF9Hicg7z4N" + type = "Bearer" + } + + forward_to = [pyroscope.write.local.receiver] + + profiling_config { + profile.process_cpu { + enabled = true + delta = true + path = "/api/v2/debug/pprof/profile" + } + profile.memory { + enabled = true + path = "/api/v2/debug/pprof/allocs" + } + profile.goroutine { + enabled = true + path = "/api/v2/debug/pprof/goroutine" + } + profile.block { + enabled = false + path = "/api/v2/debug/pprof/block" + } + profile.mutex { + enabled = false + path = "/api/v2/debug/pprof/mutex" + } + } + + delta_profiling_duration="2s" + scrape_interval = "3s" + scrape_timeout = "10s" +} + +pyroscope.scrape "llmmock" { + targets = [ + { + "__address__" = "host.docker.internal:6061", + "service_name" = "llmmock", + }, + ] + + forward_to = [pyroscope.write.local.receiver] + + profiling_config { + profile.process_cpu { + enabled = true + delta = true + path = "/debug/pprof/profile" + } + profile.memory { + enabled = true + path = "/debug/pprof/allocs" + } + profile.goroutine { + enabled = true + path = "/debug/pprof/goroutine" + } + profile.block { + enabled = false + path = "/debug/pprof/block" + } + profile.mutex { + enabled = false + path = "/debug/pprof/mutex" + } + } + + delta_profiling_duration="2s" + scrape_interval = "3s" + scrape_timeout = "10s" +} + +pyroscope.write "local" { + endpoint { + url = "http://pyroscope:4040/" + } +} diff --git a/scaletest/bridge/local-observability/docker-compose.yml b/scaletest/bridge/local-observability/docker-compose.yml new file mode 100644 index 0000000000000..6e02cc9db768c --- /dev/null +++ b/scaletest/bridge/local-observability/docker-compose.yml @@ -0,0 +1,132 @@ +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + container_name: prometheus + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--storage.tsdb.retention.time=30d' + extra_hosts: + - "host.docker.internal:host-gateway" + networks: + - observability + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + ports: + - "3100:3000" + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_INSTALL_PLUGINS=pyroscope-datasource,pyroscope-panel + networks: + - observability + depends_on: + - prometheus + - pyroscope + - tempo + - loki + restart: unless-stopped + + pyroscope: 
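+    # Continuous profiling backend; Grafana Alloy (see alloy/config.alloy) pushes the scraped pprof profiles here.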
+ image: grafana/pyroscope:latest + container_name: pyroscope + ports: + - "4040:4040" + volumes: + - ./pyroscope/pyroscope.yml:/etc/pyroscope/pyroscope.yml + - pyroscope-data:/var/lib/pyroscope + command: + - server + - --config=/etc/pyroscope/pyroscope.yml + networks: + - observability + restart: unless-stopped + + grafana-alloy: + image: grafana/alloy:latest + container_name: grafana-alloy + volumes: + - ./alloy/config.alloy:/etc/alloy/config.alloy:ro + command: + - run + - --server.http.listen-addr=0.0.0.0:12345 + - --storage.path=/var/lib/alloy/data + - /etc/alloy/config.alloy + extra_hosts: + - "host.docker.internal:host-gateway" + networks: + - observability + depends_on: + - pyroscope + restart: unless-stopped + + tempo: + image: grafana/tempo:latest + container_name: tempo + ports: + - "3200:3200" # Tempo HTTP + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + volumes: + - ./tempo/tempo.yml:/etc/tempo/tempo.yml + command: + - -config.file=/etc/tempo/tempo.yml + - -target=all + networks: + - observability + restart: unless-stopped + + loki: + image: grafana/loki:latest + container_name: loki + ports: + - "3101:3100" + volumes: + - ./loki/loki.yml:/etc/loki/local-config.yaml + - loki-data:/loki + command: + - -config.file=/etc/loki/local-config.yaml + networks: + - observability + restart: unless-stopped + + promtail: + image: grafana/promtail:latest + container_name: promtail + volumes: + - ./promtail/promtail.yml:/etc/promtail/config.yml:ro + - ./logs:/var/log/coder:ro + command: + - -config.file=/etc/promtail/config.yml + networks: + - observability + depends_on: + - loki + restart: unless-stopped + + +volumes: + prometheus-data: + grafana-data: + pyroscope-data: + loki-data: + +networks: + observability: + driver: bridge diff --git a/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json new file mode 100644 index 0000000000000..ecc40c4fdf1dd --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json @@ -0,0 +1,1991 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 0, + "links": [], + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 6, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "Total user and system CPU time spent in seconds.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "process_cpu_seconds_total-sum(rate)" + } + ], + "title": "process_cpu_seconds_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Resident memory size in bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 1, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(process_resident_memory_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "process_resident_memory_bytes-avg" + } + ], + "title": "process_resident_memory_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of bytes transmitted by the process over the network.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 5, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(process_network_transmit_bytes_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "range": true, + "refId": "process_network_receive_bytes_total-sum(rate)" + } + ], + "title": "process_network_transmit_bytes_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of bytes received by the process over the network.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(process_network_receive_bytes_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "process_network_receive_bytes_total-sum(rate)" + } + ], + "title": "process_network_receive_bytes_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of open file descriptors.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + 
"id": "byName", + "options": "avg" + }, + "properties": [] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 3, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(process_open_fds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "process_open_fds-avg" + } + ], + "title": "process_open_fds", + "type": "timeseries" + } + ], + "title": "Process", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 7, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of idle connections.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 8, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_sql_idle_connections{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_sql_idle_connections-avg" + } + ], + "title": "go_sql_idle_connections", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of connections currently in use.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 9, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_sql_in_use_connections{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_sql_in_use_connections-avg" + } + ], + "title": "go_sql_in_use_connections", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The total time blocked waiting for a new connection.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 10, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_sql_wait_duration_seconds_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_sql_wait_duration_seconds_total-sum(rate)" + } + ], + "title": "go_sql_wait_duration_seconds_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 11, + "maxDataPoints": 500, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 32 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "selectionMode": "x", + "showValue": "auto", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum by (le) 
(rate(coderd_db_tx_duration_seconds_bucket{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "format": "heatmap", + "fromExploreMetrics": false, + "refId": "coderd_db_tx_duration_seconds_bucket-heatmap" + } + ], + "title": "coderd_db_tx_duration_seconds_bucket", + "type": "heatmap" + } + ], + "title": "SQL", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 12, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes allocated and currently in use, same as go_memstats_alloc_bytes. Equals to /memory/classes/heap/objects:bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 16, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_alloc_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_alloc_bytes-avg" + } + ], + "title": "go_memstats_heap_alloc_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes that are in use. 
Equals to /memory/classes/heap/objects:bytes + /memory/classes/heap/unused:bytes\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 17, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_inuse_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_inuse_bytes-avg" + } + ], + "title": "go_memstats_heap_inuse_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes waiting to be used. Equals to /memory/classes/heap/released:bytes + /memory/classes/heap/free:bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 18, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_idle_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_idle_bytes-avg" + } + ], + "title": "go_memstats_heap_idle_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Count of events that have been observed for the base 
metric (A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 14, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_gc_duration_seconds_count{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_gc_duration_seconds_count-sum(rate)" + } + ], + "title": "go_gc_duration_seconds_count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.\n\n**Type:** *summary*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 13, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_gc_duration_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_gc_duration_seconds-avg" + } + ], + "title": "go_gc_duration_seconds", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Total sum of all observed values for the base metric 
(A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 15, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_gc_duration_seconds_sum{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_gc_duration_seconds_sum-sum(rate)" + } + ], + "title": "go_gc_duration_seconds_sum", + "type": "timeseries" + } + ], + "title": "Garbage Collector", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 19, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of concurrent API requests.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 20, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coderd_api_concurrent_requests{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coderd_api_concurrent_requests-avg" + } + ], + "title": "coderd_api_concurrent_requests", + "type": 
"timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Cumulative counters for the observation buckets (Latency distribution of requests in seconds.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 21, + "maxDataPoints": 500, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 32 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "selectionMode": "x", + "showValue": "auto", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum by (le) (rate(coderd_api_request_latencies_seconds_bucket{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "format": "heatmap", + "fromExploreMetrics": false, + "refId": "coderd_api_request_latencies_seconds_bucket-heatmap" + } + ], + "title": "coderd_api_request_latencies_seconds_bucket", + "type": "heatmap" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The total number of processed API requests\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 22, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(coderd_api_requests_processed_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "coderd_api_requests_processed_total-sum(rate)" + } + ], + "title": "coderd_api_requests_processed_total", + "type": "timeseries" + } + ], + "title": "API", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 23, + "panels": [], + "title": "PubSub", + "type": "row" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The 
time taken to receive a message from a pubsub event channel\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 24, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coder_pubsub_receive_latency_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coder_pubsub_receive_latency_seconds-avg" + } + ], + "title": "coder_pubsub_receive_latency_seconds", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The time taken to send a message into a pubsub event channel\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 25, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coder_pubsub_send_latency_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coder_pubsub_send_latency_seconds-avg" + } + ], + "title": "coder_pubsub_send_latency_seconds", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "2025-12-11T18:09:40.000Z", + "to": 
"2025-12-11T18:15:04.000Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Bridge Load Testing Metrics", + "uid": "adjfnrq", + "version": 25 +} diff --git a/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000000..9141a4532b95a --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards + foldersFromFilesStructure: true diff --git a/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml b/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000000000..1d15bafa90c9b --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,31 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: "15s" + + - name: Pyroscope + type: pyroscope-datasource + uid: pyroscope + access: proxy + url: http://pyroscope:4040 + editable: true + jsonData: + keepNestedSets: true + minStep: "15s" + + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + editable: true + jsonData: + maxLines: 1000 diff --git a/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml b/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml new file mode 100644 index 0000000000000..27856ee6530ef --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml @@ -0,0 +1,38 @@ +apiVersion: 1 + +datasources: + - name: Tempo + uid: tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: true + jsonData: + httpMethod: GET + + nodeGraph: + enabled: true + serviceMap: + datasourceUid: prometheus + + tracesToMetrics: + datasourceUid: prometheus + tags: + - key: service.name + value: service + - key: job + queries: + - name: P90 latency (spanmetrics) + query: sum(rate(tempo_spanmetrics_latency_bucket[$__interval])) by (le, service) + + # Optional: traces->logs (if you have Loki wired up) + # tracesToLogs: + # datasourceUid: loki + # tags: + # - job + # - instance + # mappedTags: + # - key: service.name + # value: service + # spanStartTimeShift: '1h' + # spanEndTimeShift: '1h' diff --git a/scaletest/bridge/local-observability/logs/.gitignore b/scaletest/bridge/local-observability/logs/.gitignore new file mode 100644 index 0000000000000..397b4a7624e35 --- /dev/null +++ b/scaletest/bridge/local-observability/logs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/scaletest/bridge/local-observability/loki/loki.yml b/scaletest/bridge/local-observability/loki/loki.yml new file mode 100644 index 0000000000000..023e7337f2a2e --- /dev/null +++ b/scaletest/bridge/local-observability/loki/loki.yml @@ -0,0 +1,26 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + 
+schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + period: 24h diff --git a/scaletest/bridge/local-observability/prometheus/prometheus.yml b/scaletest/bridge/local-observability/prometheus/prometheus.yml new file mode 100644 index 0000000000000..8829c66d01808 --- /dev/null +++ b/scaletest/bridge/local-observability/prometheus/prometheus.yml @@ -0,0 +1,40 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + cluster: 'local-observability' + environment: 'development' + +# Alertmanager configuration (optional, can be added later) +# alerting: +# alertmanagers: +# - static_configs: +# - targets: [] + +# Load rules once and periodically evaluate them (optional) +# rule_files: +# - "alert_rules.yml" + +scrape_configs: + # Scrape Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # Coder server from develop.sh + # This scrapes metrics from a running ./scripts/develop.sh server + # Requires: Start develop.sh with --prometheus-enable flag or set CODER_PROMETHEUS_ENABLE=true + - job_name: 'coderd-develop' + static_configs: + - targets: ['host.docker.internal:2118'] + labels: + service: 'coderd' + environment: 'development' + instance: 'develop-sh' + scrape_interval: 8s + scrape_timeout: 5s + metrics_path: '/api/v2/metrics' + + - job_name: 'tempo-metrics-generator' + static_configs: + - targets: ['tempo:3200'] diff --git a/scaletest/bridge/local-observability/promtail/promtail.yml b/scaletest/bridge/local-observability/promtail/promtail.yml new file mode 100644 index 0000000000000..51b9c21b25d4a --- /dev/null +++ b/scaletest/bridge/local-observability/promtail/promtail.yml @@ -0,0 +1,36 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: coder-logs + static_configs: + - targets: + - localhost + labels: + job: coder-logs + __path__: /var/log/coder/* + pipeline_stages: + - regex: + expression: '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z|\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})' + - labels: + filename: + - timestamp: + source: timestamp + format: RFC3339Nano + fallback_formats: + - "2006-01-02 15:04:05" + - output: + source: message + relabel_configs: + - source_labels: + - __path__ + target_label: filename + regex: '.*/([^/]+)$' + replacement: '${1}' diff --git a/scaletest/bridge/local-observability/pyroscope/agent.yml b/scaletest/bridge/local-observability/pyroscope/agent.yml new file mode 100644 index 0000000000000..b019c958050fa --- /dev/null +++ b/scaletest/bridge/local-observability/pyroscope/agent.yml @@ -0,0 +1,12 @@ +# Pyroscope agent configuration +# This configures the agent to scrape pprof endpoints + +# Server address to forward profiles to +server-address: http://pyroscope:4040 + +# Scrape configuration +# Note: The Pyroscope agent may need to be configured differently +# depending on the version. This is a basic configuration. 
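+# In this stack the agent config is effectively unused: Grafana Alloy (alloy/config.alloy)
+# scrapes the pprof endpoints and forwards the profiles to the Pyroscope server instead.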
+ +# Logging +log-level: info diff --git a/scaletest/bridge/local-observability/pyroscope/pyroscope.yml b/scaletest/bridge/local-observability/pyroscope/pyroscope.yml new file mode 100644 index 0000000000000..db141dd01c73b --- /dev/null +++ b/scaletest/bridge/local-observability/pyroscope/pyroscope.yml @@ -0,0 +1,31 @@ +# Pyroscope server configuration +# This configures Pyroscope to collect profiling data from Go processes + +# Storage configuration +storage: + # Path where Pyroscope will store data + path: /var/lib/pyroscope + # Retention period (30 days = 720 hours) + retention: 720h + +# Server configuration +server: + # HTTP API address + api-bind-address: :4040 + # Base URL for the UI (adjust if behind a proxy) + base-url: http://localhost:4040 + +# Ingest configuration +# Pyroscope can ingest from: +# 1. HTTP endpoint (pprof format) - configure scrape targets below +# 2. Direct push from Go applications using pyroscope client +ingestion: + # Maximum number of samples per second + max_ingestion_rate: 10000 + +# Logging +log-level: info + +# Note: Pyroscope server mode doesn't natively support scraping pprof endpoints. +# Grafana Alloy is used to scrape pprof endpoints and forward to Pyroscope. +# See README.md for configuration details. diff --git a/scaletest/bridge/local-observability/tempo/tempo.yml b/scaletest/bridge/local-observability/tempo/tempo.yml new file mode 100644 index 0000000000000..e720ed9e3f3a3 --- /dev/null +++ b/scaletest/bridge/local-observability/tempo/tempo.yml @@ -0,0 +1,69 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + http: + endpoint: "0.0.0.0:4318" + grpc: + endpoint: "0.0.0.0:4317" + +ingester: + # how long a trace can be idle before it's flushed to a block (optional, but nice) + trace_idle_period: 10s + + # you already used this before; fine to keep + max_block_duration: 5m + + lifecycler: + ring: + kvstore: + store: memberlist # use in-memory memberlist ring (good for single-binary/docker) + replication_factor: 1 # single node, so 1 is fine + heartbeat_period: 5s # 👈 this must be > 0 + +metrics_generator: + # WAL for *metrics* generated from traces + storage: + path: /tmp/tempo/generator/wal + + # WAL for *traces* used by local-blocks (needed for TraceQL metrics) + # See MetricSummary/local-blocks notes. + traces_storage: + path: /tmp/tempo/generator/traces + + processor: + # Prometheus span metrics (RED style metrics) + span_metrics: {} + + # Service graph metrics (for service map / node graph) + service_graphs: {} + + # Local blocks enable TraceQL metrics API (/api/metrics/...) + local_blocks: + # Persist blocks so you can query a longer window than just in-memory + flush_to_storage: true + +compactor: + compaction: + # Totally fine to tweak; this is just a sane default for local dev + block_retention: 24h + +storage: + trace: + backend: local + + local: + path: /tmp/tempo/traces + +overrides: + defaults: + # Enable metrics-generator processors for the (default) tenant + # Note: dashes here, underscores in the config block. 
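+    # For example, the span-metrics entry below enables the span_metrics processor configured
+    # above; likewise service-graphs -> service_graphs and local-blocks -> local_blocks.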
+ metrics_generator: + processors: + - span-metrics + - service-graphs + - local-blocks From b4a10e37b287d4ec6915b9139978e30a5aa0ef07 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Tue, 16 Dec 2025 10:08:43 +0000 Subject: [PATCH 6/9] make fmt --- go.mod | 2 +- scaletest/bridge/run.go | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 17fb110747803..62f0b5353c5c5 100644 --- a/go.mod +++ b/go.mod @@ -440,7 +440,7 @@ require ( go.opentelemetry.io/collector/pdata/pprofile v0.121.0 // indirect go.opentelemetry.io/collector/semconv v0.123.0 // indirect go.opentelemetry.io/contrib v1.19.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/proto/otlp v1.7.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go index 8404be86885cd..2a264d6c3654a 100644 --- a/scaletest/bridge/run.go +++ b/scaletest/bridge/run.go @@ -29,14 +29,16 @@ import ( "github.com/coder/quartz" ) -type tracingContextKey struct{} -type tracingContext struct { - provider string - model string - stream bool - requestNum int - mode RequestMode -} +type ( + tracingContextKey struct{} + tracingContext struct { + provider string + model string + stream bool + requestNum int + mode RequestMode + } +) type tracingTransport struct { cfg Config From ed8dea09de8756f60e1de2152170cc097b9914cb Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Tue, 16 Dec 2025 10:10:47 +0000 Subject: [PATCH 7/9] remove defunct and short lived token --- scaletest/bridge/local-observability/alloy/config.alloy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scaletest/bridge/local-observability/alloy/config.alloy b/scaletest/bridge/local-observability/alloy/config.alloy index f02fd5a9434c3..ed40a15d67e9b 100644 --- a/scaletest/bridge/local-observability/alloy/config.alloy +++ b/scaletest/bridge/local-observability/alloy/config.alloy @@ -10,7 +10,7 @@ pyroscope.scrape "coderd" { ] authorization { - credentials = "OUEi29lgBh-ovSmaMtFxosBF9Hicg7z4N" + credentials = "" type = "Bearer" } From 2f10ad38dc16aa56593c11a92ef081e812f001b5 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Tue, 16 Dec 2025 10:16:36 +0000 Subject: [PATCH 8/9] remove defunct test file --- scaletest/llmmock/server_test.go | 473 ------------------------------- 1 file changed, 473 deletions(-) delete mode 100644 scaletest/llmmock/server_test.go diff --git a/scaletest/llmmock/server_test.go b/scaletest/llmmock/server_test.go deleted file mode 100644 index 5107ca0c7a8d2..0000000000000 --- a/scaletest/llmmock/server_test.go +++ /dev/null @@ -1,473 +0,0 @@ -package llmmock_test - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "net/http" - "strings" - "testing" - - "github.com/stretchr/testify/require" - - "cdr.dev/slog/sloggers/slogtest" - "github.com/coder/coder/v2/scaletest/llmmock" - "github.com/coder/coder/v2/testutil" -) - -func TestServer_StartStop(t *testing.T) { - t.Parallel() - - ctx := context.Background() - srv := new(llmmock.Server) - err := srv.Start(ctx, llmmock.Config{ - HostAddress: "127.0.0.1", - APIPort: 0, - Logger: slogtest.Make(t, nil), - }) - require.NoError(t, err) - require.NotEmpty(t, srv.APIAddress()) - - err = srv.Stop() - require.NoError(t, err) -} - -func TestServer_OpenAIRequest(t *testing.T) { - t.Parallel() - - ctx := context.Background() - 
srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	reqBody := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{
-				"role":    "user",
-				"content": "Hello, world!",
-			},
-		},
-		"stream": false,
-	}
-	bodyBytes, err := json.Marshal(reqBody)
-	require.NoError(t, err)
-
-	url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes))
-	require.NoError(t, err)
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer test-token-12345")
-
-	resp, err := http.DefaultClient.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	require.Equal(t, http.StatusOK, resp.StatusCode)
-	require.Equal(t, "application/json", resp.Header.Get("Content-Type"))
-
-	var openAIResp map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&openAIResp)
-	require.NoError(t, err)
-	require.Equal(t, "chat.completion", openAIResp["object"])
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 1
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Query stored requests
-	apiURL := fmt.Sprintf("%s/api/requests", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.Equal(t, llmmock.ProviderOpenAI, records[0].Request.Provider)
-	require.Equal(t, "gpt-4", records[0].Request.Model)
-	require.Equal(t, false, records[0].Request.Stream)
-	require.NotNil(t, records[0].Response)
-	require.Equal(t, "stop", records[0].Response.FinishReason)
-}
-
-func TestServer_AnthropicRequest(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	reqBody := map[string]interface{}{
-		"model": "claude-3-opus-20240229",
-		"messages": []map[string]interface{}{
-			{
-				"role":    "user",
-				"content": "Hello, world!",
-			},
-		},
-		"max_tokens": 1024,
-		"stream":     false,
-	}
-	bodyBytes, err := json.Marshal(reqBody)
-	require.NoError(t, err)
-
-	url := fmt.Sprintf("%s/v1/messages", srv.APIAddress())
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes))
-	require.NoError(t, err)
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("Authorization", "Bearer test-token-67890")
-	req.Header.Set("anthropic-version", "2023-06-01")
-
-	resp, err := http.DefaultClient.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	require.Equal(t, http.StatusOK, resp.StatusCode)
-	require.Equal(t, "application/json", resp.Header.Get("Content-Type"))
-
-	var anthropicResp map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&anthropicResp)
-	require.NoError(t, err)
-	require.Equal(t, "message", anthropicResp["type"])
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 1
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Query stored requests
-	apiURL := fmt.Sprintf("%s/api/requests?provider=anthropic", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.Equal(t, llmmock.ProviderAnthropic, records[0].Request.Provider)
-	require.Equal(t, "claude-3-opus-20240229", records[0].Request.Model)
-	require.Equal(t, false, records[0].Request.Stream)
-	require.NotNil(t, records[0].Response)
-	require.Equal(t, "end_turn", records[0].Response.FinishReason)
-}
-
-func TestServer_OpenAIStreaming(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	reqBody := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{
-				"role":    "user",
-				"content": "Hello!",
-			},
-		},
-		"stream": true,
-	}
-	bodyBytes, err := json.Marshal(reqBody)
-	require.NoError(t, err)
-
-	url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes))
-	require.NoError(t, err)
-	req.Header.Set("Content-Type", "application/json")
-
-	resp, err := http.DefaultClient.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	require.Equal(t, http.StatusOK, resp.StatusCode)
-	require.Equal(t, "text/event-stream", resp.Header.Get("Content-Type"))
-
-	// Read streaming response
-	buf := make([]byte, 4096)
-	n, err := resp.Body.Read(buf)
-	require.NoError(t, err)
-	content := string(buf[:n])
-	require.Contains(t, content, "data:")
-	require.Contains(t, content, "chat.completion.chunk")
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 1
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Verify stored request has stream flag
-	apiURL := fmt.Sprintf("%s/api/requests", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.Equal(t, true, records[0].Request.Stream)
-	require.Equal(t, true, records[0].Response.Stream)
-}
-
-func TestServer_AnthropicStreaming(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	reqBody := map[string]interface{}{
-		"model": "claude-3-opus-20240229",
-		"messages": []map[string]interface{}{
-			{
-				"role":    "user",
-				"content": "Hello!",
-			},
-		},
-		"max_tokens": 1024,
-		"stream":     true,
-	}
-	bodyBytes, err := json.Marshal(reqBody)
-	require.NoError(t, err)
-
-	url := fmt.Sprintf("%s/v1/messages", srv.APIAddress())
-	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes))
-	require.NoError(t, err)
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("anthropic-version", "2023-06-01")
-
-	resp, err := http.DefaultClient.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	require.Equal(t, http.StatusOK, resp.StatusCode)
-	require.Equal(t, "text/event-stream", resp.Header.Get("Content-Type"))
-
-	// Read streaming response
-	buf := make([]byte, 4096)
-	n, err := resp.Body.Read(buf)
-	require.NoError(t, err)
-	content := string(buf[:n])
-	require.Contains(t, content, "data:")
-	require.Contains(t, content, "message_start")
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 1
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Verify stored request has stream flag
-	apiURL := fmt.Sprintf("%s/api/requests?provider=anthropic", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.Equal(t, true, records[0].Request.Stream)
-	require.Equal(t, true, records[0].Response.Stream)
-}
-
-func TestServer_FilterByUserID(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	// Send request with user token 1
-	reqBody1 := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{"role": "user", "content": "Hello"},
-		},
-	}
-	bodyBytes1, _ := json.Marshal(reqBody1)
-	url1 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req1, _ := http.NewRequestWithContext(ctx, http.MethodPost, url1, bytes.NewReader(bodyBytes1))
-	req1.Header.Set("Content-Type", "application/json")
-	req1.Header.Set("Authorization", "Bearer user-token-12345")
-	_, _ = http.DefaultClient.Do(req1)
-
-	// Send request with user token 2
-	reqBody2 := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{"role": "user", "content": "World"},
-		},
-	}
-	bodyBytes2, _ := json.Marshal(reqBody2)
-	url2 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req2, _ := http.NewRequestWithContext(ctx, http.MethodPost, url2, bytes.NewReader(bodyBytes2))
-	req2.Header.Set("Content-Type", "application/json")
-	req2.Header.Set("Authorization", "Bearer user-token-67890")
-	_, _ = http.DefaultClient.Do(req2)
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 2
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Filter by user_id (first 8 chars of token)
-	apiURL := fmt.Sprintf("%s/api/requests?user_id=user-tok", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.True(t, strings.HasPrefix(records[0].Request.UserID, "user-tok"))
-}
-
-func TestServer_FilterByProvider(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	// Send OpenAI request
-	reqBody1 := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{"role": "user", "content": "Hello"},
-		},
-	}
-	bodyBytes1, _ := json.Marshal(reqBody1)
-	url1 := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req1, _ := http.NewRequestWithContext(ctx, http.MethodPost, url1, bytes.NewReader(bodyBytes1))
-	req1.Header.Set("Content-Type", "application/json")
-	_, _ = http.DefaultClient.Do(req1)
-
-	// Send Anthropic request
-	reqBody2 := map[string]interface{}{
-		"model": "claude-3-opus-20240229",
-		"messages": []map[string]interface{}{
-			{"role": "user", "content": "World"},
-		},
-		"max_tokens": 1024,
-	}
-	bodyBytes2, _ := json.Marshal(reqBody2)
-	url2 := fmt.Sprintf("%s/v1/messages", srv.APIAddress())
-	req2, _ := http.NewRequestWithContext(ctx, http.MethodPost, url2, bytes.NewReader(bodyBytes2))
-	req2.Header.Set("Content-Type", "application/json")
-	req2.Header.Set("anthropic-version", "2023-06-01")
-	_, _ = http.DefaultClient.Do(req2)
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 2
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Filter by provider
-	apiURL := fmt.Sprintf("%s/api/requests?provider=openai", srv.APIAddress())
-	apiReq, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
-	require.NoError(t, err)
-
-	apiResp, err := http.DefaultClient.Do(apiReq)
-	require.NoError(t, err)
-	defer apiResp.Body.Close()
-
-	var records []llmmock.RequestRecord
-	err = json.NewDecoder(apiResp.Body).Decode(&records)
-	require.NoError(t, err)
-	require.Len(t, records, 1)
-	require.Equal(t, llmmock.ProviderOpenAI, records[0].Request.Provider)
-}
-
-func TestServer_Purge(t *testing.T) {
-	t.Parallel()
-
-	ctx := context.Background()
-	srv := new(llmmock.Server)
-	err := srv.Start(ctx, llmmock.Config{
-		HostAddress: "127.0.0.1",
-		APIPort:     0,
-		Logger:      slogtest.Make(t, nil),
-	})
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	// Send a request
-	reqBody := map[string]interface{}{
-		"model": "gpt-4",
-		"messages": []map[string]interface{}{
-			{"role": "user", "content": "Hello"},
-		},
-	}
-	bodyBytes, _ := json.Marshal(reqBody)
-	url := fmt.Sprintf("%s/v1/chat/completions", srv.APIAddress())
-	req, _ := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes))
-	req.Header.Set("Content-Type", "application/json")
-	_, _ = http.DefaultClient.Do(req)
-
-	require.Eventually(t, func() bool {
-		return srv.RequestCount() == 1
-	}, testutil.WaitShort, testutil.IntervalMedium)
-
-	// Purge
-	purgeURL := fmt.Sprintf("%s/api/purge", srv.APIAddress())
-	purgeReq, err := http.NewRequestWithContext(ctx, http.MethodPost, purgeURL, nil)
-	require.NoError(t, err)
-
-	purgeResp, err := http.DefaultClient.Do(purgeReq)
-	require.NoError(t, err)
-	defer purgeResp.Body.Close()
-	require.Equal(t, http.StatusOK, purgeResp.StatusCode)
-
-	require.Equal(t, 0, srv.RequestCount())
-}

From 0362f92dda9c0fcb8e0c86dc00d292671496ff76 Mon Sep 17 00:00:00 2001
From: Sas Swart
Date: Tue, 16 Dec 2025 10:23:03 +0000
Subject: [PATCH 9/9] make lint/go

---
 scaletest/llmmock/server.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scaletest/llmmock/server.go b/scaletest/llmmock/server.go
index d3ced5f844b1a..2238ec7fd6ba9 100644
--- a/scaletest/llmmock/server.go
+++ b/scaletest/llmmock/server.go
@@ -165,7 +165,7 @@ func (s *Server) startAPIServer(ctx context.Context) error {
 		ReadHeaderTimeout: 10 * time.Second,
 	}
 
-	listener, err := net.Listen("tcp", fmt.Sprintf(s.address))
+	listener, err := net.Listen("tcp", s.address)
 	if err != nil {
 		return xerrors.Errorf("listen on %s: %w", s.address, err)
 	}