diff --git a/cli/exp_scaletest.go b/cli/exp_scaletest.go index 419b1955477b9..cf79ec7ebcaaa 100644 --- a/cli/exp_scaletest.go +++ b/cli/exp_scaletest.go @@ -67,6 +67,8 @@ func (r *RootCmd) scaletestCmd() *serpent.Command { r.scaletestTaskStatus(), r.scaletestSMTP(), r.scaletestPrebuilds(), + r.scaletestBridge(), + r.scaletestLLMMock(), }, } diff --git a/cli/exp_scaletest_bridge.go b/cli/exp_scaletest_bridge.go new file mode 100644 index 0000000000000..b45da22cc1432 --- /dev/null +++ b/cli/exp_scaletest_bridge.go @@ -0,0 +1,278 @@ +//go:build !slim + +package cli + +import ( + "fmt" + "net/http" + "os/signal" + "strconv" + + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/xerrors" + + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/scaletest/bridge" + "github.com/coder/coder/v2/scaletest/createusers" + "github.com/coder/coder/v2/scaletest/harness" + "github.com/coder/serpent" +) + +func (r *RootCmd) scaletestBridge() *serpent.Command { + var ( + userCount int64 + noCleanup bool + mode string + upstreamURL string + directToken string + provider string + requestCount int64 + model string + stream bool + requestPayloadSize int64 + + timeoutStrategy = &timeoutFlags{} + cleanupStrategy = newScaletestCleanupStrategy() + output = &scaletestOutputFlags{} + ) + + cmd := &serpent.Command{ + Use: "bridge", + Short: "Generate load on the AI Bridge service.", + Long: `Generate load on the AI Bridge service by making requests to OpenAI or Anthropic APIs. + +Examples: + # Test OpenAI API through bridge + coder scaletest bridge --mode bridge --provider openai --user-count 10 --request-count 5 + + # Test Anthropic API through bridge + coder scaletest bridge --mode bridge --provider anthropic --user-count 10 --request-count 5 + + # Test directly against mock server + coder scaletest bridge --mode direct --provider openai --upstream-url http://localhost:8080/v1/chat/completions + +The load generator builds conversation history over time, with each request including +all previous messages in the conversation.`, + Handler: func(inv *serpent.Invocation) error { + ctx := inv.Context() + client, err := r.InitClient(inv) + if err != nil { + return err + } + + notifyCtx, stop := signal.NotifyContext(ctx, StopSignals...) 
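+ // notifyCtx is canceled when an interrupt signal arrives; the handler checks it after the run so an interrupted test skips writing stats instead of reporting partial results.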
+ defer stop() + ctx = notifyCtx + + if mode != "bridge" && mode != "direct" { + return xerrors.Errorf("--mode must be either 'bridge' or 'direct', got %q", mode) + } + + var me codersdk.User + if mode == "bridge" { + var err error + me, err = requireAdmin(ctx, client) + if err != nil { + return err + } + } else if upstreamURL == "" { + return xerrors.Errorf("--upstream-url must be set when using --mode direct") + } + + client.HTTPClient = &http.Client{ + Transport: &codersdk.HeaderTransport{ + Transport: http.DefaultTransport, + Header: map[string][]string{ + codersdk.BypassRatelimitHeader: {"true"}, + }, + }, + } + + if userCount <= 0 { + return xerrors.Errorf("--user-count must be greater than 0") + } + + if requestCount <= 0 { + requestCount = 1 + } + if provider == "" { + provider = "openai" + } + if model == "" { + if provider == "anthropic" { + model = "claude-3-opus-20240229" + } else { + model = "gpt-4" + } + } + + runnerCount := userCount + + outputs, err := output.parse() + if err != nil { + return xerrors.Errorf("could not parse --output flags") + } + + reg := prometheus.NewRegistry() + metrics := bridge.NewMetrics(reg) + + if mode == "bridge" { + _, _ = fmt.Fprintln(inv.Stderr, "Bridge mode: creating users and making requests through AI Bridge...") + } else { + _, _ = fmt.Fprintf(inv.Stderr, "Direct mode: making requests directly to %s\n", upstreamURL) + } + + configs := make([]bridge.Config, 0, runnerCount) + for range runnerCount { + config := bridge.Config{ + Mode: bridge.RequestMode(mode), + Metrics: metrics, + Provider: provider, + RequestCount: int(requestCount), + Model: model, + Stream: stream, + RequestPayloadSize: int(requestPayloadSize), + } + + if mode == "direct" { + config.UpstreamURL = upstreamURL + config.DirectToken = directToken + } else { + if len(me.OrganizationIDs) == 0 { + return xerrors.Errorf("admin user must have at least one organization") + } + config.User = createusers.Config{ + OrganizationID: me.OrganizationIDs[0], + } + } + + if err := config.Validate(); err != nil { + return xerrors.Errorf("validate config: %w", err) + } + configs = append(configs, config) + } + + th := harness.NewTestHarness(timeoutStrategy.wrapStrategy(harness.ConcurrentExecutionStrategy{}), cleanupStrategy.toStrategy()) + + for i, config := range configs { + id := strconv.Itoa(i) + name := fmt.Sprintf("bridge-%s", id) + var runner harness.Runnable = bridge.NewRunner(client, config) + th.AddRun(name, id, runner) + } + + _, _ = fmt.Fprintln(inv.Stderr, "Running bridge scaletest...") + testCtx, testCancel := timeoutStrategy.toContext(ctx) + defer testCancel() + err = th.Run(testCtx) + if err != nil { + return xerrors.Errorf("run test harness (harness failure, not a test failure): %w", err) + } + + // If the command was interrupted, skip stats. 
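+ // Results from a partially-executed harness run are not representative, so no outputs are written.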
+ if notifyCtx.Err() != nil { + return notifyCtx.Err() + } + + res := th.Results() + + for _, o := range outputs { + err = o.write(res, inv.Stdout) + if err != nil { + return xerrors.Errorf("write output %q to %q: %w", o.format, o.path, err) + } + } + + if !noCleanup { + _, _ = fmt.Fprintln(inv.Stderr, "\nCleaning up...") + cleanupCtx, cleanupCancel := cleanupStrategy.toContext(ctx) + defer cleanupCancel() + err = th.Cleanup(cleanupCtx) + if err != nil { + return xerrors.Errorf("cleanup tests: %w", err) + } + } + + if res.TotalFail > 0 { + return xerrors.New("load test failed, see above for more details") + } + + return nil + }, + } + + cmd.Options = serpent.OptionSet{ + { + Flag: "user-count", + FlagShorthand: "c", + Env: "CODER_SCALETEST_BRIDGE_USER_COUNT", + Description: "Required: Number of concurrent runners (in bridge mode, each creates a coder user).", + Value: serpent.Int64Of(&userCount), + Required: true, + }, + { + Flag: "mode", + Env: "CODER_SCALETEST_BRIDGE_MODE", + Default: "direct", + Description: "Request mode: 'bridge' (create users and use AI Bridge) or 'direct' (make requests directly to upstream-url).", + Value: serpent.StringOf(&mode), + }, + { + Flag: "upstream-url", + Env: "CODER_SCALETEST_BRIDGE_UPSTREAM_URL", + Description: "URL to make requests to directly (required in direct mode, e.g., http://localhost:8080/v1/chat/completions).", + Value: serpent.StringOf(&upstreamURL), + }, + { + Flag: "direct-token", + Env: "CODER_SCALETEST_BRIDGE_DIRECT_TOKEN", + Description: "Bearer token for direct mode (optional, uses client token if not set).", + Value: serpent.StringOf(&directToken), + }, + { + Flag: "provider", + Env: "CODER_SCALETEST_BRIDGE_PROVIDER", + Default: "openai", + Description: "API provider to use: 'openai' or 'anthropic'.", + Value: serpent.StringOf(&provider), + }, + { + Flag: "request-count", + Env: "CODER_SCALETEST_BRIDGE_REQUEST_COUNT", + Default: "1", + Description: "Number of sequential requests to make per runner.", + Value: serpent.Int64Of(&requestCount), + }, + { + Flag: "model", + Env: "CODER_SCALETEST_BRIDGE_MODEL", + Default: "gpt-4", + Description: "Model to use for requests.", + Value: serpent.StringOf(&model), + }, + { + Flag: "stream", + Env: "CODER_SCALETEST_BRIDGE_STREAM", + Description: "Enable streaming requests.", + Value: serpent.BoolOf(&stream), + }, + { + Flag: "request-payload-size", + Env: "CODER_SCALETEST_BRIDGE_REQUEST_PAYLOAD_SIZE", + Default: "0", + Description: "Size in bytes of the request payload (user message content). 
If 0, uses default message content.", + Value: serpent.Int64Of(&requestPayloadSize), + }, + { + Flag: "no-cleanup", + Env: "CODER_SCALETEST_NO_CLEANUP", + Description: "Do not clean up resources after the test completes.", + Value: serpent.BoolOf(&noCleanup), + }, + } + + timeoutStrategy.attach(&cmd.Options) + cleanupStrategy.attach(&cmd.Options) + output.attach(&cmd.Options) + return cmd +} diff --git a/cli/exp_scaletest_llmmock.go b/cli/exp_scaletest_llmmock.go new file mode 100644 index 0000000000000..2d03a08938ebb --- /dev/null +++ b/cli/exp_scaletest_llmmock.go @@ -0,0 +1,120 @@ +//go:build !slim + +package cli + +import ( + "fmt" + "net/http" + "os/signal" + "time" + + "golang.org/x/xerrors" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/sloghuman" + "github.com/coder/coder/v2/scaletest/llmmock" + "github.com/coder/serpent" +) + +func (*RootCmd) scaletestLLMMock() *serpent.Command { + var ( + address string + artificialLatency time.Duration + responsePayloadSize int64 + + pprofEnable bool + pprofAddress string + + traceEnable bool + ) + cmd := &serpent.Command{ + Use: "llm-mock", + Short: "Start a mock LLM API server for testing", + Long: `Start a mock LLM API server that simulates OpenAI and Anthropic APIs`, + Handler: func(inv *serpent.Invocation) error { + ctx, stop := signal.NotifyContext(inv.Context(), StopSignals...) + defer stop() + + logger := slog.Make(sloghuman.Sink(inv.Stderr)).Leveled(slog.LevelInfo) + + if pprofEnable { + _ = http.DefaultServeMux + closePprof := ServeHandler(ctx, logger, nil, pprofAddress, "pprof") + defer closePprof() + logger.Info(ctx, "pprof server started", slog.F("address", pprofAddress)) + } + + config := llmmock.Config{ + Address: address, + Logger: logger, + ArtificialLatency: artificialLatency, + ResponsePayloadSize: int(responsePayloadSize), + PprofEnable: pprofEnable, + PprofAddress: pprofAddress, + TraceEnable: traceEnable, + } + srv := new(llmmock.Server) + + if err := srv.Start(ctx, config); err != nil { + return xerrors.Errorf("start mock LLM server: %w", err) + } + defer func() { + _ = srv.Stop() + }() + + _, _ = fmt.Fprintf(inv.Stdout, "Mock LLM API server started on %s\n", srv.APIAddress()) + _, _ = fmt.Fprintf(inv.Stdout, " OpenAI endpoint: %s/v1/chat/completions\n", srv.APIAddress()) + _, _ = fmt.Fprintf(inv.Stdout, " Anthropic endpoint: %s/v1/messages\n", srv.APIAddress()) + + <-ctx.Done() + return nil + }, + } + + cmd.Options = []serpent.Option{ + { + Flag: "address", + Env: "CODER_SCALETEST_LLM_MOCK_ADDRESS", + Default: "localhost", + Description: "Address to bind the mock LLM API server. Can include a port (e.g., 'localhost:8080' or ':8080'). Uses a random port if no port is specified.", + Value: serpent.StringOf(&address), + }, + { + Flag: "artificial-latency", + Env: "CODER_SCALETEST_LLM_MOCK_ARTIFICIAL_LATENCY", + Default: "0s", + Description: "Artificial latency to add to each response (e.g., 100ms, 1s). Simulates slow upstream processing.", + Value: serpent.DurationOf(&artificialLatency), + }, + { + Flag: "response-payload-size", + Env: "CODER_SCALETEST_LLM_MOCK_RESPONSE_PAYLOAD_SIZE", + Default: "0", + Description: "Size in bytes of the response payload. 
If 0, uses default context-aware responses.", + Value: serpent.Int64Of(&responsePayloadSize), + }, + { + Flag: "pprof-enable", + Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ENABLE", + Default: "false", + Description: "Serve pprof metrics on the address defined by pprof-address.", + Value: serpent.BoolOf(&pprofEnable), + }, + { + Flag: "pprof-address", + Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ADDRESS", + Default: "127.0.0.1:6060", + Description: "The bind address to serve pprof.", + Value: serpent.StringOf(&pprofAddress), + }, + { + Flag: "trace-enable", + Env: "CODER_SCALETEST_LLM_MOCK_TRACE_ENABLE", + Default: "false", + Description: "Whether application tracing data is collected. It exports to a backend configured by environment variables. See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md.", + Value: serpent.BoolOf(&traceEnable), + }, + } + + return cmd +} diff --git a/go.mod b/go.mod index 17fb110747803..62f0b5353c5c5 100644 --- a/go.mod +++ b/go.mod @@ -440,7 +440,7 @@ require ( go.opentelemetry.io/collector/pdata/pprofile v0.121.0 // indirect go.opentelemetry.io/collector/semconv v0.123.0 // indirect go.opentelemetry.io/contrib v1.19.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/proto/otlp v1.7.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/scaletest/bridge/config.go b/scaletest/bridge/config.go new file mode 100644 index 0000000000000..ac7f7460a3aea --- /dev/null +++ b/scaletest/bridge/config.go @@ -0,0 +1,95 @@ +package bridge + +import ( + "golang.org/x/xerrors" + + "github.com/google/uuid" + + "github.com/coder/coder/v2/scaletest/createusers" +) + +type RequestMode string + +const ( + RequestModeBridge RequestMode = "bridge" + RequestModeDirect RequestMode = "direct" +) + +type Config struct { + // Mode determines how requests are made. + // "bridge": Create users in Coder and use their session tokens to make requests through AI Bridge. + // "direct": Make requests directly to UpstreamURL without user creation. + Mode RequestMode `json:"mode"` + + // User is the configuration for the user to create. + // Required in bridge mode. + User createusers.Config `json:"user"` + + // UpstreamURL is the URL to make requests to directly. + // Only used in direct mode. + UpstreamURL string `json:"upstream_url"` + + // DirectToken is the Bearer token for direct mode. + // If not set in direct mode, uses the client's token. + DirectToken string `json:"direct_token"` + + // Provider is the API provider to use: "openai" or "anthropic". + Provider string `json:"provider"` + + // RequestCount is the number of requests to make per runner. + RequestCount int `json:"request_count"` + + // Model is the model to use for requests. + Model string `json:"model"` + + // Stream indicates whether to use streaming requests. + Stream bool `json:"stream"` + + // RequestPayloadSize is the size in bytes of the request payload (user message content). + // If 0, uses default message content. 
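+ // This mirrors the --request-payload-size flag of the bridge scaletest command.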
+ RequestPayloadSize int `json:"request_payload_size"` + + Metrics *Metrics `json:"-"` +} + +func (c Config) Validate() error { + if c.Metrics == nil { + return xerrors.New("metrics must be set") + } + + // Validate mode + if c.Mode != RequestModeBridge && c.Mode != RequestModeDirect { + return xerrors.New("mode must be either 'bridge' or 'direct'") + } + + if c.RequestCount <= 0 { + return xerrors.New("request_count must be greater than 0") + } + if c.Model == "" { + return xerrors.New("model must be set") + } + + // Validate provider + if c.Provider != "openai" && c.Provider != "anthropic" { + return xerrors.New("provider must be either 'openai' or 'anthropic'") + } + + if c.Mode == RequestModeDirect { + // In direct mode, UpstreamURL must be set. + if c.UpstreamURL == "" { + return xerrors.New("upstream_url must be set in direct mode") + } + return nil + } + + // In bridge mode, User config is required. + if c.User.OrganizationID == uuid.Nil { + return xerrors.New("user organization_id must be set in bridge mode") + } + + if err := c.User.Validate(); err != nil { + return xerrors.Errorf("user config: %w", err) + } + + return nil +} diff --git a/scaletest/bridge/local-observability/alloy/config.alloy b/scaletest/bridge/local-observability/alloy/config.alloy new file mode 100644 index 0000000000000..ed40a15d67e9b --- /dev/null +++ b/scaletest/bridge/local-observability/alloy/config.alloy @@ -0,0 +1,91 @@ +// Grafana Alloy configuration to scrape pprof from develop.sh and forward to Pyroscope +// The develop.sh server exposes pprof at /api/v2/debug/pprof/ instead of /debug/pprof/ + +pyroscope.scrape "coderd" { + targets = [ + { + "__address__" = "host.docker.internal:3000", + "service_name" = "coderd", + }, + ] + + authorization { + credentials = "" + type = "Bearer" + } + + forward_to = [pyroscope.write.local.receiver] + + profiling_config { + profile.process_cpu { + enabled = true + delta = true + path = "/api/v2/debug/pprof/profile" + } + profile.memory { + enabled = true + path = "/api/v2/debug/pprof/allocs" + } + profile.goroutine { + enabled = true + path = "/api/v2/debug/pprof/goroutine" + } + profile.block { + enabled = false + path = "/api/v2/debug/pprof/block" + } + profile.mutex { + enabled = false + path = "/api/v2/debug/pprof/mutex" + } + } + + delta_profiling_duration="2s" + scrape_interval = "3s" + scrape_timeout = "10s" +} + +pyroscope.scrape "llmmock" { + targets = [ + { + "__address__" = "host.docker.internal:6061", + "service_name" = "llmmock", + }, + ] + + forward_to = [pyroscope.write.local.receiver] + + profiling_config { + profile.process_cpu { + enabled = true + delta = true + path = "/debug/pprof/profile" + } + profile.memory { + enabled = true + path = "/debug/pprof/allocs" + } + profile.goroutine { + enabled = true + path = "/debug/pprof/goroutine" + } + profile.block { + enabled = false + path = "/debug/pprof/block" + } + profile.mutex { + enabled = false + path = "/debug/pprof/mutex" + } + } + + delta_profiling_duration="2s" + scrape_interval = "3s" + scrape_timeout = "10s" +} + +pyroscope.write "local" { + endpoint { + url = "http://pyroscope:4040/" + } +} diff --git a/scaletest/bridge/local-observability/docker-compose.yml b/scaletest/bridge/local-observability/docker-compose.yml new file mode 100644 index 0000000000000..6e02cc9db768c --- /dev/null +++ b/scaletest/bridge/local-observability/docker-compose.yml @@ -0,0 +1,132 @@ +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + container_name: prometheus + ports: + - "9090:9090" + 
volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--storage.tsdb.retention.time=30d' + extra_hosts: + - "host.docker.internal:host-gateway" + networks: + - observability + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + ports: + - "3100:3000" + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_INSTALL_PLUGINS=pyroscope-datasource,pyroscope-panel + networks: + - observability + depends_on: + - prometheus + - pyroscope + - tempo + - loki + restart: unless-stopped + + pyroscope: + image: grafana/pyroscope:latest + container_name: pyroscope + ports: + - "4040:4040" + volumes: + - ./pyroscope/pyroscope.yml:/etc/pyroscope/pyroscope.yml + - pyroscope-data:/var/lib/pyroscope + command: + - server + - --config=/etc/pyroscope/pyroscope.yml + networks: + - observability + restart: unless-stopped + + grafana-alloy: + image: grafana/alloy:latest + container_name: grafana-alloy + volumes: + - ./alloy/config.alloy:/etc/alloy/config.alloy:ro + command: + - run + - --server.http.listen-addr=0.0.0.0:12345 + - --storage.path=/var/lib/alloy/data + - /etc/alloy/config.alloy + extra_hosts: + - "host.docker.internal:host-gateway" + networks: + - observability + depends_on: + - pyroscope + restart: unless-stopped + + tempo: + image: grafana/tempo:latest + container_name: tempo + ports: + - "3200:3200" # Tempo HTTP + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + volumes: + - ./tempo/tempo.yml:/etc/tempo/tempo.yml + command: + - -config.file=/etc/tempo/tempo.yml + - -target=all + networks: + - observability + restart: unless-stopped + + loki: + image: grafana/loki:latest + container_name: loki + ports: + - "3101:3100" + volumes: + - ./loki/loki.yml:/etc/loki/local-config.yaml + - loki-data:/loki + command: + - -config.file=/etc/loki/local-config.yaml + networks: + - observability + restart: unless-stopped + + promtail: + image: grafana/promtail:latest + container_name: promtail + volumes: + - ./promtail/promtail.yml:/etc/promtail/config.yml:ro + - ./logs:/var/log/coder:ro + command: + - -config.file=/etc/promtail/config.yml + networks: + - observability + depends_on: + - loki + restart: unless-stopped + + +volumes: + prometheus-data: + grafana-data: + pyroscope-data: + loki-data: + +networks: + observability: + driver: bridge diff --git a/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json new file mode 100644 index 0000000000000..ecc40c4fdf1dd --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/bridge.json @@ -0,0 +1,1991 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 0, + "links": [], + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + 
"x": 0, + "y": 0 + }, + "id": 6, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "Total user and system CPU time spent in seconds.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "process_cpu_seconds_total-sum(rate)" + } + ], + "title": "process_cpu_seconds_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Resident memory size in bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 1, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(process_resident_memory_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "process_resident_memory_bytes-avg" + } + ], + "title": "process_resident_memory_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of bytes transmitted by the 
process over the network.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 5, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(process_network_transmit_bytes_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "range": true, + "refId": "process_network_receive_bytes_total-sum(rate)" + } + ], + "title": "process_network_transmit_bytes_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of bytes received by the process over the network.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(process_network_receive_bytes_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "process_network_receive_bytes_total-sum(rate)" + } + ], + "title": "process_network_receive_bytes_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of open 
file descriptors.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "avg" + }, + "properties": [] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 3, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(process_open_fds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "process_open_fds-avg" + } + ], + "title": "process_open_fds", + "type": "timeseries" + } + ], + "title": "Process", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 7, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of idle connections.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 8, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_sql_idle_connections{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_sql_idle_connections-avg" + } + ], + "title": "go_sql_idle_connections", + "type": "timeseries" + }, 
+ { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of connections currently in use.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 9, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_sql_in_use_connections{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_sql_in_use_connections-avg" + } + ], + "title": "go_sql_in_use_connections", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The total time blocked waiting for a new connection.\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 10, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_sql_wait_duration_seconds_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_sql_wait_duration_seconds_total-sum(rate)" + } + ], + "title": "go_sql_wait_duration_seconds_total", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 11, + "maxDataPoints": 500, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 32 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "selectionMode": "x", + "showValue": "auto", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum by (le) (rate(coderd_db_tx_duration_seconds_bucket{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "format": "heatmap", + "fromExploreMetrics": false, + "refId": "coderd_db_tx_duration_seconds_bucket-heatmap" + } + ], + "title": "coderd_db_tx_duration_seconds_bucket", + "type": "heatmap" + } + ], + "title": "SQL", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 12, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes allocated and currently in use, same as go_memstats_alloc_bytes. Equals to /memory/classes/heap/objects:bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 16, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_alloc_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_alloc_bytes-avg" + } + ], + "title": "go_memstats_heap_alloc_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes that are in use. 
Equals to /memory/classes/heap/objects:bytes + /memory/classes/heap/unused:bytes\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 17, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_inuse_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_inuse_bytes-avg" + } + ], + "title": "go_memstats_heap_inuse_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Number of heap bytes waiting to be used. Equals to /memory/classes/heap/released:bytes + /memory/classes/heap/free:bytes.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 18, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_memstats_heap_idle_bytes{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_memstats_heap_idle_bytes-avg" + } + ], + "title": "go_memstats_heap_idle_bytes", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Count of events that have been observed for the base 
metric (A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 14, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_gc_duration_seconds_count{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_gc_duration_seconds_count-sum(rate)" + } + ], + "title": "go_gc_duration_seconds_count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.\n\n**Type:** *summary*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 13, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(go_gc_duration_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "go_gc_duration_seconds-avg" + } + ], + "title": "go_gc_duration_seconds", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Total sum of all observed values for the base metric 
(A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 15, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(go_gc_duration_seconds_sum{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "go_gc_duration_seconds_sum-sum(rate)" + } + ], + "title": "go_gc_duration_seconds_sum", + "type": "timeseries" + } + ], + "title": "Garbage Collector", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 19, + "panels": [ + { + "datasource": { + "uid": "prometheus" + }, + "description": "The number of concurrent API requests.\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 20, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coderd_api_concurrent_requests{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coderd_api_concurrent_requests-avg" + } + ], + "title": "coderd_api_concurrent_requests", + "type": 
"timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "Cumulative counters for the observation buckets (Latency distribution of requests in seconds.)\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 21, + "maxDataPoints": 500, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 32 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "selectionMode": "x", + "showValue": "auto", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum by (le) (rate(coderd_api_request_latencies_seconds_bucket{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "format": "heatmap", + "fromExploreMetrics": false, + "refId": "coderd_api_request_latencies_seconds_bucket-heatmap" + } + ], + "title": "coderd_api_request_latencies_seconds_bucket", + "type": "heatmap" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The total number of processed API requests\n\n**Type:** *counter*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 22, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "sum(rate(coderd_api_requests_processed_total{__ignore_usage__=\"\", service=\"coderd\"}[$__rate_interval]))", + "fromExploreMetrics": false, + "legendFormat": "sum(rate)", + "refId": "coderd_api_requests_processed_total-sum(rate)" + } + ], + "title": "coderd_api_requests_processed_total", + "type": "timeseries" + } + ], + "title": "API", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 23, + "panels": [], + "title": "PubSub", + "type": "row" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The 
time taken to receive a message from a pubsub event channel\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 24, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coder_pubsub_receive_latency_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coder_pubsub_receive_latency_seconds-avg" + } + ], + "title": "coder_pubsub_receive_latency_seconds", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "description": "The time taken to send a message into a pubsub event channel\n\n**Type:** *gauge*\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 25, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "expr": "avg(coder_pubsub_send_latency_seconds{__ignore_usage__=\"\", service=\"coderd\"})", + "fromExploreMetrics": false, + "legendFormat": "avg", + "refId": "coder_pubsub_send_latency_seconds-avg" + } + ], + "title": "coder_pubsub_send_latency_seconds", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "2025-12-11T18:09:40.000Z", + "to": 
"2025-12-11T18:15:04.000Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Bridge Load Testing Metrics", + "uid": "adjfnrq", + "version": 25 +} diff --git a/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000000..9141a4532b95a --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards + foldersFromFilesStructure: true diff --git a/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml b/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000000000..1d15bafa90c9b --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,31 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: "15s" + + - name: Pyroscope + type: pyroscope-datasource + uid: pyroscope + access: proxy + url: http://pyroscope:4040 + editable: true + jsonData: + keepNestedSets: true + minStep: "15s" + + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + editable: true + jsonData: + maxLines: 1000 diff --git a/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml b/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml new file mode 100644 index 0000000000000..27856ee6530ef --- /dev/null +++ b/scaletest/bridge/local-observability/grafana/provisioning/datasources/tempo.yml @@ -0,0 +1,38 @@ +apiVersion: 1 + +datasources: + - name: Tempo + uid: tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: true + jsonData: + httpMethod: GET + + nodeGraph: + enabled: true + serviceMap: + datasourceUid: prometheus + + tracesToMetrics: + datasourceUid: prometheus + tags: + - key: service.name + value: service + - key: job + queries: + - name: P90 latency (spanmetrics) + query: sum(rate(tempo_spanmetrics_latency_bucket[$__interval])) by (le, service) + + # Optional: traces->logs (if you have Loki wired up) + # tracesToLogs: + # datasourceUid: loki + # tags: + # - job + # - instance + # mappedTags: + # - key: service.name + # value: service + # spanStartTimeShift: '1h' + # spanEndTimeShift: '1h' diff --git a/scaletest/bridge/local-observability/logs/.gitignore b/scaletest/bridge/local-observability/logs/.gitignore new file mode 100644 index 0000000000000..397b4a7624e35 --- /dev/null +++ b/scaletest/bridge/local-observability/logs/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/scaletest/bridge/local-observability/loki/loki.yml b/scaletest/bridge/local-observability/loki/loki.yml new file mode 100644 index 0000000000000..023e7337f2a2e --- /dev/null +++ b/scaletest/bridge/local-observability/loki/loki.yml @@ -0,0 +1,26 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + 
+schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + period: 24h diff --git a/scaletest/bridge/local-observability/prometheus/prometheus.yml b/scaletest/bridge/local-observability/prometheus/prometheus.yml new file mode 100644 index 0000000000000..8829c66d01808 --- /dev/null +++ b/scaletest/bridge/local-observability/prometheus/prometheus.yml @@ -0,0 +1,40 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + cluster: 'local-observability' + environment: 'development' + +# Alertmanager configuration (optional, can be added later) +# alerting: +# alertmanagers: +# - static_configs: +# - targets: [] + +# Load rules once and periodically evaluate them (optional) +# rule_files: +# - "alert_rules.yml" + +scrape_configs: + # Scrape Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # Coder server from develop.sh + # This scrapes metrics from a running ./scripts/develop.sh server + # Requires: Start develop.sh with --prometheus-enable flag or set CODER_PROMETHEUS_ENABLE=true + - job_name: 'coderd-develop' + static_configs: + - targets: ['host.docker.internal:2118'] + labels: + service: 'coderd' + environment: 'development' + instance: 'develop-sh' + scrape_interval: 8s + scrape_timeout: 5s + metrics_path: '/api/v2/metrics' + + - job_name: 'tempo-metrics-generator' + static_configs: + - targets: ['tempo:3200'] diff --git a/scaletest/bridge/local-observability/promtail/promtail.yml b/scaletest/bridge/local-observability/promtail/promtail.yml new file mode 100644 index 0000000000000..51b9c21b25d4a --- /dev/null +++ b/scaletest/bridge/local-observability/promtail/promtail.yml @@ -0,0 +1,36 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: coder-logs + static_configs: + - targets: + - localhost + labels: + job: coder-logs + __path__: /var/log/coder/* + pipeline_stages: + - regex: + expression: '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z|\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})' + - labels: + filename: + - timestamp: + source: timestamp + format: RFC3339Nano + fallback_formats: + - "2006-01-02 15:04:05" + - output: + source: message + relabel_configs: + - source_labels: + - __path__ + target_label: filename + regex: '.*/([^/]+)$' + replacement: '${1}' diff --git a/scaletest/bridge/local-observability/pyroscope/agent.yml b/scaletest/bridge/local-observability/pyroscope/agent.yml new file mode 100644 index 0000000000000..b019c958050fa --- /dev/null +++ b/scaletest/bridge/local-observability/pyroscope/agent.yml @@ -0,0 +1,12 @@ +# Pyroscope agent configuration +# This configures the agent to scrape pprof endpoints + +# Server address to forward profiles to +server-address: http://pyroscope:4040 + +# Scrape configuration +# Note: The Pyroscope agent may need to be configured differently +# depending on the version. This is a basic configuration. 
+ +# Logging +log-level: info diff --git a/scaletest/bridge/local-observability/pyroscope/pyroscope.yml b/scaletest/bridge/local-observability/pyroscope/pyroscope.yml new file mode 100644 index 0000000000000..db141dd01c73b --- /dev/null +++ b/scaletest/bridge/local-observability/pyroscope/pyroscope.yml @@ -0,0 +1,31 @@ +# Pyroscope server configuration +# This configures Pyroscope to collect profiling data from Go processes + +# Storage configuration +storage: + # Path where Pyroscope will store data + path: /var/lib/pyroscope + # Retention period (30 days = 720 hours) + retention: 720h + +# Server configuration +server: + # HTTP API address + api-bind-address: :4040 + # Base URL for the UI (adjust if behind a proxy) + base-url: http://localhost:4040 + +# Ingest configuration +# Pyroscope can ingest from: +# 1. HTTP endpoint (pprof format) - configure scrape targets below +# 2. Direct push from Go applications using pyroscope client +ingestion: + # Maximum number of samples per second + max_ingestion_rate: 10000 + +# Logging +log-level: info + +# Note: Pyroscope server mode doesn't natively support scraping pprof endpoints. +# Grafana Alloy is used to scrape pprof endpoints and forward to Pyroscope. +# See README.md for configuration details. diff --git a/scaletest/bridge/local-observability/tempo/tempo.yml b/scaletest/bridge/local-observability/tempo/tempo.yml new file mode 100644 index 0000000000000..e720ed9e3f3a3 --- /dev/null +++ b/scaletest/bridge/local-observability/tempo/tempo.yml @@ -0,0 +1,69 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + http: + endpoint: "0.0.0.0:4318" + grpc: + endpoint: "0.0.0.0:4317" + +ingester: + # how long a trace can be idle before it's flushed to a block (optional, but nice) + trace_idle_period: 10s + + # you already used this before; fine to keep + max_block_duration: 5m + + lifecycler: + ring: + kvstore: + store: memberlist # use in-memory memberlist ring (good for single-binary/docker) + replication_factor: 1 # single node, so 1 is fine + heartbeat_period: 5s # 👈 this must be > 0 + +metrics_generator: + # WAL for *metrics* generated from traces + storage: + path: /tmp/tempo/generator/wal + + # WAL for *traces* used by local-blocks (needed for TraceQL metrics) + # See MetricSummary/local-blocks notes. + traces_storage: + path: /tmp/tempo/generator/traces + + processor: + # Prometheus span metrics (RED style metrics) + span_metrics: {} + + # Service graph metrics (for service map / node graph) + service_graphs: {} + + # Local blocks enable TraceQL metrics API (/api/metrics/...) + local_blocks: + # Persist blocks so you can query a longer window than just in-memory + flush_to_storage: true + +compactor: + compaction: + # Totally fine to tweak; this is just a sane default for local dev + block_retention: 24h + +storage: + trace: + backend: local + + local: + path: /tmp/tempo/traces + +overrides: + defaults: + # Enable metrics-generator processors for the (default) tenant + # Note: dashes here, underscores in the config block. 
+ metrics_generator: + processors: + - span-metrics + - service-graphs + - local-blocks diff --git a/scaletest/bridge/metrics.go b/scaletest/bridge/metrics.go new file mode 100644 index 0000000000000..25a35f3e52bb4 --- /dev/null +++ b/scaletest/bridge/metrics.go @@ -0,0 +1,72 @@ +package bridge + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type Metrics struct { + bridgeErrors *prometheus.CounterVec + bridgeRequests *prometheus.CounterVec + bridgeDuration prometheus.Histogram + bridgeTokensTotal *prometheus.CounterVec +} + +func NewMetrics(reg prometheus.Registerer) *Metrics { + if reg == nil { + reg = prometheus.DefaultRegisterer + } + + errors := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_errors_total", + Help: "Total number of bridge errors", + }, []string{"action"}) + + requests := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_requests_total", + Help: "Total number of bridge requests", + }, []string{"status"}) + + duration := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_request_duration_seconds", + Help: "Duration of bridge requests in seconds", + Buckets: prometheus.DefBuckets, + }) + + tokens := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: "scaletest", + Name: "bridge_response_tokens_total", + Help: "Total number of tokens in bridge responses", + }, []string{"type"}) + + reg.MustRegister(errors, requests, duration, tokens) + + return &Metrics{ + bridgeErrors: errors, + bridgeRequests: requests, + bridgeDuration: duration, + bridgeTokensTotal: tokens, + } +} + +func (m *Metrics) AddError(action string) { + m.bridgeErrors.WithLabelValues(action).Inc() +} + +func (m *Metrics) AddRequest(status string) { + m.bridgeRequests.WithLabelValues(status).Inc() +} + +func (m *Metrics) ObserveDuration(duration float64) { + m.bridgeDuration.Observe(duration) +} + +func (m *Metrics) AddTokens(tokenType string, count int64) { + m.bridgeTokensTotal.WithLabelValues(tokenType).Add(float64(count)) +} diff --git a/scaletest/bridge/run.go b/scaletest/bridge/run.go new file mode 100644 index 0000000000000..2a264d6c3654a --- /dev/null +++ b/scaletest/bridge/run.go @@ -0,0 +1,482 @@ +package bridge + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "golang.org/x/xerrors" + + "cdr.dev/slog" + "cdr.dev/slog/sloggers/sloghuman" + + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel/attribute" + semconv "go.opentelemetry.io/otel/semconv/v1.14.0" + "go.opentelemetry.io/otel/semconv/v1.14.0/httpconv" + "go.opentelemetry.io/otel/trace" + + "github.com/coder/coder/v2/coderd/tracing" + "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/scaletest/createusers" + "github.com/coder/coder/v2/scaletest/harness" + "github.com/coder/coder/v2/scaletest/loadtestutil" + "github.com/coder/quartz" +) + +type ( + tracingContextKey struct{} + tracingContext struct { + provider string + model string + stream bool + requestNum int + mode RequestMode + } +) + +type tracingTransport struct { + cfg Config + underlying http.RoundTripper +} + +func newTracingTransport(cfg Config, underlying http.RoundTripper) *tracingTransport { + if underlying == nil { + underlying = http.DefaultTransport + } + return &tracingTransport{ + cfg: cfg, + underlying: 
otelhttp.NewTransport(underlying), + } +} + +func (t *tracingTransport) RoundTrip(req *http.Request) (*http.Response, error) { + aibridgeCtx, hasAIBridgeCtx := req.Context().Value(tracingContextKey{}).(tracingContext) + + resp, err := t.underlying.RoundTrip(req) + + if hasAIBridgeCtx { + ctx := req.Context() + if resp != nil && resp.Request != nil { + ctx = resp.Request.Context() + } + span := trace.SpanFromContext(ctx) + if span.IsRecording() { + span.SetAttributes( + attribute.String("aibridge.provider", aibridgeCtx.provider), + attribute.String("aibridge.model", aibridgeCtx.model), + attribute.Bool("aibridge.stream", aibridgeCtx.stream), + attribute.Int("aibridge.request_num", aibridgeCtx.requestNum), + attribute.String("aibridge.mode", string(aibridgeCtx.mode)), + ) + } + } + + return resp, err +} + +type Runner struct { + client *codersdk.Client + cfg Config + + createUserRunner *createusers.Runner + + clock quartz.Clock + httpClient *http.Client + + requestCount int64 + successCount int64 + failureCount int64 + totalDuration time.Duration + totalTokens int64 +} + +func NewRunner(client *codersdk.Client, cfg Config) *Runner { + return &Runner{ + client: client, + cfg: cfg, + clock: quartz.NewReal(), + httpClient: &http.Client{ + Timeout: 30 * time.Second, + Transport: newTracingTransport(cfg, http.DefaultTransport), + }, + } +} + +func (r *Runner) WithClock(clock quartz.Clock) *Runner { + r.clock = clock + return r +} + +var ( + _ harness.Runnable = &Runner{} + _ harness.Cleanable = &Runner{} + _ harness.Collectable = &Runner{} +) + +func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + + logs = loadtestutil.NewSyncWriter(logs) + logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug) + + var token string + var requestURL string + + if r.cfg.Mode == RequestModeDirect { + // Direct mode: skip user creation, use upstream URL directly + requestURL = r.cfg.UpstreamURL + if r.cfg.DirectToken != "" { + token = r.cfg.DirectToken + } else if r.client.SessionToken() != "" { + token = r.client.SessionToken() + } + logger.Info(ctx, "bridge runner in direct mode", slog.F("url", requestURL)) + } else { + // Bridge mode: create user and use AI Bridge endpoint + r.client.SetLogger(logger) + r.client.SetLogBodies(true) + + r.createUserRunner = createusers.NewRunner(r.client, r.cfg.User) + newUserAndToken, err := r.createUserRunner.RunReturningUser(ctx, id, logs) + if err != nil { + r.cfg.Metrics.AddError("create_user") + return xerrors.Errorf("create user: %w", err) + } + newUser := newUserAndToken.User + token = newUserAndToken.SessionToken + + logger.Info(ctx, "runner user created", slog.F("username", newUser.Username), slog.F("user_id", newUser.ID.String())) + + // Construct AI Bridge URL based on provider + if r.cfg.Provider == "anthropic" { + requestURL = fmt.Sprintf("%s/api/v2/aibridge/anthropic/v1/messages", r.client.URL) + } else { + requestURL = fmt.Sprintf("%s/api/v2/aibridge/openai/v1/chat/completions", r.client.URL) + } + logger.Info(ctx, "bridge runner in bridge mode", slog.F("url", requestURL), slog.F("provider", r.cfg.Provider)) + } + + requestCount := r.cfg.RequestCount + if requestCount <= 0 { + requestCount = 1 + } + model := r.cfg.Model + if model == "" { + model = "gpt-4" + } + + logger.Info(ctx, "bridge runner is ready", + slog.F("request_count", requestCount), + slog.F("model", model), + slog.F("stream", r.cfg.Stream), + ) + + for i := 0; i < requestCount; i++ { + if err := 
r.makeRequest(ctx, logger, requestURL, token, model, i); err != nil { + logger.Warn(ctx, "bridge request failed", + slog.F("request_num", i+1), + slog.F("error_type", "request_failed"), + slog.Error(err), + ) + r.cfg.Metrics.AddError("request") + r.cfg.Metrics.AddRequest("failure") + r.failureCount++ + + // Continue making requests even if one fails + continue + } + r.successCount++ + r.cfg.Metrics.AddRequest("success") + r.requestCount++ + } + + logger.Info(ctx, "bridge runner completed", + slog.F("total_requests", r.requestCount), + slog.F("success", r.successCount), + slog.F("failure", r.failureCount), + ) + + // Fail the run if any request failed + if r.failureCount > 0 { + return xerrors.Errorf("bridge runner failed: %d out of %d requests failed", r.failureCount, requestCount) + } + + return nil +} + +func (r *Runner) makeRequest(ctx context.Context, logger slog.Logger, url, token, model string, requestNum int) error { + start := r.clock.Now() + + ctx = context.WithValue(ctx, tracingContextKey{}, tracingContext{ + provider: r.cfg.Provider, + model: model, + stream: r.cfg.Stream, + requestNum: requestNum + 1, + mode: r.cfg.Mode, + }) + + var content string + if r.cfg.RequestPayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, r.cfg.RequestPayloadSize) + content = repeated[:r.cfg.RequestPayloadSize] + } else { + content = fmt.Sprintf("Hello, this is test request #%d from the bridge load generator.", requestNum+1) + } + + newUserMessage := map[string]string{ + "role": "user", + "content": content, + } + messages := make([]map[string]string, 0) + messages = append(messages, newUserMessage) + + var reqBody map[string]interface{} + if r.cfg.Provider == "anthropic" { + anthropicMessages := make([]map[string]interface{}, 0, len(messages)) + for _, msg := range messages { + anthropicMessages = append(anthropicMessages, map[string]interface{}{ + "role": msg["role"], + "content": []map[string]string{ + { + "type": "text", + "text": msg["content"], + }, + }, + }) + } + reqBody = map[string]interface{}{ + "model": model, + "messages": anthropicMessages, + "max_tokens": 1024, + "stream": r.cfg.Stream, + } + } else { + reqBody = map[string]interface{}{ + "model": model, + "messages": messages, + "stream": r.cfg.Stream, + } + } + + bodyBytes, err := json.Marshal(reqBody) + if err != nil { + return xerrors.Errorf("marshal request body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + if err != nil { + return xerrors.Errorf("create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + + logger.Debug(ctx, "making bridge request", + slog.F("url", url), + slog.F("request_num", requestNum+1), + slog.F("model", model), + ) + + resp, err := r.httpClient.Do(req) + if err != nil { + span := trace.SpanFromContext(req.Context()) + if span.IsRecording() { + span.RecordError(err) + } + logger.Warn(ctx, "request failed during execution", + slog.F("request_num", requestNum+1), + slog.Error(err), + ) + return xerrors.Errorf("execute request: %w", err) + } + defer resp.Body.Close() + + span := trace.SpanFromContext(req.Context()) + if span.IsRecording() { + span.SetAttributes(semconv.HTTPStatusCodeKey.Int(resp.StatusCode)) + span.SetStatus(httpconv.ClientStatus(resp.StatusCode)) + } + + duration := r.clock.Since(start) + r.totalDuration += duration + r.cfg.Metrics.ObserveDuration(duration.Seconds()) + + if resp.StatusCode != 
http.StatusOK { + body, _ := io.ReadAll(resp.Body) + err := xerrors.Errorf("request failed with status %d: %s", resp.StatusCode, string(body)) + span.RecordError(err) + return err + } + + if r.cfg.Stream { + err := r.handleStreamingResponse(ctx, logger, resp) + if err != nil { + span.RecordError(err) + return err + } + return nil + } + + return r.handleNonStreamingResponse(ctx, logger, resp, requestNum) +} + +func (r *Runner) handleNonStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response, requestNum int) error { + if r.cfg.Provider == "anthropic" { + return r.handleAnthropicResponse(ctx, logger, resp, requestNum) + } + return r.handleOpenAIResponse(ctx, logger, resp, requestNum) +} + +func (r *Runner) handleOpenAIResponse(ctx context.Context, logger slog.Logger, resp *http.Response, _ int) error { + var response struct { + ID string `json:"id"` + Model string `json:"model"` + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` + Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } `json:"usage"` + } + + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return xerrors.Errorf("decode response: %w", err) + } + + var assistantContent string + if len(response.Choices) > 0 { + assistantContent = response.Choices[0].Message.Content + logger.Debug(ctx, "received response", + slog.F("response_id", response.ID), + slog.F("content_length", len(assistantContent)), + ) + } + + if response.Usage.TotalTokens > 0 { + r.totalTokens += int64(response.Usage.TotalTokens) + r.cfg.Metrics.AddTokens("input", int64(response.Usage.PromptTokens)) + r.cfg.Metrics.AddTokens("output", int64(response.Usage.CompletionTokens)) + } + + return nil +} + +func (r *Runner) handleAnthropicResponse(ctx context.Context, logger slog.Logger, resp *http.Response, _ int) error { + var response struct { + ID string `json:"id"` + Model string `json:"model"` + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + } `json:"usage"` + } + + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return xerrors.Errorf("decode response: %w", err) + } + + var assistantContent string + if len(response.Content) > 0 { + assistantContent = response.Content[0].Text + logger.Debug(ctx, "received response", + slog.F("response_id", response.ID), + slog.F("content_length", len(assistantContent)), + ) + } + + totalTokens := response.Usage.InputTokens + response.Usage.OutputTokens + if totalTokens > 0 { + r.totalTokens += int64(totalTokens) + r.cfg.Metrics.AddTokens("input", int64(response.Usage.InputTokens)) + r.cfg.Metrics.AddTokens("output", int64(response.Usage.OutputTokens)) + } + + return nil +} + +func (*Runner) handleStreamingResponse(ctx context.Context, logger slog.Logger, resp *http.Response) error { + buf := make([]byte, 4096) + totalRead := 0 + for { + // Check for context cancellation before each read + if ctx.Err() != nil { + logger.Warn(ctx, "streaming response canceled", + slog.F("bytes_read", totalRead), + slog.Error(ctx.Err()), + ) + return xerrors.Errorf("stream canceled: %w", ctx.Err()) + } + + n, err := resp.Body.Read(buf) + if n > 0 { + totalRead += n + } + if err == io.EOF { + break + } + if err != nil { + // Check if error is due to context cancellation + if 
xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) { + logger.Warn(ctx, "streaming response read canceled", + slog.F("bytes_read", totalRead), + slog.Error(err), + ) + return xerrors.Errorf("stream read canceled: %w", err) + } + logger.Warn(ctx, "streaming response read error", + slog.F("bytes_read", totalRead), + slog.Error(err), + ) + return xerrors.Errorf("read stream: %w", err) + } + } + + logger.Debug(ctx, "received streaming response", slog.F("bytes_read", totalRead)) + return nil +} + +func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error { + // Only cleanup user in bridge mode + if r.cfg.Mode == RequestModeBridge && r.createUserRunner != nil { + _, _ = fmt.Fprintln(logs, "Cleaning up user...") + if err := r.createUserRunner.Cleanup(ctx, id, logs); err != nil { + return xerrors.Errorf("cleanup user: %w", err) + } + } + + return nil +} + +func (r *Runner) GetMetrics() map[string]any { + avgDuration := time.Duration(0) + if r.requestCount > 0 { + avgDuration = r.totalDuration / time.Duration(r.requestCount) + } + + return map[string]any{ + "request_count": r.requestCount, + "success_count": r.successCount, + "failure_count": r.failureCount, + "total_duration": r.totalDuration.String(), + "avg_duration": avgDuration.String(), + "total_tokens": r.totalTokens, + } +} diff --git a/scaletest/llmmock/server.go b/scaletest/llmmock/server.go new file mode 100644 index 0000000000000..2238ec7fd6ba9 --- /dev/null +++ b/scaletest/llmmock/server.go @@ -0,0 +1,529 @@ +package llmmock + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "strings" + "time" + + "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" + semconv "go.opentelemetry.io/otel/semconv/v1.14.0" + "go.opentelemetry.io/otel/semconv/v1.14.0/httpconv" + "go.opentelemetry.io/otel/semconv/v1.14.0/netconv" + "go.opentelemetry.io/otel/trace" + "golang.org/x/xerrors" + + "cdr.dev/slog" + + "github.com/coder/coder/v2/coderd/pproflabel" + "github.com/coder/coder/v2/coderd/tracing" +) + +// Server wraps the LLM mock server and provides an HTTP API to retrieve requests. 
+type Server struct { + httpServer *http.Server + httpListener net.Listener + logger slog.Logger + + address string + artificialLatency time.Duration + responsePayloadSize int + + tracerProvider trace.TracerProvider + closeTracing func(context.Context) error +} + +type Config struct { + Address string + Logger slog.Logger + ArtificialLatency time.Duration + ResponsePayloadSize int + + PprofEnable bool + PprofAddress string + + TraceEnable bool +} + +type llmRequest struct { + Model string `json:"model"` + Stream bool `json:"stream,omitempty"` +} + +type openAIMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type openAIResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []struct { + Index int `json:"index"` + Message openAIMessage `json:"message"` + FinishReason string `json:"finish_reason"` + } `json:"choices"` + Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } `json:"usage"` +} + +type anthropicResponse struct { + ID string `json:"id"` + Type string `json:"type"` + Role string `json:"role"` + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + Model string `json:"model"` + StopReason string `json:"stop_reason"` + StopSequence *string `json:"stop_sequence"` + Usage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + } `json:"usage"` +} + +func (s *Server) Start(ctx context.Context, cfg Config) error { + s.address = cfg.Address + s.logger = cfg.Logger + s.artificialLatency = cfg.ArtificialLatency + s.responsePayloadSize = cfg.ResponsePayloadSize + + if cfg.TraceEnable { + otel.SetTextMapPropagator( + propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + ), + ) + + tracerProvider, closeTracing, err := tracing.TracerProvider(ctx, "llm-mock", tracing.TracerOpts{ + Default: cfg.TraceEnable, + }) + if err != nil { + s.logger.Warn(ctx, "failed to initialize tracing", slog.Error(err)) + } else { + s.tracerProvider = tracerProvider + s.closeTracing = closeTracing + } + } + + if err := s.startAPIServer(ctx); err != nil { + return xerrors.Errorf("start API server: %w", err) + } + + return nil +} + +func (s *Server) Stop() error { + if s.httpServer != nil { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := s.httpServer.Shutdown(shutdownCtx); err != nil { + return xerrors.Errorf("shutdown HTTP server: %w", err) + } + } + if s.closeTracing != nil { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := s.closeTracing(shutdownCtx); err != nil { + s.logger.Warn(shutdownCtx, "failed to close tracing", slog.Error(err)) + } + } + return nil +} + +func (s *Server) APIAddress() string { + return fmt.Sprintf("http://%s", s.address) +} + +func (s *Server) startAPIServer(ctx context.Context) error { + mux := http.NewServeMux() + + mux.HandleFunc("POST /v1/chat/completions", s.handleOpenAI) + mux.HandleFunc("POST /v1/messages", s.handleAnthropic) + + var handler http.Handler = mux + if s.tracerProvider != nil { + handler = s.tracingMiddleware(handler) + } + + s.httpServer = &http.Server{ + Handler: handler, + ReadHeaderTimeout: 10 * time.Second, + } + + listener, err := net.Listen("tcp", s.address) + if err != nil { + return 
xerrors.Errorf("listen on %s: %w", s.address, err) + } + s.httpListener = listener + + pproflabel.Go(ctx, pproflabel.Service("llm-mock"), func(ctx context.Context) { + if err := s.httpServer.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) { + s.logger.Error(ctx, "http API server error", slog.Error(err)) + } + }) + + return nil +} + +func (s *Server) handleOpenAI(w http.ResponseWriter, r *http.Request) { + pproflabel.Do(r.Context(), pproflabel.Service("llm-mock"), func(ctx context.Context) { + s.handleOpenAIWithLabels(w, r.WithContext(ctx)) + }) +} + +func (s *Server) handleOpenAIWithLabels(w http.ResponseWriter, r *http.Request) { + s.logger.Debug(r.Context(), "handling OpenAI request") + defer s.logger.Debug(r.Context(), "handled OpenAI request") + + ctx := r.Context() + requestID := uuid.New() + now := time.Now() + + var req llmRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.logger.Error(ctx, "failed to parse OpenAI request", slog.Error(err)) + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + if s.artificialLatency > 0 { + time.Sleep(s.artificialLatency) + } + + var resp openAIResponse + resp.ID = fmt.Sprintf("chatcmpl-%s", requestID.String()[:8]) + resp.Object = "chat.completion" + resp.Created = now.Unix() + resp.Model = req.Model + + var responseContent string + if s.responsePayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, s.responsePayloadSize) + responseContent = repeated[:s.responsePayloadSize] + } else { + responseContent = "This is a mock response from OpenAI." + } + + resp.Choices = []struct { + Index int `json:"index"` + Message openAIMessage `json:"message"` + FinishReason string `json:"finish_reason"` + }{ + { + Index: 0, + Message: openAIMessage{ + Role: "assistant", + Content: responseContent, + }, + FinishReason: "stop", + }, + } + + resp.Usage.PromptTokens = 10 + resp.Usage.CompletionTokens = 5 + resp.Usage.TotalTokens = 15 + + responseBody, _ := json.Marshal(resp) + + if req.Stream { + s.sendOpenAIStream(ctx, w, resp) + } else { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + if _, err := w.Write(responseBody); err != nil { + s.logger.Error(ctx, "failed to write OpenAI response", + slog.F("request_id", requestID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + } + } +} + +func (s *Server) handleAnthropic(w http.ResponseWriter, r *http.Request) { + pproflabel.Do(r.Context(), pproflabel.Service("llm-mock"), func(ctx context.Context) { + s.handleAnthropicWithLabels(w, r.WithContext(ctx)) + }) +} + +func (s *Server) handleAnthropicWithLabels(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + requestID := uuid.New() + + var req llmRequest + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.logger.Error(ctx, "failed to parse LLM request", slog.Error(err)) + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + if s.artificialLatency > 0 { + time.Sleep(s.artificialLatency) + } + + var resp anthropicResponse + resp.ID = fmt.Sprintf("msg_%s", requestID.String()[:8]) + resp.Type = "message" + resp.Role = "assistant" + + var responseText string + if s.responsePayloadSize > 0 { + pattern := "x" + repeated := strings.Repeat(pattern, s.responsePayloadSize) + responseText = repeated[:s.responsePayloadSize] + } else { + responseText = "This is a mock response from Anthropic." 
+ } + + resp.Content = []struct { + Type string `json:"type"` + Text string `json:"text"` + }{ + { + Type: "text", + Text: responseText, + }, + } + resp.Model = req.Model + resp.StopReason = "end_turn" + resp.Usage.InputTokens = 10 + resp.Usage.OutputTokens = 5 + + responseBody, _ := json.Marshal(resp) + + if req.Stream { + s.sendAnthropicStream(ctx, w, resp) + } else { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("anthropic-version", "2023-06-01") + w.WriteHeader(http.StatusOK) + if _, err := w.Write(responseBody); err != nil { + s.logger.Error(ctx, "failed to write Anthropic response", + slog.F("request_id", requestID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + } + } +} + +func (s *Server) sendOpenAIStream(ctx context.Context, w http.ResponseWriter, resp openAIResponse) { + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.WriteHeader(http.StatusOK) + + // Helper function to write with error checking + writeChunk := func(data string) bool { + if _, err := fmt.Fprintf(w, "%s", data); err != nil { + s.logger.Error(ctx, "failed to write OpenAI stream chunk", + slog.F("response_id", resp.ID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + return false + } + return true + } + + // Send initial chunk + chunk := map[string]interface{}{ + "id": resp.ID, + "object": "chat.completion.chunk", + "created": resp.Created, + "model": resp.Model, + "choices": []map[string]interface{}{ + { + "index": 0, + "delta": map[string]interface{}{ + "role": "assistant", + "content": resp.Choices[0].Message.Content, + }, + "finish_reason": nil, + }, + }, + } + chunkBytes, _ := json.Marshal(chunk) + if !writeChunk(fmt.Sprintf("data: %s\n\n", chunkBytes)) { + return + } + + // Send final chunk + finalChunk := map[string]interface{}{ + "id": resp.ID, + "object": "chat.completion.chunk", + "created": resp.Created, + "model": resp.Model, + "choices": []map[string]interface{}{ + { + "index": 0, + "delta": map[string]interface{}{}, + "finish_reason": resp.Choices[0].FinishReason, + }, + }, + } + finalChunkBytes, _ := json.Marshal(finalChunk) + if !writeChunk(fmt.Sprintf("data: %s\n\n", finalChunkBytes)) { + return + } + writeChunk("data: [DONE]\n\n") +} + +func (s *Server) sendAnthropicStream(ctx context.Context, w http.ResponseWriter, resp anthropicResponse) { + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("anthropic-version", "2023-06-01") + w.WriteHeader(http.StatusOK) + + writeChunk := func(data string) bool { + if _, err := fmt.Fprintf(w, "%s", data); err != nil { + s.logger.Error(ctx, "failed to write Anthropic stream chunk", + slog.F("response_id", resp.ID), + slog.Error(err), + slog.F("error_type", "write_error"), + slog.F("likely_cause", "network_error"), + ) + return false + } + return true + } + + startEvent := map[string]interface{}{ + "type": "message_start", + "message": map[string]interface{}{ + "id": resp.ID, + "type": resp.Type, + "role": resp.Role, + "model": resp.Model, + }, + } + startBytes, _ := json.Marshal(startEvent) + if !writeChunk(fmt.Sprintf("data: %s\n\n", startBytes)) { + return + } + + // Send content_block_start event + contentStartEvent := map[string]interface{}{ + "type": "content_block_start", + "index": 0, + "content_block": 
map[string]interface{}{ + "type": "text", + "text": resp.Content[0].Text, + }, + } + contentStartBytes, _ := json.Marshal(contentStartEvent) + if !writeChunk(fmt.Sprintf("data: %s\n\n", contentStartBytes)) { + return + } + + // Send content_block_delta event + deltaEvent := map[string]interface{}{ + "type": "content_block_delta", + "index": 0, + "delta": map[string]interface{}{ + "type": "text_delta", + "text": resp.Content[0].Text, + }, + } + deltaBytes, _ := json.Marshal(deltaEvent) + if !writeChunk(fmt.Sprintf("data: %s\n\n", deltaBytes)) { + return + } + + // Send content_block_stop event + contentStopEvent := map[string]interface{}{ + "type": "content_block_stop", + "index": 0, + } + contentStopBytes, _ := json.Marshal(contentStopEvent) + if !writeChunk(fmt.Sprintf("data: %s\n\n", contentStopBytes)) { + return + } + + // Send message_delta event + deltaMsgEvent := map[string]interface{}{ + "type": "message_delta", + "delta": map[string]interface{}{ + "stop_reason": resp.StopReason, + "stop_sequence": resp.StopSequence, + }, + "usage": resp.Usage, + } + deltaMsgBytes, _ := json.Marshal(deltaMsgEvent) + if !writeChunk(fmt.Sprintf("data: %s\n\n", deltaMsgBytes)) { + return + } + + // Send message_stop event + stopEvent := map[string]interface{}{ + "type": "message_stop", + } + stopBytes, _ := json.Marshal(stopEvent) + writeChunk(fmt.Sprintf("data: %s\n\n", stopBytes)) +} + +func (s *Server) tracingMiddleware(next http.Handler) http.Handler { + tracer := s.tracerProvider.Tracer("llm-mock") + + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + // Wrap response writer with StatusWriter for tracing + sw := &tracing.StatusWriter{ResponseWriter: rw} + + // Extract trace context from headers + propagator := otel.GetTextMapPropagator() + hc := propagation.HeaderCarrier(r.Header) + ctx := propagator.Extract(r.Context(), hc) + + // Start span with initial name (will be updated after handler) + ctx, span := tracer.Start(ctx, fmt.Sprintf("%s %s", r.Method, r.RequestURI)) + defer span.End() + r = r.WithContext(ctx) + + // Inject trace context into response headers + if span.SpanContext().HasTraceID() && span.SpanContext().HasSpanID() { + rw.Header().Set("X-Trace-ID", span.SpanContext().TraceID().String()) + rw.Header().Set("X-Span-ID", span.SpanContext().SpanID().String()) + + hc := propagation.HeaderCarrier(rw.Header()) + propagator.Inject(ctx, hc) + } + + // Execute the handler + next.ServeHTTP(sw, r) + + // Update span with final route and response information + route := r.URL.Path + span.SetName(fmt.Sprintf("%s %s", r.Method, route)) + span.SetAttributes(netconv.Transport("tcp")) + span.SetAttributes(httpconv.ServerRequest("llm-mock", r)...) + span.SetAttributes(semconv.HTTPRouteKey.String(route)) + + status := sw.Status + if status == 0 { + status = http.StatusOK + } + span.SetAttributes(semconv.HTTPStatusCodeKey.Int(status)) + span.SetStatus(httpconv.ServerStatus(status)) + }) +} diff --git a/scaletest/llmmock/types.go b/scaletest/llmmock/types.go new file mode 100644 index 0000000000000..f1f4e1772153a --- /dev/null +++ b/scaletest/llmmock/types.go @@ -0,0 +1,47 @@ +package llmmock + +import ( + "time" + + "github.com/google/uuid" +) + +// Provider represents the LLM provider type. +type Provider string + +const ( + ProviderOpenAI Provider = "openai" + ProviderAnthropic Provider = "anthropic" +) + +// RequestSummary contains metadata about an intercepted LLM API request. 
+type RequestSummary struct { + ID uuid.UUID `json:"id"` + Timestamp time.Time `json:"timestamp"` + Provider Provider `json:"provider"` + Model string `json:"model"` + UserID string `json:"user_id,omitempty"` + Stream bool `json:"stream"` + // Request body as JSON string for reference + RequestBody string `json:"request_body,omitempty"` +} + +// ResponseSummary contains metadata about an LLM API response. +type ResponseSummary struct { + RequestID uuid.UUID `json:"request_id"` + Timestamp time.Time `json:"timestamp"` + Status int `json:"status"` + Stream bool `json:"stream"` + FinishReason string `json:"finish_reason,omitempty"` // OpenAI: finish_reason, Anthropic: stop_reason + PromptTokens int `json:"prompt_tokens,omitempty"` + OutputTokens int `json:"output_tokens,omitempty"` // OpenAI: completion_tokens, Anthropic: output_tokens + TotalTokens int `json:"total_tokens,omitempty"` + // Response body as JSON string for reference (non-streaming) or first chunk (streaming) + ResponseBody string `json:"response_body,omitempty"` +} + +// RequestRecord combines request and response information. +type RequestRecord struct { + Request RequestSummary `json:"request"` + Response *ResponseSummary `json:"response,omitempty"` +}