-
Notifications
You must be signed in to change notification settings - Fork 0
feat(buildrunner): add Buildkite BuildRunner implementation #190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8213ccc
85e6f48
a8a0678
290ead5
6e8c498
e80be59
5ce5e8c
4228731
67c40d4
dc9f929
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| load("@rules_go//go:def.bzl", "go_library", "go_test") | ||
|
|
||
| # Library target for the Buildkite BuildRunner package; compiles buildkite.go and client.go. | ||
| go_library( | ||
| name = "buildkite", | ||
| srcs = [ | ||
| "buildkite.go", | ||
| "client.go", | ||
| ], | ||
| importpath = "github.com/uber/submitqueue/submitqueue/extension/buildrunner/buildkite", | ||
| visibility = ["//visibility:public"], | ||
| deps = [ | ||
| "//submitqueue/entity", | ||
| "//submitqueue/extension/buildrunner", | ||
| "@org_uber_go_zap//:zap", | ||
| ], | ||
| ) | ||
|
|
||
| # Test target for buildkite_test.go; embeds :buildkite. | ||
| go_test( | ||
| name = "buildkite_test", | ||
| srcs = ["buildkite_test.go"], | ||
| embed = [":buildkite"], | ||
| deps = [ | ||
| "//core/httpclient", | ||
| "//submitqueue/entity", | ||
| "//submitqueue/extension/buildrunner", | ||
| "@com_github_stretchr_testify//assert", | ||
| "@com_github_stretchr_testify//require", | ||
| "@org_uber_go_zap//:zap", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,214 @@ | ||
| // Copyright (c) 2025 Uber Technologies, Inc. | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| // Package buildkite implements buildrunner.BuildRunner backed by the Buildkite | ||
| // CI platform. | ||
| // | ||
| // Trigger calls the Buildkite API to create the build and returns the Buildkite | ||
| // build number as the build ID. Status and Cancel parse the number directly | ||
| // from the build ID — no local state is required. | ||
| // | ||
| // The Buildkite build receives base and head change URIs as JSON-encoded | ||
| // environment variables (SQ_BASE_URIS, SQ_HEAD_URIS, SQ_QUEUE). The pipeline | ||
| // script fetches each PR's diff with the GitHub API, applies them with | ||
| // `git apply -3`, produces one commit per layer (base, head), then runs CI. | ||
| package buildkite | ||
|
|
||
| import ( | ||
| "context" | ||
| "encoding/json" | ||
| "fmt" | ||
| "net/http" | ||
| "strconv" | ||
|
|
||
| "go.uber.org/zap" | ||
|
|
||
| "github.com/uber/submitqueue/submitqueue/entity" | ||
| "github.com/uber/submitqueue/submitqueue/extension/buildrunner" | ||
| ) | ||
|
|
||
// Names of the environment variables attached to every Buildkite build that
// this package triggers.
const (
	// EnvKeyBaseURIs names the variable holding the JSON-encoded, ordered list
	// of change URIs that come from the dependency batches. The pipeline
	// script applies them first and commits the result as the "base" layer.
	EnvKeyBaseURIs = "SQ_BASE_URIS"

	// EnvKeyHeadURIs names the variable holding the JSON-encoded, ordered list
	// of change URIs that belong to the batch under test. They are applied on
	// top of the base layer and committed separately.
	EnvKeyHeadURIs = "SQ_HEAD_URIS"

	// EnvKeyQueue names the variable holding the SQ queue name, which lets the
	// pipeline script pick queue-specific test targets.
	EnvKeyQueue = "SQ_QUEUE"
)
|
|
||
| // runner implements buildrunner.BuildRunner. | ||
| type runner struct { | ||
| cfg buildrunner.Config | ||
| client *client | ||
| logger *zap.SugaredLogger | ||
| } | ||
|
|
||
| var _ buildrunner.BuildRunner = (*runner)(nil) | ||
|
|
||
| // Params holds the dependencies for a Buildkite BuildRunner. The caller is | ||
| // responsible for configuring HTTPClient with the base URL (via | ||
| // httpclient.BaseURLTransport) and auth (via an Authorization-header transport). | ||
| type Params struct { | ||
| // Config holds the per-queue identity for this BuildRunner. | ||
| Config buildrunner.Config | ||
| // HTTPClient is a pre-configured HTTP client. The caller is responsible | ||
| // for the base URL (via httpclient.BaseURLTransport) and auth (via a | ||
| // transport layer). If nil, http.DefaultClient is used. | ||
| HTTPClient *http.Client | ||
| // Logger is the structured logger. | ||
| Logger *zap.SugaredLogger | ||
| } | ||
|
|
||
| // NewBuildRunner constructs a Buildkite-backed BuildRunner bound to a single | ||
| // pipeline. | ||
| // | ||
| // The HTTPClient must have BaseURLTransport configured to the pipeline's API | ||
| // root (e.g. "https://api.buildkite.com/v2/organizations/{org}/pipelines/{slug}"), | ||
| // and an auth transport that injects the Authorization header. | ||
| func NewBuildRunner(params Params) (buildrunner.BuildRunner, error) { | ||
| if params.HTTPClient == nil { | ||
| return nil, fmt.Errorf("http client is required") | ||
| } | ||
| if params.Logger == nil { | ||
| return nil, fmt.Errorf("logger is required") | ||
| } | ||
| return newRunner(params.Config, &client{httpClient: params.HTTPClient}, params.Logger.Named("buildkite_buildrunner")), nil | ||
| } | ||
|
|
||
| // newRunner constructs a runner. Used by NewBuildRunner and by tests. | ||
| func newRunner(cfg buildrunner.Config, c *client, logger *zap.SugaredLogger) *runner { | ||
| return &runner{ | ||
| cfg: cfg, | ||
| client: c, | ||
| logger: logger, | ||
| } | ||
| } | ||
|
|
||
| // Trigger calls the Buildkite API to create the build and returns the Buildkite | ||
| // build number as the build ID. Errors are propagated to the caller so the | ||
| // queue consumer can nack and retry. | ||
| func (r *runner) Trigger(ctx context.Context, base, head []entity.Change, _ entity.BuildMetadata) (entity.BuildID, error) { | ||
| baseJSON, _ := json.Marshal(flattenURIs(base)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just curious (outside scope of this PR) -- will we actually apply multiple base changes, and if so why? This is different from SQV1 right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. v1 did apply multiple changes as bases, only the last change in the list was considered the "head change". I think distinctly in v2 we allow for more than a single test change to be applied to the batch, but the bases follow the same pattern as v1. |
||
| headJSON, _ := json.Marshal(flattenURIs(head)) | ||
|
|
||
| req := createBuildRequest{ | ||
| Message: "submitqueue speculative build", | ||
| Env: map[string]string{ | ||
| EnvKeyBaseURIs: string(baseJSON), | ||
| EnvKeyHeadURIs: string(headJSON), | ||
| EnvKeyQueue: r.cfg.QueueName, | ||
| }, | ||
| } | ||
|
|
||
| resp, err := r.client.createBuild(ctx, req) | ||
| if err != nil { | ||
| return entity.BuildID{}, fmt.Errorf("buildkite: create build: %w", err) | ||
| } | ||
|
|
||
| r.logger.Debugw("triggered Buildkite build", | ||
| "buildkite_number", resp.Number, | ||
| ) | ||
| return entity.BuildID{ID: encodeBuildNumber(resp.Number)}, nil | ||
| } | ||
|
|
||
| // Status fetches the current state of the build from Buildkite and returns it | ||
| // with the build URL in BuildMetadata["url"]. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we check with the CI team whether there are any concerns with the polling approach, mainly w.r.t. rate limits? Are we planning to use polling in production, or a webhook-based approach? |
||
| func (r *runner) Status(ctx context.Context, buildID entity.BuildID) (entity.BuildStatus, entity.BuildMetadata, error) { | ||
| number, err := parseBuildNumber(buildID.ID) | ||
| if err != nil { | ||
| return entity.BuildStatusUnknown, nil, fmt.Errorf("buildkite: malformed build ID: %w", err) | ||
| } | ||
|
|
||
| resp, err := r.client.getBuild(ctx, number) | ||
| if err != nil { | ||
| return entity.BuildStatusUnknown, nil, fmt.Errorf("buildkite: get build: %w", err) | ||
| } | ||
|
|
||
| return mapState(resp.State), entity.BuildMetadata{"url": resp.WebURL}, nil | ||
| } | ||
|
|
||
| // Cancel calls the Buildkite API to cancel the build. A no-op on already-terminal | ||
| // builds (Buildkite returns 422 for those). | ||
| func (r *runner) Cancel(ctx context.Context, buildID entity.BuildID) error { | ||
| number, err := parseBuildNumber(buildID.ID) | ||
| if err != nil { | ||
| return fmt.Errorf("buildkite: malformed build ID: %w", err) | ||
| } | ||
|
|
||
| if err := r.client.cancelBuild(ctx, number); err != nil { | ||
| return fmt.Errorf("buildkite: cancel build: %w", err) | ||
| } | ||
| r.logger.Debugw("cancelled Buildkite build", | ||
| "buildkite_number", number, | ||
| ) | ||
| return nil | ||
| } | ||
|
|
||
| // flattenURIs concatenates the URI lists from all changes into a single slice. | ||
| func flattenURIs(changes []entity.Change) []string { | ||
| uris := make([]string, 0, len(changes)) | ||
| for _, c := range changes { | ||
| uris = append(uris, c.URIs...) | ||
| } | ||
| return uris | ||
| } | ||
|
|
||
| // encodeBuildNumber encodes a Buildkite build number as the SQ build ID. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this build ID need to be namespaced somehow to prevent conflicts when multiple build systems are in use that might produce the same ID number?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it matters, since inside the orchestrator build IDs are all scoped by their batch IDs, but we can always switch to a UUID if we want to fully avoid conflicts. Build numbers on Buildkite are unique per pipeline. |
||
func encodeBuildNumber(number int) string {
	// Plain base-10 rendering; parseBuildNumber reverses it.
	return strconv.FormatInt(int64(number), 10)
}
|
|
||
// parseBuildNumber is the inverse of encodeBuildNumber: it converts an SQ build
// ID back into the Buildkite build number.
//
// It returns an error when id is not a base-10 integer. The underlying strconv
// error is wrapped, so callers can distinguish syntax from range failures with
// errors.Is / errors.As.
func parseBuildNumber(id string) (int, error) {
	n, err := strconv.Atoi(id)
	if err != nil {
		return 0, fmt.Errorf("invalid build ID %q: %w", id, err)
	}
	return n, nil
}
|
|
||
| // mapState maps a Buildkite build state string to a BuildStatus. | ||
| // | ||
| // Buildkite states: creating, scheduled, running, blocked, passed, failed, | ||
| // canceling, canceled, skipped, not_run. | ||
| func mapState(state string) entity.BuildStatus { | ||
| switch state { | ||
| case "creating", "scheduled": | ||
| return entity.BuildStatusAccepted | ||
| case "running", "blocked": | ||
| // blocked = waiting on a block step; still live, not yet terminal. | ||
| return entity.BuildStatusRunning | ||
| case "passed": | ||
| return entity.BuildStatusSucceeded | ||
| case "failed", "not_run", "skipped": | ||
| // not_run/skipped never produced a passing result; treat them as | ||
| // terminal failure so the batch is not merged on a non-success verdict. | ||
| return entity.BuildStatusFailed | ||
| case "canceling", "canceled": | ||
| return entity.BuildStatusCancelled | ||
| default: | ||
| // Unrecognised Buildkite state. Do NOT assume terminal: Unknown is | ||
| // non-terminal, so the buildsignal poll loop keeps waiting rather than | ||
| // failing the batch on a state this code does not understand. | ||
| return entity.BuildStatusUnknown | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will we be able to use this extension as-is for our on-prem deployment, or would we need one that does everything here but goes through CI Gateway instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will have to double-check. This was just meant as a reference implementation for OSS and a way to test the design; I would likely write a completely separate internal one for CI Gateway (ci-gw).