Skip to content

Commit

Permalink
feat: add cron task that runs the minimal training pipeline nightly (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
bhearsum authored Jan 6, 2025
1 parent 16077f1 commit 2fe2ada
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
jobs:
# Run the minimal pipeline once a day to support integration testing before
# worker image changes are made (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
# These runs will pick up cached tasks, so most of the time this will simply
# end up running `all-pipeline`.
- name: run-pipeline
job:
type: decision-task
# we don't use treeherder...but this is a required field
treeherder-symbol: pipeline
target-tasks-method: train-target-tasks
# Single daily trigger at hour 0, minute 0.
# NOTE(review): presumably interpreted as UTC by the cron runner - confirm.
when:
- {hour: 0, minute: 0}
13 changes: 13 additions & 0 deletions taskcluster/translations_taskgraph/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import logging
from pathlib import Path
from taskgraph.parameters import extend_parameters_schema
from voluptuous import Extra, Optional, Required
import yaml

logger = logging.getLogger(__name__)


# By default, provide a very minimal config for CI that runs very quickly. This allows
# the pipeline to be validated in CI. The production training configs should override
Expand Down Expand Up @@ -104,3 +107,13 @@ def deep_setdefault(dict_, defaults):
def get_decision_parameters(graph_config, parameters):
    """Populate training-config defaults on the decision parameters, in place.

    ``parameters`` is mutated: a ``training_config`` entry is created when it
    is absent, and the CI training config is then applied to ``parameters``
    through ``deep_setdefault``. For cron-triggered runs that target
    ``train-target-tasks``, ``wandb-publication`` is additionally forced to
    ``False``. ``graph_config`` is accepted but not used by this function.
    Nothing is returned.
    """
    parameters.setdefault("training_config", {})
    ci_defaults = get_ci_training_config()
    deep_setdefault(parameters, ci_defaults)

    # The pipeline is also run on a cron schedule so that worker image changes
    # can be integration tested
    # (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
    # Anything other than such a scheduled pipeline run needs no further changes.
    if parameters["tasks_for"] != "cron":
        return
    if parameters["target_tasks_method"] != "train-target-tasks":
        return

    # Scheduled runs must _never_ publish to W&B: their data has no value and
    # would only clutter it up.
    logger.info("Overriding wandb-publication to be False for cron pipeline run")
    parameters["training_config"]["wandb-publication"] = False

0 comments on commit 2fe2ada

Please sign in to comment.