
API References

Package exports

__main__

The main module provides the executable entrypoint for oper8

add_command(subparsers, cmd)

Add the subparser and set up the default func call

Source code in oper8/__main__.py
def add_command(
    subparsers: argparse._SubParsersAction,
    cmd: CmdBase,
) -> Tuple[argparse.ArgumentParser, Dict[str, str]]:
    """Add the subparser and set up the default fun call"""
    parser = cmd.add_subparser(subparsers)
    parser.set_defaults(func=cmd.cmd)
    library_args = parser.add_argument_group("Library Configuration")
    library_config_setters = add_library_config_args(library_args)
    return parser, library_config_setters

add_library_config_args(parser, config_obj=None, path=None)

Automatically add args for all elements of the library config

Source code in oper8/__main__.py
def add_library_config_args(parser, config_obj=None, path=None):
    """Automatically add args for all elements of the library config"""
    path = path or []
    setters = {}
    config_obj = config_obj or config
    for key, val in config_obj.items():
        sub_path = path + [key]

        # If this is a nested arg, recurse
        if isinstance(val, aconfig.AttributeAccessDict):
            sub_setters = add_library_config_args(parser, config_obj=val, path=sub_path)
            for dest_name, nested_path in sub_setters.items():
                setters[dest_name] = nested_path

        # Otherwise, add an argument explicitly
        else:
            arg_name = ".".join(sub_path)
            dest_name = "_".join(sub_path)
            kwargs = {
                "default": val,
                "dest": dest_name,
                "help": f"Library config override for {arg_name} (see oper8.config)",
            }
            if isinstance(val, list):
                kwargs["nargs"] = "*"
            elif isinstance(val, bool):
                kwargs["action"] = "store_true"
            else:
                type_name = None
                if val is not None:
                    type_name = type(val)
                kwargs["type"] = type_name

            if (
                f"--{arg_name}"
                not in parser._option_string_actions  # pylint: disable=protected-access
            ):
                parser.add_argument(f"--{arg_name}", **kwargs)
                setters[dest_name] = sub_path
    return setters
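
To illustrate how the generated flags and dest names line up, here is a small sketch using a hypothetical stand-in config object (the real oper8 library config is richer than this, though log_level and python_watch_manager.heartbeat_file do appear elsewhere in this module):

# Hypothetical config object, not the full oper8 config schema
import argparse
import aconfig
from oper8.__main__ import add_library_config_args

parser = argparse.ArgumentParser()
group = parser.add_argument_group("Library Configuration")
cfg = aconfig.Config(
    {"log_level": "info", "python_watch_manager": {"heartbeat_file": None}},
    override_env_vars=False,
)
setters = add_library_config_args(group, config_obj=cfg)
# Generates --log_level and --python_watch_manager.heartbeat_file, with dest names
# "log_level" and "python_watch_manager_heartbeat_file" mapping back to the nested paths:
# {"log_level": ["log_level"],
#  "python_watch_manager_heartbeat_file": ["python_watch_manager", "heartbeat_file"]}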

main()

The main module provides the executable entrypoint for oper8

Source code in oper8/__main__.py
def main():
    """The main module provides the executable entrypoint for oper8"""
    parser = argparse.ArgumentParser(description=__doc__)

    # Add the subcommands
    subparsers = parser.add_subparsers(help="Available commands", dest="command")
    run_operator_cmd = RunOperatorCmd()
    run_operator_parser, library_config_setters = add_command(
        subparsers, run_operator_cmd
    )
    run_health_check_cmd = CheckHeartbeatCmd()
    add_command(subparsers, run_health_check_cmd)
    setup_vcs_cmd = SetupVCSCmd()
    add_command(subparsers, setup_vcs_cmd)

    # Use a preliminary parser to check for the presence of a command and fall
    # back to the default command if not found
    check_parser = argparse.ArgumentParser(add_help=False)
    check_parser.add_argument("command", nargs="?")
    check_args, _ = check_parser.parse_known_args()
    if check_args.command not in subparsers.choices:
        args = run_operator_parser.parse_args()
    else:
        args = parser.parse_args()

    # Provide overrides to the library configs
    update_library_config(args, library_config_setters)

    # Reconfigure logging
    alog.configure(
        default_level=config.log_level,
        filters=config.log_filters,
        formatter=Oper8JsonFormatter() if config.log_json else "pretty",
        thread_id=config.log_thread_id,
    )

    # Run the command's function
    args.func(args)
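
A rough usage sketch (the operator module name is hypothetical). Note that the preliminary parser above falls back to the run command when no known subcommand is given, so the subcommand can be omitted:

# Rough equivalent of:  python -m oper8 run --module_name my_operator
# (and, because of the fallback parser:  python -m oper8 --module_name my_operator)
import sys
from oper8.__main__ import main

sys.argv = ["oper8", "run", "--module_name", "my_operator"]
main()  # imports my_operator, registers its Controllers, and starts the configured watch manager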

update_library_config(args, setters)

Update the library config values based on the parsed arguments

Source code in oper8/__main__.py
def update_library_config(args, setters):
    """Update the library config values based on the parsed arguments"""
    for dest_name, config_path in setters.items():
        config_obj = library_config
        while len(config_path) > 1:
            config_obj = config_obj[config_path[0]]
            config_path = config_path[1:]
        config_obj[config_path[0]] = getattr(args, dest_name)
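
A small sketch of the mapping, reusing the nested python_watch_manager.heartbeat_file key shown elsewhere in this module (the file path is hypothetical):

import argparse
from oper8.__main__ import update_library_config

# dest name -> nested config path, as produced by add_library_config_args above
setters = {"python_watch_manager_heartbeat_file": ["python_watch_manager", "heartbeat_file"]}
args = argparse.Namespace(python_watch_manager_heartbeat_file="/tmp/oper8-heartbeat")
update_library_config(args, setters)
# The library config's python_watch_manager.heartbeat_file value is now "/tmp/oper8-heartbeat"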

cmd

This module holds all of the command classes for oper8's main entrypoint

base

Base class for all oper8 commands

CmdBase

Bases: ABC

Source code in oper8/cmd/base.py
class CmdBase(abc.ABC):
    __doc__ = __doc__

    @abc.abstractmethod
    def add_subparser(
        self,
        subparsers: argparse._SubParsersAction,
    ) -> argparse.ArgumentParser:
        """Add this command's argument parser subcommand

        Args:
            subparsers (argparse._SubParsersAction): The subparser section for
                the central main parser

        Returns:
            subparser (argparse.ArgumentParser): The configured parser for this
                command
        """

    @abc.abstractmethod
    def cmd(self, args: argparse.Namespace):
        """Execute the command with the parsed arguments

        Args:
            args (argparse.Namespace): The parsed command line arguments
        """
add_subparser(subparsers) abstractmethod

Add this command's argument parser subcommand

Parameters:

subparsers (_SubParsersAction): The subparser section for the central main parser. Required.

Returns:

subparser (ArgumentParser): The configured parser for this command

Source code in oper8/cmd/base.py
@abc.abstractmethod
def add_subparser(
    self,
    subparsers: argparse._SubParsersAction,
) -> argparse.ArgumentParser:
    """Add this command's argument parser subcommand

    Args:
        subparsers (argparse._SubParsersAction): The subparser section for
            the central main parser

    Returns:
        subparser (argparse.ArgumentParser): The configured parser for this
            command
    """
cmd(args) abstractmethod

Execute the command with the parsed arguments

Parameters:

args (Namespace): The parsed command line arguments. Required.

Source code in oper8/cmd/base.py
@abc.abstractmethod
def cmd(self, args: argparse.Namespace):
    """Execute the command with the parsed arguments

    Args:
        args (argparse.Namespace): The parsed command line arguments
    """

check_heartbeat

CLI command for checking the operator's heartbeat file

CheckHeartbeatCmd

Bases: CmdBase

Source code in oper8/cmd/check_heartbeat.py
class CheckHeartbeatCmd(CmdBase):
    __doc__ = __doc__

    ## Interface ##

    def add_subparser(
        self,
        subparsers: argparse._SubParsersAction,
    ) -> argparse.ArgumentParser:
        parser = subparsers.add_parser("check-heartbeat", help=__doc__)
        runtime_args = parser.add_argument_group("Check Heartbeat Configuration")
        runtime_args.add_argument(
            "--delta",
            "-d",
            required=True,
            type=int,
            help="Max time allowed since last check",
        )
        runtime_args.add_argument(
            "--file",
            "-f",
            default=config.python_watch_manager.heartbeat_file,
            help="Location of health check file. Defaults to config based.",
        )
        return parser

    def cmd(self, args: argparse.Namespace):
        """Run command to validate a health check file"""

        # Validate args
        assert args.delta is not None
        assert args.file is not None

        # Ensure file exists
        file_path = Path(args.file)
        if not file_path.exists():
            log.error(f"Health Check failed: {file_path} does not exist")
            raise FileNotFoundError()

        # Read the most recent time from the health check
        last_log_time = file_path.read_text().strip()
        last_time = datetime.strptime(last_log_time, HeartbeatThread._DATE_FORMAT)

        if last_time + timedelta(seconds=args.delta) < datetime.now():
            msg = f"Health Check failed: {last_log_time} is to old"
            log.error(msg)
            raise KeyError(msg)
cmd(args)

Run command to validate a health check file

Source code in oper8/cmd/check_heartbeat.py
def cmd(self, args: argparse.Namespace):
    """Run command to validate a health check file"""

    # Validate args
    assert args.delta is not None
    assert args.file is not None

    # Ensure file exists
    file_path = Path(args.file)
    if not file_path.exists():
        log.error(f"Health Check failed: {file_path} does not exist")
        raise FileNotFoundError()

    # Read the most recent time from the health check
    last_log_time = file_path.read_text().strip()
    last_time = datetime.strptime(last_log_time, HeartbeatThread._DATE_FORMAT)

    if last_time + timedelta(seconds=args.delta) < datetime.now():
        msg = f"Health Check failed: {last_log_time} is to old"
        log.error(msg)
        raise KeyError(msg)
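
A rough programmatic equivalent of python -m oper8 check-heartbeat --delta 60 --file <path> (the file path below is hypothetical):

import argparse
from oper8.cmd.check_heartbeat import CheckHeartbeatCmd

cmd = CheckHeartbeatCmd()
args = argparse.Namespace(delta=60, file="/tmp/oper8-heartbeat")
cmd.cmd(args)
# Raises FileNotFoundError if the file is missing, or KeyError if the last
# recorded heartbeat is older than 60 seconds.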

run_operator_cmd

This is the main entrypoint command for running the operator

RunOperatorCmd

Bases: CmdBase

Source code in oper8/cmd/run_operator_cmd.py
class RunOperatorCmd(CmdBase):
    __doc__ = __doc__

    ## Interface ##

    def add_subparser(
        self,
        subparsers: argparse._SubParsersAction,
    ) -> argparse.ArgumentParser:
        parser = subparsers.add_parser("run", help=__doc__)
        runtime_args = parser.add_argument_group("Runtime Configuration")
        runtime_args.add_argument(
            "--module_name",
            "-m",
            required=True,
            help="The module to import that holds the operator code",
        )
        runtime_args.add_argument(
            "--cr",
            "-c",
            default=None,
            help="(dry run) A CR manifest yaml to apply directly ",
        )
        runtime_args.add_argument(
            "--resource_dir",
            "-r",
            default=None,
            help="(dry run) Path to a directory of yaml files that should exist in the cluster",
        )
        return parser

    def cmd(self, args: argparse.Namespace):
        # Validate args
        assert args.cr is None or (
            config.dry_run and os.path.isfile(args.cr)
        ), "Can only specify --cr with dry run and it must point to a valid file"
        assert args.resource_dir is None or (
            config.dry_run and os.path.isdir(args.resource_dir)
        ), "Can only specify --resource_dir with dry run and it must point to a valid directory"

        # Find all controllers in the operator library
        controller_types = self._get_controller_types(
            args.module_name, args.controller_name
        )

        # Parse pre-populated resources if needed
        resources = self._parse_resource_dir(args.resource_dir)

        # Create the watch managers
        deploy_manager = self._setup_watches(controller_types, resources)

        # Register the signal handler to stop the watches
        def do_stop(*_, **__):  # pragma: no cover
            watch_manager.stop_all()

        signal.signal(signal.SIGINT, do_stop)

        # Run the watch manager
        log.info("Starting Watches")
        watch_manager.start_all()

        # If given, apply the CR directly
        if args.cr:
            log.info("Applying CR [%s]", args.cr)
            with open(args.cr, encoding="utf-8") as handle:
                cr_manifest = yaml.safe_load(handle)
                cr_manifest.setdefault("metadata", {}).setdefault(
                    "namespace", "default"
                )
                log.debug3(cr_manifest)
                deploy_manager.deploy([cr_manifest])

        # All done!
        log.info("SHUTTING DOWN")

    ## Impl ##

    @staticmethod
    def _is_controller_type(attr_val: str):
        """Determine if a given attribute value is a controller type"""
        return (
            isinstance(attr_val, type)
            and issubclass(attr_val, Controller)
            and attr_val is not Controller
        )

    @classmethod
    def _get_controller_types(cls, module_name: str, controller_name=""):
        """Import the operator library and either extract all Controllers,
        or just extract the provided Controller"""
        module = importlib.import_module(module_name)
        log.debug4(dir(module))
        controller_types = []

        if controller_name:
            # Confirm that the class exists and that it is a controller type
            try:
                controller_attr_val = getattr(module, controller_name)
                is_valid_controller = cls._is_controller_type(controller_attr_val)
            except AttributeError:
                is_valid_controller = False

            if is_valid_controller:
                log.debug3("Provided controller, %s, is valid", controller_name)
                controller_types.append(controller_attr_val)
            else:
                raise AttributeError(
                    f"Provided controller, {controller_name}, is invalid"
                )
        else:
            log.debug3("Searching for all controllers...")
            for attr in dir(module):
                attr_val = getattr(module, attr)
                if cls._is_controller_type(attr_val):
                    log.debug2("Found Controller: %s", attr_val)
                    controller_types.append(attr_val)

        assert controller_types, f"No Controllers found in [{module_name}]"
        return controller_types

    @staticmethod
    def _parse_resource_dir(resource_dir: Optional[str]):
        """If given, this will parse all yaml files found in the given directory"""
        all_resources = []
        if resource_dir is not None:
            for fname in os.listdir(resource_dir):
                if fname.endswith(".yaml") or fname.endswith(".yml"):
                    resource_path = os.path.join(resource_dir, fname)
                    log.debug3("Reading resource file [%s]", resource_path)
                    with open(resource_path, encoding="utf-8") as handle:
                        all_resources.extend(yaml.safe_load_all(handle))
        return all_resources

    @staticmethod
    def _setup_watches(
        controller_types: List[Type[Controller]],
        resources: List[dict],
    ) -> Optional[DryRunDeployManager]:
        """Set up watches for all controllers. If in dry run mode, the
        DryRunDeployManager will be returned.
        """
        deploy_manager = None
        extra_kwargs = {}
        if config.dry_run:
            log.info("Running DRY RUN")
            deploy_manager = DryRunDeployManager(resources=resources)
            wm_type = watch_manager.DryRunWatchManager
            extra_kwargs["deploy_manager"] = deploy_manager
        elif config.watch_manager == "ansible":  # pragma: no cover
            log.info("Running Ansible Operator")
            wm_type = watch_manager.AnsibleWatchManager
        elif config.watch_manager == "python":  # pragma: no cover
            log.info("Running Python Operator")
            wm_type = watch_manager.PythonWatchManager
        else:
            raise ConfigError(f"Unknown watch manager {config.watch_manager}")

        for controller_type in controller_types:
            log.debug("Registering watch for %s", controller_type)
            wm_type(controller_type=controller_type, **extra_kwargs)
        return deploy_manager
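
A dry-run sketch tying the pieces together. The module and file paths are hypothetical, and --dry_run assumes dry_run is exposed as a top-level boolean in the library config, as the dry-run checks in cmd() suggest:

# Rough equivalent of:
#   python -m oper8 run -m my_operator --dry_run \
#       --cr examples/my_cr.yaml --resource_dir examples/cluster_state/
import sys
from oper8.__main__ import main

sys.argv = [
    "oper8", "run",
    "--module_name", "my_operator",
    "--dry_run",
    "--cr", "examples/my_cr.yaml",
    "--resource_dir", "examples/cluster_state/",
]
main()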

setup_vcs_cmd

CLI command for setting up a VCS version repo

SetupVCSCmd

Bases: CmdBase

Source code in oper8/cmd/setup_vcs_cmd.py
class SetupVCSCmd(CmdBase):
    __doc__ = __doc__

    def add_subparser(
        self,
        subparsers: argparse._SubParsersAction,
    ) -> argparse.ArgumentParser:
        """Add the subparser for this command"""
        parser = subparsers.add_parser(
            "setup-vcs",
            help="Initialize a clean git repo to use with VCS versioning",
        )
        command_args = parser.add_argument_group("Command Arguments")
        command_args.add_argument(
            "--source",
            "-s",
            required=True,
            help="Source repo to seed the clean git history",
        )
        command_args.add_argument(
            "--destination",
            "-d",
            default=DEFAULT_DEST,
            help="Destination directory in which to place the clean git history",
        )
        command_args.add_argument(
            "--branch-expr",
            "-b",
            nargs="*",
            default=None,
            help="Regular expression(s) to use to identify branches",
        )
        command_args.add_argument(
            "--tag-expr",
            "-te",
            nargs="*",
            default=DEFAULT_TAG_EXPR,
            help="Regular expression(s) to use to identify tags",
        )
        command_args.add_argument(
            "--force",
            "-f",
            action="store_true",
            default=False,
            help="Force overwrite existing destination",
        )
        return parser

    def cmd(self, args: argparse.Namespace):
        setup_vcs(
            source=args.source,
            destination=args.destination,
            branch_expr=args.branch_expr,
            tag_expr=args.tag_expr,
            force=args.force,
        )
add_subparser(subparsers)

Add the subparser for this command

Source code in oper8/cmd/setup_vcs_cmd.py
def add_subparser(
    self,
    subparsers: argparse._SubParsersAction,
) -> argparse.ArgumentParser:
    """Add the subparser for this command"""
    parser = subparsers.add_parser(
        "setup-vcs",
        help="Initialize a clean git repo to use with VCS versioning",
    )
    command_args = parser.add_argument_group("Command Arguments")
    command_args.add_argument(
        "--source",
        "-s",
        required=True,
        help="Source repo to seed the clean git history",
    )
    command_args.add_argument(
        "--destination",
        "-d",
        default=DEFAULT_DEST,
        help="Destination directory in which to place the clean git history",
    )
    command_args.add_argument(
        "--branch-expr",
        "-b",
        nargs="*",
        default=None,
        help="Regular expression(s) to use to identify branches",
    )
    command_args.add_argument(
        "--tag-expr",
        "-te",
        nargs="*",
        default=DEFAULT_TAG_EXPR,
        help="Regular expression(s) to use to identify tags",
    )
    command_args.add_argument(
        "--force",
        "-f",
        action="store_true",
        default=False,
        help="Force overwrite existing destination",
    )
    return parser
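
A rough programmatic equivalent of python -m oper8 setup-vcs -s <repo> (the paths and expressions below are hypothetical):

import argparse
from oper8.cmd.setup_vcs_cmd import SetupVCSCmd

cmd = SetupVCSCmd()
args = argparse.Namespace(
    source=".",                 # existing git repo to seed from
    destination="./oper8_vcs",  # where to write the clean history
    branch_expr=None,
    tag_expr=[r"[0-9]+\.[0-9]+\.[0-9]+"],
    force=False,
)
cmd.cmd(args)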

component

Component base class for building larger abstractions off of

Component

Bases: Node, ABC

This file defines the top-level interface for a "Component" in the deployment ecosystem. Each Component will ultimately resolve to a Node in the deployment execution graph which can be atomically rendered, deployed, verified, and if needed reverted.

Source code in oper8/component.py
class Component(Node, abc.ABC):
    """
    This file defines the top-level interface for a "Component" in the
    deployment ecosystem. Each Component will ultimately resolve to a Node in
    the deployment execution graph which can be atomically rendered, deployed,
    verified, and if needed reverted.
    """

    @abstractclassproperty
    def name(self):
        """All Components must implement a name class attribute"""

    def __init__(
        self,
        session: Session,
        disabled: bool = False,
    ):
        """Construct with the session for this deployment

        Args:
            session:  Session
                The session that this component will belong to
            disabled:  bool
                Whether or not this component is disabled
        """
        # Ensure that the name property is defined by accessing it and that
        # namespace is inherited from session.
        self.name  # noqa: B018
        self.session_namespace = session.namespace
        self.disabled = disabled

        # Initialize Node with name
        super().__init__(self.name)

        # Register with the session
        # NOTE: This is done before the parent initialization so duplicates can
        #   be caught by the session with a nice error rather than Graph
        log.debug2("[%s] Auto-registering %s", session.id, self)
        session.add_component(self)

        # Initialize the Graph that'll control component rendering
        self.graph = Graph()

        # The list of all managed objects owned by this component
        self._managed_objects = None

    def __str__(self):
        return f"Component({self.name})"

    @property
    def managed_objects(self) -> List[ManagedObject]:
        """The list of managed objects that this Component currently knows
        about. If called before rendering, this will be an empty list, so it will
        always be iterable.

        Returns:
            managed_objects:  List[ManagedObject]
                The list of known managed objects
        """
        return self._managed_objects or []

    ## Base Class Interface ####################################################
    #
    # These methods MAY be implemented by children, but contain default
    # implementations that are appropriate for simple cases.
    #
    # NOTE: We liberally use pylint disables here to make the base interface
    #   clear to deriving classes.
    ##

    def build_chart(self, session: Session):  # pylint: disable=unused-argument
        """The build_chart function allows the derived class to add child Charts
        lazily so that they can take advantage of post-initialization
        information.

        Args:
            session:  Session
                The current deploy session
        """

    def verify(self, session):
        """The verify function will run any necessary testing and validation
        that the component needs to ensure that rollout was successfully
        completed.

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True on successful deployment verification, False on failure
                conditions
        """
        return self._default_verify(session, is_subsystem=False)

    @alog.logged_function(log.debug2)
    @alog.timed_function(log.debug2)
    def render_chart(self, session):
        """This will be invoked by the parent Application to build and render
        the individual component's chart

        Args:
            session:  Session
                The session for this reconciliation
        """

        # Do the rendering
        self.__render(session)

        # If a working directory is configured, use it
        if config.working_dir:
            rendered_file = self.to_file(session)
            log.debug("Rendered %s to %s", self, rendered_file)

    def update_object_definition(
        self,
        session: Session,  # pylint: disable=unused-argument
        internal_name: str,  # pylint: disable=unused-argument
        resource_definition: dict,
    ):
        """Allow children to inject arbitrary object mutation logic for
        individual managed objects

        The base implementation simply returns the given definition as a
        passthrough

        Args:
            session:  Session
                The session for this reconciliation
            internal_name:  str
                The internal name of the object to update
            resource_definition:  dict
                The dict representation of the resource to modify

        Returns:
            resource_definition:  dict
                The dict representation of the resource with any modifications
                applied
        """
        return resource_definition

    @alog.logged_function(log.debug2)
    @alog.timed_function(log.debug2)
    def deploy(self, session):
        """Deploy the component

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True on successful application of the kub state (not
                programmatic verification), False otherwise
        """
        assert (
            self._managed_objects is not None
        ), "Cannot call deploy() before render_chart()"

        # Deploy all managed objects
        for obj in self.managed_objects:
            success, _ = session.deploy_manager.deploy(
                resource_definitions=[obj.definition],
                method=obj.deploy_method,
            )
            if not success:
                log.warning("Failed to deploy [%s]", self)
                return False
        return True

    def disable(self, session):
        """Disable the component

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True on successful application of the kub state (not
                programmatic verification), False otherwise
        """
        assert (
            self._managed_objects is not None
        ), "Cannot call disable() before render_chart()"

        # Disable all managed objects
        success, _ = session.deploy_manager.disable(
            [obj.definition for obj in self._managed_objects]
        )
        if not success:
            log.warning("Failed to disable [%s]", self)
            return False
        return True

    ## Resource Interface ####################################################
    #
    # These methods offer functionality that children can use to add resources to
    # a components graph
    ##

    def add_resource(
        self,
        name: str,  # pylint: disable=redefined-builtin
        obj: Any,
        verify_function: Optional[RESOURCE_VERIFY_FUNCTION] = None,
        deploy_method: Optional[DeployMethod] = DeployMethod.DEFAULT,
    ) -> Optional[
        ResourceNode
    ]:  # pylint: disable=unused-argument, redefined-builtin, invalid-name
        """The add_resource function allows the derived class to add resources
        to this component to later be rendered

        Args:
            name:  str
                The name of the resource in the Graph
            obj: Any
                An object or dict which can be manipulated into a dict
                representation of the kubernetes resource
        """
        # Sanitize object to enable native support for openapi objects
        obj = sanitize_for_serialization(obj)

        # Add namespace to obj if not present
        obj.setdefault("metadata", {}).setdefault("namespace", self.session_namespace)

        node = ResourceNode(name, obj, verify_function, deploy_method)
        self.graph.add_node(node)
        return node

    def add_dependency(
        self,
        session: Session,
        *components: "Component",
        verify_function: Optional[COMPONENT_VERIFY_FUNCTION] = None,
    ):
        """This add_dependency function sets up a dependency between this component
        and a list of other components. To add a dependency between resources inside
        this component use resource.add_dependency
        Args:
            session:  Session
                The current resource session
            *components:  Components
                Any number of components to be added as a dependency
            verify_function: Optional[verify_function]
                An Optional callable function of the form `def verify(session) -> bool:`
                to use to verify that the dependency has been satisfied. This
                will be used to block deployment of the component beyond
                requiring that the upstream has been deployed successfully.
        """
        for component in components:
            session.add_component_dependency(self, component, verify_function)

    ## Base Class Utilities ####################################################
    #
    # These methods offer shared functionality that children can (and should)
    # use in their implementations
    ##

    @alog.logged_function(log.debug2)
    def to_dict(self, session):
        """
        Render the component and return it as a Dictionary, mainly useful for testing
        :return: Dictionary of the rendered component
        """
        self.__render(session)
        return [obj.definition for obj in self.managed_objects]

    def to_config(self, session):
        """
        Render the component and return it as an AttrDict, mainly useful for testing
        :return: AttrDict of the rendered component
        """

        return [
            aconfig.Config(obj, override_env_vars=False)
            for obj in self.to_dict(session)
        ]

    def to_file(self, session):
        """
        Render the component to disk and return the rendered file path
        :return: str path to rendered file
        """
        assert config.working_dir is not None, "Config must have a working_dir set"

        # If disabled and not dumping disabled components, nothing to do
        if self.disabled and not config.dump_disabled:
            log.debug("Not dumping disabled component: %s", self)
            return None

        # Get the in-memory representation
        objects = self.to_dict(session)

        # Get the output file name and make sure the directory structure exists
        path_parts = [
            config.working_dir,
            ".".join([session.api_version.replace("/", "."), session.kind]).lower(),
            session.name,
        ]
        if self.disabled:
            path_parts.append("DISABLED")
        path_parts.append(self.name)
        output_dir = os.path.join(*path_parts)
        if not os.path.exists(output_dir):
            log.debug2("Creating output dir: %s", output_dir)
            os.makedirs(output_dir)

        # Serialize to a yaml file
        instance_name = session.name
        output_file = os.path.join(output_dir, f"{instance_name}-{self.name}.k8s.yaml")
        log.debug2("Saving %s to %s", self, output_file)
        with open(output_file, "w", encoding="utf-8") as outfile:
            outfile.write("---\n" + yaml.safe_dump_all(objects))

        return output_file

    ## Base Class Implementation Details #######################################
    #
    # These methods provide shared functionality to the base class function
    # implementations and should not be used directly by children
    ##

    @classmethod
    def get_name(cls):  # pylint: disable=arguments-differ
        """Override get_name to support class attribute"""
        return cls.name

    def _default_verify(self, session, is_subsystem=False):
        """The verify function will run any necessary testing and validation
        that the component needs to ensure that rollout was successfully
        completed.

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True on successful deployment verification, False on failure
                conditions
        """
        log.debug2("Using default verification for [%s]", self)

        # If this is in dry run mode, we skip verification since this relies on
        # checking for changes in the cluster which won't ever happen
        if config.dry_run:
            log.debug2("No verification to perform in dry_run")
            return True

        # Verify all managed resources
        for resource in self.managed_objects:
            log.debug2("Verifying [%s/%s]", resource.kind, resource.name)
            if not verify_resource(
                kind=resource.kind,
                name=resource.name,
                api_version=resource.api_version,
                session=session,
                is_subsystem=is_subsystem,
                namespace=resource.namespace,
                verify_function=resource.verify_function,
            ):
                log.debug("[%s/%s] not verified", resource.kind, resource.name)
                return False
        log.debug("All managed resources verified for [%s]", self)
        return True

    @staticmethod
    def _preserve_patch_annotation(session, internal_name, resource_definition):
        """This implementation helper checks the current state of the given
        resource and patches the desired state to preserve any temporary patch
        annotations found. This is done so that temporary patches can be applied
        to subsystem CRs managed by a top-level controller.
        """

        # Get the current state of the object
        kind = resource_definition.get("kind")
        api_version = resource_definition.get("apiVersion")
        metadata = resource_definition.get("metadata", {})
        name = metadata.get("name")
        namespace = metadata.get("namespace")
        assert (
            kind is not None and api_version is not None and name is not None
        ), f"Resource {internal_name} missing critical metadata!"
        success, content = session.get_object_current_state(
            kind=kind, name=name, api_version=api_version, namespace=namespace
        )
        assert_cluster(
            success,
            f"Failed to look for current state for [{kind}/{api_version}/{namespace}/{name}]",
        )

        # Look for existing patch annotations
        if content is not None:
            content_meta = content.get("metadata", {})
            patch_anno = content_meta.get("annotations", {}).get(
                TEMPORARY_PATCHES_ANNOTATION_NAME
            )

            # If found, update the resource
            if patch_anno:
                resource_definition.setdefault("metadata", {}).setdefault(
                    "annotations", {}
                )[TEMPORARY_PATCHES_ANNOTATION_NAME] = patch_anno

            # Any time we have metadata changes, we need to include the
            # resourceVersion. It can't hurt to do so, so we will just always do
            # it here if found.
            resource_version = content_meta.get("resourceVersion")
            if resource_version is not None:
                resource_definition["metadata"]["resourceVersion"] = resource_version

            # Make sure any ownerReferences are persisted as well
            owner_refs = content_meta.get("ownerReferences")
            if owner_refs:
                resource_definition["metadata"]["ownerReferences"] = owner_refs

        return resource_definition

    def __build_lazy_charts(self, session):
        """Delegate to the child implementation of build_chart for lazy chart
        construction.
        """
        self.build_chart(session)

    @alog.logged_function(log.debug3)
    def __render(self, session):
        """This is the primary implementation for rendering objects into
        self.managed_objects
        """

        # Short-circuit if already rendered
        if self._managed_objects is not None:
            log.debug2(
                "%s returning %d pre-rendered objects", self, len(self._managed_objects)
            )
            return self.managed_objects

        # Generate name and dict representation of objects
        resource_list = self.__gather_resources(session)

        # Iterate all ApiObject children in dependency order and perform the
        # rendering, including patches and backend modifications.
        self._managed_objects = []
        for name, obj, verify_func, deploy_method in resource_list:
            # Apply any patches to this object
            log.debug2("Applying patches to managed object: %s", name)
            log.debug4("Before Patching: %s", obj)
            obj = apply_patches(name, obj, session.temporary_patches)

            # Make sure any temporary patch annotations that exist already
            # on this resource in the cluster are preserved
            log.debug2("Checking for existing subsystem patches on: %s", name)
            obj = self._preserve_patch_annotation(session, name, obj)

            # Add the internal name annotation if enabled
            if config.internal_name_annotation:
                log.debug2(
                    "Adding internal name annotation [%s: %s]",
                    INTERNAL_NAME_ANNOTATION_NAME,
                    name,
                )
                obj.setdefault("metadata", {}).setdefault("annotations", {})[
                    INTERNAL_NAME_ANNOTATION_NAME
                ] = name

            # Allow children to inject additional modification logic
            log.debug4("Before Object Updates: %s", obj)
            obj = self.update_object_definition(session, name, obj)

            # Add the resource to the set managed by this component
            managed_obj = ManagedObject(obj, verify_func, deploy_method)
            log.debug2("Adding managed object: %s", managed_obj)
            log.debug4("Final Definition: %s", obj)
            self._managed_objects.append(managed_obj)

        return self.managed_objects

    def __gather_resources(
        self, session
    ) -> List[Tuple[str, dict, Callable, DeployMethod]]:
        """This is a helper for __render which handles converting resource objects
        into a list of dictionaries.
        """
        # Perform lazy chart creation before finishing rendering
        self.__build_lazy_charts(session)

        # Determine the flattened set of ApiObject children.
        log.debug2("%s populating managed_objects", self)
        topology = self.graph.topology()
        log.debug3("%s topology has %d elements", self, len(topology))
        log.debug4([type(obj) for obj in topology])
        children = [node for node in topology if isinstance(node, ResourceNode)]
        log.debug2("%s found %d ResourceNode children", self, len(children))

        resource_list = []
        for child in children:
            # Construct the managed object with its internal name
            child_name = ".".join([self.name, child.get_name()])
            resource_list.append(
                (child_name, child.manifest, child.verify_function, child.deploy_method)
            )

        return resource_list
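
A minimal sketch of a concrete Component (names are hypothetical; Component is assumed to be re-exported at the oper8 package root, otherwise import it from oper8.component):

from oper8 import Component  # assumption: re-exported at the package root

class MyConfigComponent(Component):
    name = "my-config"  # required class attribute

    def build_chart(self, session):
        # Resources added here are rendered, patched, and deployed by the framework
        self.add_resource(
            "config",
            {
                "apiVersion": "v1",
                "kind": "ConfigMap",
                "metadata": {"name": "my-config"},  # namespace defaults to the session's
                "data": {"greeting": "hello"},
            },
        )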

managed_objects property

The list of managed objects that this Component currently knows about. If called before rendering, this will be an empty list, so it will always be iterable.

Returns:

managed_objects (List[ManagedObject]): The list of known managed objects

__build_lazy_charts(session)

Delegate to the child implementation of build_chart for lazy chart construction.

Source code in oper8/component.py
def __build_lazy_charts(self, session):
    """Delegate to the child implementation of build_chart for lazy chart
    construction.
    """
    self.build_chart(session)

__gather_resources(session)

This is a helper for __render which handles converting resource objects into a list of dictionaries.

Source code in oper8/component.py
def __gather_resources(
    self, session
) -> List[Tuple[str, dict, Callable, DeployMethod]]:
    """This is a helper for __render which handles converting resource objects
    into a list of dictionaries.
    """
    # Perform lazy chart creation before finishing rendering
    self.__build_lazy_charts(session)

    # Determine the flattened set of ApiObject children.
    log.debug2("%s populating managed_objects", self)
    topology = self.graph.topology()
    log.debug3("%s topology has %d elements", self, len(topology))
    log.debug4([type(obj) for obj in topology])
    children = [node for node in topology if isinstance(node, ResourceNode)]
    log.debug2("%s found %d ResourceNode children", self, len(children))

    resource_list = []
    for child in children:
        # Construct the managed object with its internal name
        child_name = ".".join([self.name, child.get_name()])
        resource_list.append(
            (child_name, child.manifest, child.verify_function, child.deploy_method)
        )

    return resource_list

__init__(session, disabled=False)

Construct with the session for this deployment

Parameters:

session (Session): The session that this component will belong to. Required.
disabled (bool): Whether or not this component is disabled. Defaults to False.

Source code in oper8/component.py
def __init__(
    self,
    session: Session,
    disabled: bool = False,
):
    """Construct with the session for this deployment

    Args:
        session:  Session
            The session that this component will belong to
        disabled:  bool
            Whether or not this component is disabled
    """
    # Ensure that the name property is defined by accessing it and that
    # namespace is inherited from session.
    self.name  # noqa: B018
    self.session_namespace = session.namespace
    self.disabled = disabled

    # Initialize Node with name
    super().__init__(self.name)

    # Register with the session
    # NOTE: This is done before the parent initialization so duplicates can
    #   be caught by the session with a nice error rather than Graph
    log.debug2("[%s] Auto-registering %s", session.id, self)
    session.add_component(self)

    # Initialize the Graph that'll control component rendering
    self.graph = Graph()

    # The list of all managed objects owned by this component
    self._managed_objects = None

__render(session)

This is the primary implementation for rendering objects into self.managed_objects

Source code in oper8/component.py
@alog.logged_function(log.debug3)
def __render(self, session):
    """This is the primary implementation for rendering objects into
    self.managed_objects
    """

    # Short-circuit if already rendered
    if self._managed_objects is not None:
        log.debug2(
            "%s returning %d pre-rendered objects", self, len(self._managed_objects)
        )
        return self.managed_objects

    # Generate name and dict representation of objects
    resource_list = self.__gather_resources(session)

    # Iterate all ApiObject children in dependency order and perform the
    # rendering, including patches and backend modifications.
    self._managed_objects = []
    for name, obj, verify_func, deploy_method in resource_list:
        # Apply any patches to this object
        log.debug2("Applying patches to managed object: %s", name)
        log.debug4("Before Patching: %s", obj)
        obj = apply_patches(name, obj, session.temporary_patches)

        # Make sure any temporary patch annotations that exist already
        # on this resource in the cluster are preserved
        log.debug2("Checking for existing subsystem patches on: %s", name)
        obj = self._preserve_patch_annotation(session, name, obj)

        # Add the internal name annotation if enabled
        if config.internal_name_annotation:
            log.debug2(
                "Adding internal name annotation [%s: %s]",
                INTERNAL_NAME_ANNOTATION_NAME,
                name,
            )
            obj.setdefault("metadata", {}).setdefault("annotations", {})[
                INTERNAL_NAME_ANNOTATION_NAME
            ] = name

        # Allow children to inject additional modification logic
        log.debug4("Before Object Updates: %s", obj)
        obj = self.update_object_definition(session, name, obj)

        # Add the resource to the set managed by this component
        managed_obj = ManagedObject(obj, verify_func, deploy_method)
        log.debug2("Adding managed object: %s", managed_obj)
        log.debug4("Final Definition: %s", obj)
        self._managed_objects.append(managed_obj)

    return self.managed_objects

add_dependency(session, *components, verify_function=None)

This add_dependency function sets up a dependency between this component and a list of other components. To add a dependency between resources inside this component, use resource.add_dependency.

Parameters:

session (Session): The current resource session
*components (Component): Any number of components to be added as a dependency
verify_function (Optional[verify_function]): An optional callable function of the form `def verify(session) -> bool:` to use to verify that the dependency has been satisfied. This will be used to block deployment of the component beyond requiring that the upstream has been deployed successfully.

Source code in oper8/component.py
def add_dependency(
    self,
    session: Session,
    *components: "Component",
    verify_function: Optional[COMPONENT_VERIFY_FUNCTION] = None,
):
    """This add_dependency function sets up a dependency between this component
    and a list of other components. To add a dependency between resources inside
    this component use resource.add_dependency
    Args:
        session:  Session
            The current resource session
        *components:  Components
            Any number of components to be added as a dependency
        verify_function: Optional[verify_function]
            An Optional callable function of the form `def verify(session) -> bool:`
            to use to verify that the dependency has been satisfied. This
            will be used to block deployment of the component beyond
            requiring that the upstream has been deployed successfully.
    """
    for component in components:
        session.add_component_dependency(self, component, verify_function)
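
A short sketch of wiring one component to wait on another (class names are hypothetical, building on the Component sketch above):

class Frontend(Component):
    name = "frontend"

    def __init__(self, session, backend: Component):
        super().__init__(session)
        # Block deployment of this component until `backend` has deployed
        # (optionally gated further by a verify_function)
        self.add_dependency(session, backend)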

add_resource(name, obj, verify_function=None, deploy_method=DeployMethod.DEFAULT)

The add_resource function allows the derived class to add resources to this component to later be rendered

Parameters:

name (str): The name of the resource in the Graph. Required.
obj (Any): An object or dict which can be manipulated into a dict representation of the kubernetes resource. Required.

Source code in oper8/component.py
def add_resource(
    self,
    name: str,  # pylint: disable=redefined-builtin
    obj: Any,
    verify_function: Optional[RESOURCE_VERIFY_FUNCTION] = None,
    deploy_method: Optional[DeployMethod] = DeployMethod.DEFAULT,
) -> Optional[
    ResourceNode
]:  # pylint: disable=unused-argument, redefined-builtin, invalid-name
    """The add_resource function allows the derived class to add resources
    to this component to later be rendered

    Args:
        name:  str
            The name of the resource in the Graph
        obj: Any
            An object or dict which can be manipulated into a dict
            representation of the kubernetes resource
    """
    # Sanitize object to enable native support for openapi objects
    obj = sanitize_for_serialization(obj)

    # Add namespace to obj if not present
    obj.setdefault("metadata", {}).setdefault("namespace", self.session_namespace)

    node = ResourceNode(name, obj, verify_function, deploy_method)
    self.graph.add_node(node)
    return node
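
For example, inside a component's build_chart a plain dict works directly; openapi client objects are converted by sanitize_for_serialization, and a missing namespace is filled in from the session (resource names are hypothetical):

from oper8 import Component  # assumption: re-exported at the package root

class ServiceComponent(Component):
    name = "my-service"

    def build_chart(self, session):
        self.add_resource(
            "service",
            {
                "apiVersion": "v1",
                "kind": "Service",
                "metadata": {"name": "my-service"},  # namespace is filled in from the session
                "spec": {"selector": {"app": "my-app"}, "ports": [{"port": 80}]},
            },
        )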

build_chart(session)

The build_chart function allows the derived class to add child Charts lazily so that they can take advantage of post-initialization information.

Parameters:

session (Session): The current deploy session. Required.

Source code in oper8/component.py
def build_chart(self, session: Session):  # pylint: disable=unused-argument
    """The build_chart function allows the derived class to add child Charts
    lazily so that they can take advantage of post-initialization
    information.

    Args:
        session:  Session
            The current deploy session
    """

deploy(session)

Deploy the component

Parameters:

session (Session): The current reconciliation session. Required.

Returns:

success (bool): True on successful application of the kub state (not programmatic verification), False otherwise

Source code in oper8/component.py
@alog.logged_function(log.debug2)
@alog.timed_function(log.debug2)
def deploy(self, session):
    """Deploy the component

    Args:
        session:  Session
            The current reconciliation session

    Returns:
        success:  bool
            True on successful application of the kub state (not
            programmatic verification), False otherwise
    """
    assert (
        self._managed_objects is not None
    ), "Cannot call deploy() before render_chart()"

    # Deploy all managed objects
    for obj in self.managed_objects:
        success, _ = session.deploy_manager.deploy(
            resource_definitions=[obj.definition],
            method=obj.deploy_method,
        )
        if not success:
            log.warning("Failed to deploy [%s]", self)
            return False
    return True

disable(session)

Disable the component

Parameters:

session (Session): The current reconciliation session. Required.

Returns:

success (bool): True on successful application of the kub state (not programmatic verification), False otherwise

Source code in oper8/component.py
def disable(self, session):
    """Disable the component

    Args:
        session:  Session
            The current reconciliation session

    Returns:
        success:  bool
            True on successful application of the kub state (not
            programmatic verification), False otherwise
    """
    assert (
        self._managed_objects is not None
    ), "Cannot call disable() before render_chart()"

    # Disable all managed objects
    success, _ = session.deploy_manager.disable(
        [obj.definition for obj in self._managed_objects]
    )
    if not success:
        log.warning("Failed to disable [%s]", self)
        return False
    return True

get_name() classmethod

Override get_name to support class attribute

Source code in oper8/component.py
@classmethod
def get_name(cls):  # pylint: disable=arguments-differ
    """Override get_name to support class attribute"""
    return cls.name

name()

All Components must implement a name class attribute

Source code in oper8/component.py
@abstractclassproperty
def name(self):
    """All Components must implement a name class attribute"""

render_chart(session)

This will be invoked by the parent Application to build and render the individual component's chart

Parameters:

session (Session): The session for this reconciliation. Required.

Source code in oper8/component.py
@alog.logged_function(log.debug2)
@alog.timed_function(log.debug2)
def render_chart(self, session):
    """This will be invoked by the parent Application to build and render
    the individual component's chart

    Args:
        session:  Session
            The session for this reconciliation
    """

    # Do the rendering
    self.__render(session)

    # If a working directory is configured, use it
    if config.working_dir:
        rendered_file = self.to_file(session)
        log.debug("Rendered %s to %s", self, rendered_file)

to_config(session)

Render the component and return it as an AttrDict, mainly useful for testing.

Returns: AttrDict of the rendered component

Source code in oper8/component.py
def to_config(self, session):
    """
    Render the component and return it as an AttrDict, mainly useful for testing
    :return: AttrDict of the rendered component
    """

    return [
        aconfig.Config(obj, override_env_vars=False)
        for obj in self.to_dict(session)
    ]

to_dict(session)

Render the component and return it as a Dictionary, mainly useful for testing.

Returns: Dictionary of the rendered component

Source code in oper8/component.py
@alog.logged_function(log.debug2)
def to_dict(self, session):
    """
    Render the component and return it as a Dictionary, mainly useful for testing
    :return: Dictionary of the rendered component
    """
    self.__render(session)
    return [obj.definition for obj in self.managed_objects]
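
A small testing sketch, reusing the MyConfigComponent example from the class overview above (how the session is constructed is elided here; any real or dry-run Session works):

component = MyConfigComponent(session)
rendered = component.to_dict(session)
assert rendered[0]["kind"] == "ConfigMap"
assert rendered[0]["metadata"]["namespace"] == session.namespace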

to_file(session)

Render the component to disk and return the rendered file path.

Returns: str path to rendered file

Source code in oper8/component.py
def to_file(self, session):
    """
    Render the component to disk and return the rendered file path
    :return: str path to rendered file
    """
    assert config.working_dir is not None, "Config must have a working_dir set"

    # If disabled and not dumping disabled components, nothing to do
    if self.disabled and not config.dump_disabled:
        log.debug("Not dumping disabled component: %s", self)
        return None

    # Get the in-memory representation
    objects = self.to_dict(session)

    # Get the output file name and make sure the directory structure exists
    path_parts = [
        config.working_dir,
        ".".join([session.api_version.replace("/", "."), session.kind]).lower(),
        session.name,
    ]
    if self.disabled:
        path_parts.append("DISABLED")
    path_parts.append(self.name)
    output_dir = os.path.join(*path_parts)
    if not os.path.exists(output_dir):
        log.debug2("Creating output dir: %s", output_dir)
        os.makedirs(output_dir)

    # Serialize to a yaml file
    instance_name = session.name
    output_file = os.path.join(output_dir, f"{instance_name}-{self.name}.k8s.yaml")
    log.debug2("Saving %s to %s", self, output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        outfile.write("---\n" + yaml.safe_dump_all(objects))

    return output_file

update_object_definition(session, internal_name, resource_definition)

Allow children to inject arbitrary object mutation logic for individual managed objects

The base implementation simply returns the given definition as a passthrough

Parameters:

- session (Session): The session for this reconciliation. Required.
- internal_name (str): The internal name of the object to update. Required.
- resource_definition (dict): The dict representation of the resource to modify. Required.

Returns:

- resource_definition (dict): The dict representation of the resource with any modifications applied

Source code in oper8/component.py
def update_object_definition(
    self,
    session: Session,  # pylint: disable=unused-argument
    internal_name: str,  # pylint: disable=unused-argument
    resource_definition: dict,
):
    """Allow children to inject arbitrary object mutation logic for
    individual managed objects

    The base implementation simply returns the given definition as a
    passthrough

    Args:
        session:  Session
            The session for this reconciliation
        internal_name:  str
            The internal name of the object to update
        resource_definition:  dict
            The dict representation of the resource to modify

    Returns:
        resource_definition:  dict
            The dict representation of the resource with any modifications
            applied
    """
    return resource_definition

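A hedged sketch of a child Component overriding update_object_definition to stamp a common label onto every rendered resource. Database is the hypothetical Component subclass from the earlier sketch, and the label value is arbitrary.

class LabelledDatabase(Database):
    def update_object_definition(self, session, internal_name, resource_definition):
        # Start from the base (passthrough) behavior, then mutate the dict
        resource_definition = super().update_object_definition(
            session, internal_name, resource_definition
        )
        labels = resource_definition.setdefault("metadata", {}).setdefault("labels", {})
        labels["app.kubernetes.io/managed-by"] = "oper8"
        return resource_definition
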
verify(session)

The verify function will run any necessary testing and validation that the component needs to ensure that rollout was successfully completed.

Parameters:

- session (Session): The current reconciliation session. Required.

Returns:

- success (bool): True on successful deployment verification, False on failure conditions

Source code in oper8/component.py
def verify(self, session):
    """The verify function will run any necessary testing and validation
    that the component needs to ensure that rollout was successfully
    completed.

    Args:
        session:  Session
            The current reconciliation session

    Returns:
        success:  bool
            True on successful deployment verification, False on failure
            conditions
    """
    return self._default_verify(session, is_subsystem=False)

config

Base operator config module. The config here is only used as a baseline bootup config. All application-specific config must come from the app_config.

config

This module just loads config at import time and does the initial log config

validation

Module to validate values in a loaded config

get_invalid_params(config, validation_config)

Get a list of any params that are invalid

Parameters:

- config (aconfig.Config): The parsed config with any override values. Required.
- validation_config (aconfig.Config): The parallel config holding validation setup. Required.

Returns:

- invalid_params (List[str]): A list of all string keys for parameters that fail validation

Source code in oper8/config/validation.py
def get_invalid_params(
    config: aconfig.Config,
    validation_config: aconfig.Config,
) -> List[str]:
    """Get a list of any params that are invalid

    Args:
        config:  aconfig.Config
            The parsed config with any override values
        validation_config:  aconfig.Config
            The parallel config holding validation setup

    Returns:
        invalid_params:  List[str]
            A list of all string keys for parameters that fail validation
    """

    # For each validation element, perform the validation
    invalid_params = []
    for val_key, validator in _parse_validation_config(validation_config).items():
        if not validator.validate(nested_get(config, val_key)):
            log.warning("Found invalid config key [%s]", val_key)
            invalid_params.append(val_key)

    # Return the list of invalid params
    return invalid_params

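A minimal sketch of wiring get_invalid_params into startup validation. The config contents below are placeholders, and the exact validator schema expected in validation_config is defined by oper8/config/validation.py rather than shown here.

import aconfig

from oper8.config.validation import get_invalid_params

app_config = aconfig.Config({"replicas": 3}, override_env_vars=False)
validation_config = aconfig.Config({}, override_env_vars=False)  # validator schema is an assumption

invalid = get_invalid_params(app_config, validation_config)
if invalid:
    raise ValueError(f"Invalid config values for keys: {invalid}")
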
constants

Shared module to hold constant values for the library

controller

The Controller class manages a collection of Components and associates them with a CustomResource in the cluster.

Controller

Bases: ABC

This class represents a controller for a single kubernetes custom resource kind. Its primary functionality is to perform a reconciliation of a given CR manifest for an instance of the resource kind against the current state of the cluster. To accomplish this, its reconciliation logic is:

  1. Construct a Directed Acyclic Graph of all Components that this kind needs to manage.
  2. Execute the Graph in dependency order where each node of the graph first renders the manifests for all kubernetes resources managed by the Component, then applies them to the cluster.
  3. Execute a secondary Graph with verification logic for each Component, terminating verification for downstream nodes if any node is not yet verified.

To do this, the main operations of the class are to construct a DAG of Components, then walk them through the primary lifecycle phases:

  1. Run the Component's deploy() function to completion and verify that the actual deployment operations succeeded
  2. Run the Component's verify() function to run component-specific tests that will verify if the deployment is rolled out in a successful state
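
A minimal sketch of a Controller subclass, assuming Controller is exported at the oper8 package level; the group/version/kind values are placeholders and the component wiring inside setup_components is intentionally elided.

import oper8


class MyApp(oper8.Controller):
    group = "example.com"
    version = "v1"
    kind = "MyApp"

    def setup_components(self, session):
        # Build the DAG of Components for this reconciliation here
        ...
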
Source code in oper8/controller.py
class Controller(abc.ABC):
    """This class represents a controller for a single kubernetes custom
    resource kind. Its primary functionality is to perform a reconciliation of a
    given CR manifest for an instance of the resource kind against the current
    state of the cluster. To accomplish this, its reconciliation logic is:

    1. Construct a Directed Acyclic Graph of all Components that this kind
        needs to manage.
    2. Execute the Graph in dependency order where each node of the graph first
        renders the manifests for all kubernetes resources managed by the
        Component, then applies them to the cluster.
    3. Execute a secondary Graph with verification logic for each Component,
        terminating verification for downstream nodes if any node is not yet
        verified.

    To do this, the main operations of the class are to construct a DAG of
    Components, then walk them through the primary lifecycle phases:

    1. Run the Component's deploy() function to completion and verify that the
        actual deployment operations succeeded
    2. Run the Component's verify() function to run component-specific tests
        that will verify if the deployment is rolled out in a successful state
    """

    ## Class Properties ########################################################

    # Derived classes must have class properties for group, version, and kind.
    # To enforce this, we set defaults for all of these and then validate that
    # they are present, we define them as classproperty and raise when accessed
    # from the base implementation.

    # NOTE: pylint is very confused by the use of these property decorators, so
    #   we need to liberally ignore warnings.

    @abstractclassproperty  # noqa: B027
    def group(cls) -> str:
        """The apiVersion group for the resource this controller manages"""

    @abstractclassproperty  # noqa: B027
    def version(cls) -> str:
        """The apiVersion version for the resource this controller manages"""

    @abstractclassproperty  # noqa: B027
    def kind(cls) -> str:
        """The kind for the resource this controller manages"""

    @classproperty
    def finalizer(cls) -> Optional[str]:  # pylint: disable=no-self-argument
        """The finalizer used by this Controller"""
        if cls.has_finalizer:  # pylint: disable=using-constant-test
            return f"finalizers.{cls.kind.lower()}.{cls.group}"  # pylint: disable=no-member
        return None

    @classproperty
    def has_finalizer(cls) -> bool:  # pylint: disable=no-self-argument
        """If the derived class has an implementation of finalize_components, it
        has a finalizer and can be registered for finalize events
        """
        return cls.finalize_components is not Controller.finalize_components

    ## Construction ############################################################

    def __init__(self, config_defaults: Optional[aconfig.Config] = None):
        """The constructor sets up all of the properties of the controller which
        are constant across reconciliations.

        Args:
            config_defaults:  Optional[aconfig.Config]
                Default values for the backend controller config

        """
        # Make sure the class properties are present and not empty
        assert self.group, "Controller.group must be a non-empty string"
        assert self.version, "Controller.version must be a non-empty string"
        assert self.kind, "Controller.kind must be a non-empty string"
        self.config_defaults = config_defaults or aconfig.Config({})

    @classmethod
    def __str__(cls):
        """Stringify with the GVK"""
        return f"Controller({cls.group}/{cls.version}/{cls.kind})"

    ## Abstract Interface ######################################################
    #
    # These functions must be implemented by child classes
    ##

    @abc.abstractmethod
    def setup_components(self, session: Session):
        """Given the session for an individual reconciliation, construct the set
        of Components that will be deployed.

        Error Semantics: Child classes should throw ConfigError if config is
        not valid and include the portion of config where the problem occurred.

        Args:
            session:  Session
                The current session containing the per-event configs
        """

    ## Base Class Interface ####################################################
    #
    # These methods MAY be implemented by children, but contain default
    # implementations that are appropriate for simple cases.
    #
    # NOTE: We liberally use pylint disables here to make the base interface
    #   clear to deriving classes.
    ##

    def finalize_components(self, session: Session):  # noqa: B027
        """When performing a finalizer operation, this function will be called
        to perform custom finalizer logic for this Controller.

        Error Semantics: Child classes should throw ConfigError if config is
        not valid and include the portion of config where the problem occurred.

        NOTE: This method is not abstract since the standard controller usecase
            does not require finalizing

        Args:
            session:  Session
                The current session containing the per-event configs
        """

    def after_deploy(self, session: Session) -> bool:
        """This allows children to inject logic that will run when the
        controller has finished deploying all components, but not necessarily
        verifying all of them. The default behavior is a no-op.

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True if custom hook code executed successfully and lifecycle
                should continue
        """
        return True

    def after_deploy_unsuccessful(self, session: Session, failed: bool) -> bool:
        """This allows children to inject logic that will run when the
        controller has failed or could not finish deploying all components.
        The default behavior is a no-op.

        Args:
            session:  Session
                The current reconciliation session
            failed:  bool
                Indicator of whether or not the termination was a failure

        Returns:
            success:  bool
                True if custom hook code executed successfully and lifecycle
                should continue
        """
        return True

    def after_verify(
        self,
        session: Session,  # pylint: disable=unused-argument
    ) -> bool:
        """This allows children to inject logic that will run when the
        controller has finished verifying all components. The default behavior
        is a no-op.

        Args:
            session:  Session
                The current reconciliation session

        Returns:
            success:  bool
                True if custom hook code executed successfully and lifecycle
                should continue
        """
        return True

    def after_verify_unsuccessful(self, session: Session, failed: bool) -> bool:
        """This allows children to inject logic that will run when the
        controller has finished deploying all components but failed to verify.
        The default behavior is a no-op.

        Args:
            session:  Session
                The current reconciliation session
            failed:  bool
                Indicator of whether or not the termination was a failure

        Returns:
            success:  bool
                True if custom hook code executed successfully and lifecycle
                should continue
        """
        return True

    def should_requeue(self, session: Session) -> Tuple[bool, Optional[RequeueParams]]:
        """should_requeue determines if current reconcile request should be re-queued.

        Children can override default implementation to provide custom logic.
        Default implementation re-queues the request if the reconciling CR status
        hasn't been reached stable state.

        Args:
            session: Session
                The current reconciliation session

        Returns:
            requeue: bool
                True if the reconciliation request should be re-queued
            config: RequeueParams
                 Parameters of requeue request. Can be None if requeue is False.
        """
        api_version = session.api_version
        kind = session.kind
        name = session.name
        namespace = session.namespace
        requeue_params = RequeueParams()
        # Fetch the current status from the cluster
        success, current_state = session.deploy_manager.get_object_current_state(
            api_version=api_version,
            kind=kind,
            name=name,
            namespace=namespace,
        )
        if not success:
            log.warning(
                "Failed to fetch current state for %s/%s/%s", namespace, kind, name
            )
            return True, requeue_params
        # Do not requeue if resource was deleted
        if not current_state:
            log.warning("Resource not found: %s/%s/%s", namespace, kind, name)
            return False, requeue_params

        log.debug3("Current CR manifest for requeue check: %s", current_state)

        verified = verify_subsystem(current_state, session.version)
        return not verified, requeue_params

    def get_cr_manifest_defaults(
        self,
    ) -> Union[dict, aconfig.Config]:
        """This allows children to provide default values for their cr_manifest
        that will be injected where no override is provided in the user-provided
        cr_manifest.

        Returns:
            cr_manifest_defaults:  Union[dict, aconfig.Config]
                The cr defaults. Raw dicts will be converted to Config objects.
        """
        return aconfig.Config({})

    def get_config_defaults(self):
        """This function allows children to override the default values for the session
        config. This value can also be set via the controllers __init__ function.
        """
        return self.config_defaults

    ## Public Interface ########################################################
    #
    # These functions should be used by the reconciliation manager or in
    # tests
    ##

    def run_reconcile(
        self, session: Session, is_finalizer: bool = False
    ) -> CompletionState:
        """Perform a reconciliation iteration for this controller on given a session.
        This function should only be called once per session. The general logic for a
        controller reconcile is as follows:

        1. Set up the set of Components and their dependencies that will be
            managed in this reconciliation based on the CR and config
        2. Invoke the rollout to render each component and apply it to the
            cluster (if not in dry-run), then verify the DAG of components

        Args:
            session:  Session
                The full structured content of the CR manifest for this operand
            is_finalizer:  bool
                If true, the logic in finalize_components is run, otherwise the
                logic in setup_components is called

        Returns:
            result: ReconciliationResult
                The result of reconcile
        """
        # Check if session has already been reconciled
        if not session.graph.empty():
            raise RolloutError("Session has already been reconciled")

        self._manage_components(session, is_finalizer)
        completion_state = self._rollout_components(session)
        return completion_state

    ## Implementation Details ##################################################

    def _manage_components(self, session: Session, is_finalizer: bool):
        """Delegate logic to child's finalize_components or setup_components

        Args:
            session: Session
                The current session being reconciled
            is_finalizer: bool
                Weather the current CR is being deleted

        """

        # If this is a finalizer, run finalize_components
        if is_finalizer:
            log.debug("[%s] Running as finalizer", session.id)
            self.finalize_components(session)

        # Otherwise run setup_components
        else:
            self.setup_components(session)

    @alog.logged_function(log.debug)
    def _rollout_components(self, session: Session):
        """Deploy all dependent components according to the configured
        dependencies between them
        """
        log.debug("Rolling out %s", str(self))

        # Set up the deployment manager and run the rollout
        rollout_manager = RolloutManager(
            session=session,
            after_deploy=self.after_deploy,
            after_deploy_unsuccessful=self.after_deploy_unsuccessful,
            after_verify=self.after_verify,
            after_verify_unsuccessful=self.after_verify_unsuccessful,
        )
        completion_state = rollout_manager.rollout()
        rollout_failed = completion_state.failed()
        log.info("Final rollout state: %s", completion_state)

        # Get Rollout Status
        deploy_completed = completion_state.deploy_completed()
        verify_completed = completion_state.verify_completed()
        log.debug2(
            "Deploy Completed: %s, Verify Completed: %s, Deploy Failed: %s",
            deploy_completed,
            verify_completed,
            rollout_failed,
        )

        # If an oper8 error occurred in the rollout, decorate it with a reference
        # to the completion state itself and then raise it to be handled by the
        # top-level ReconcileManager handler.
        if isinstance(completion_state.exception, Oper8Error):
            log.debug("Handling Oper8Error from rollout")
            completion_state.exception.completion_state = completion_state
            raise completion_state.exception

        # If the deploy failed but didn't trigger an Oper8Error, we'll make one
        # ourselves
        if rollout_failed:
            raise RolloutError(
                "Deploy phase failed", completion_state=completion_state
            ) from completion_state.exception

        return completion_state

__init__(config_defaults=None)

The constructor sets up all of the properties of the controller which are constant across reconciliations.

Parameters:

- config_defaults (Optional[aconfig.Config]): Default values for the backend controller config. Default: None.
Source code in oper8/controller.py
def __init__(self, config_defaults: Optional[aconfig.Config] = None):
    """The constructor sets up all of the properties of the controller which
    are constant across reconciliations.

    Args:
        config_defaults:  Optional[aconfig.Config]
            Default values for the backend controller config

    """
    # Make sure the class properties are present and not empty
    assert self.group, "Controller.group must be a non-empty string"
    assert self.version, "Controller.version must be a non-empty string"
    assert self.kind, "Controller.kind must be a non-empty string"
    self.config_defaults = config_defaults or aconfig.Config({})

__str__() classmethod

Stringify with the GVK

Source code in oper8/controller.py
@classmethod
def __str__(cls):
    """Stringify with the GVK"""
    return f"Controller({cls.group}/{cls.version}/{cls.kind})"

after_deploy(session)

This allows children to inject logic that will run when the controller has finished deploying all components, but not necessarily verifying all of them. The default behavior is a no-op.

Parameters:

- session (Session): The current reconciliation session. Required.

Returns:

- success (bool): True if custom hook code executed successfully and lifecycle should continue

Source code in oper8/controller.py
def after_deploy(self, session: Session) -> bool:
    """This allows children to inject logic that will run when the
    controller has finished deploying all components, but not necessarily
    verifying all of them. The default behavior is a no-op.

    Args:
        session:  Session
            The current reconciliation session

    Returns:
        success:  bool
            True if custom hook code executed successfully and lifecycle
            should continue
    """
    return True

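A hedged sketch of an after_deploy hook that logs once all components have been applied (but not yet verified). MyApp is the hypothetical controller from the earlier sketch and the logger setup is an assumption.

import logging

log = logging.getLogger(__name__)


class MyAppWithHook(MyApp):
    def after_deploy(self, session):
        # Runs after all components have been deployed, before verification
        log.info("Deployed all components for %s/%s", session.namespace, session.name)
        return True
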
after_deploy_unsuccessful(session, failed)

This allows children to inject logic that will run when the controller has failed or could not finish deploying all components. The default behavior is a no-op.

Parameters:

- session (Session): The current reconciliation session. Required.
- failed (bool): Indicator of whether or not the termination was a failure. Required.

Returns:

- success (bool): True if custom hook code executed successfully and lifecycle should continue

Source code in oper8/controller.py
def after_deploy_unsuccessful(self, session: Session, failed: bool) -> bool:
    """This allows children to inject logic that will run when the
    controller has failed or could not finish deploying all components.
    The default behavior is a no-op.

    Args:
        session:  Session
            The current reconciliation session
        failed:  bool
            Indicator of whether or not the termination was a failure

    Returns:
        success:  bool
            True if custom hook code executed successfully and lifecycle
            should continue
    """
    return True

after_verify(session)

This allows children to inject logic that will run when the controller has finished verifying all components. The default behavior is a no-op.

Parameters:

- session (Session): The current reconciliation session. Required.

Returns:

- success (bool): True if custom hook code executed successfully and lifecycle should continue

Source code in oper8/controller.py
def after_verify(
    self,
    session: Session,  # pylint: disable=unused-argument
) -> bool:
    """This allows children to inject logic that will run when the
    controller has finished verifying all components. The default behavior
    is a no-op.

    Args:
        session:  Session
            The current reconciliation session

    Returns:
        success:  bool
            True if custom hook code executed successfully and lifecycle
            should continue
    """
    return True

after_verify_unsuccessful(session, failed)

This allows children to inject logic that will run when the controller has finished deploying all components but failed to verify. The default behavior is a no-op.

Parameters:

- session (Session): The current reconciliation session. Required.
- failed (bool): Indicator of whether or not the termination was a failure. Required.

Returns:

- success (bool): True if custom hook code executed successfully and lifecycle should continue

Source code in oper8/controller.py
def after_verify_unsuccessful(self, session: Session, failed: bool) -> bool:
    """This allows children to inject logic that will run when the
    controller has finished deploying all components but failed to verify.
    The default behavior is a no-op.

    Args:
        session:  Session
            The current reconciliation session
        failed:  bool
            Indicator of whether or not the termination was a failure

    Returns:
        success:  bool
            True if custom hook code executed successfully and lifecycle
            should continue
    """
    return True

finalize_components(session)

When performing a finalizer operation, this function will be called to perform custom finalizer logic for this Controller.

Error Semantics: Child classes should throw ConfigError if config is not valid and include the portion of config where the problem occurred.

NOTE: This method is not abstract since the standard controller use case does not require finalizing.

Parameters:

- session (Session): The current session containing the per-event configs. Required.
Source code in oper8/controller.py
def finalize_components(self, session: Session):  # noqa: B027
    """When performing a finalizer operation, this function will be called
    to perform custom finalizer logic for this Controller.

    Error Semantics: Child classes should throw ConfigError if config is
    not valid and include the portion of config where the problem occurred.

    NOTE: This method is not abstract since the standard controller usecase
        does not require finalizing

    Args:
        session:  Session
            The current session containing the per-event configs
    """

finalizer()

The finalizer used by this Controller

Source code in oper8/controller.py
@classproperty
def finalizer(cls) -> Optional[str]:  # pylint: disable=no-self-argument
    """The finalizer used by this Controller"""
    if cls.has_finalizer:  # pylint: disable=using-constant-test
        return f"finalizers.{cls.kind.lower()}.{cls.group}"  # pylint: disable=no-member
    return None

get_config_defaults()

This function allows children to override the default values for the session config. This value can also be set via the controller's __init__ function.

Source code in oper8/controller.py
def get_config_defaults(self):
    """This function allows children to override the default values for the session
    config. This value can also be set via the controllers __init__ function.
    """
    return self.config_defaults

get_cr_manifest_defaults()

This allows children to provide default values for their cr_manifest that will be injected where no override is provided in the user-provided cr_manifest.

Returns:

- cr_manifest_defaults (Union[dict, aconfig.Config]): The cr defaults. Raw dicts will be converted to Config objects.

Source code in oper8/controller.py
def get_cr_manifest_defaults(
    self,
) -> Union[dict, aconfig.Config]:
    """This allows children to provide default values for their cr_manifest
    that will be injected where no override is provided in the user-provided
    cr_manifest.

    Returns:
        cr_manifest_defaults:  Union[dict, aconfig.Config]
            The cr defaults. Raw dicts will be converted to Config objects.
    """
    return aconfig.Config({})

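A hedged sketch of supplying CR defaults; MyApp is the hypothetical controller from the earlier sketch and the spec fields shown are arbitrary.

class MyAppWithDefaults(MyApp):
    def get_cr_manifest_defaults(self):
        # Raw dicts are converted to Config objects by the framework
        return {"spec": {"size": "small"}}
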
group()

The apiVersion group for the resource this controller manages

Source code in oper8/controller.py
@abstractclassproperty  # noqa: B027
def group(cls) -> str:
    """The apiVersion group for the resource this controller manages"""

has_finalizer()

If the derived class has an implementation of finalize_components, it has a finalizer and can be registered for finalize events

Source code in oper8/controller.py
@classproperty
def has_finalizer(cls) -> bool:  # pylint: disable=no-self-argument
    """If the derived class has an implementation of finalize_components, it
    has a finalizer and can be registered for finalize events
    """
    return cls.finalize_components is not Controller.finalize_components

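A short sketch of how has_finalizer and finalizer are driven purely by overriding finalize_components; both classes build on the hypothetical MyApp controller from the earlier sketch.

class MyAppWithCleanup(MyApp):
    def finalize_components(self, session):
        # Tear down any external state before the CR is deleted
        ...


assert not MyApp.has_finalizer
assert MyAppWithCleanup.has_finalizer
assert MyAppWithCleanup.finalizer == "finalizers.myapp.example.com"
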
kind()

The kind for the resource this controller manages

Source code in oper8/controller.py
@abstractclassproperty  # noqa: B027
def kind(cls) -> str:
    """The kind for the resource this controller manages"""

run_reconcile(session, is_finalizer=False)

Perform a reconciliation iteration for this controller on a given session. This function should only be called once per session. The general logic for a controller reconcile is as follows:

  1. Set up the set of Components and their dependencies that will be managed in this reconciliation based on the CR and config
  2. Invoke the rollout to render each component and apply it to the cluster (if not in dry-run), then verify the DAG of components

Parameters:

- session (Session): The full structured content of the CR manifest for this operand. Required.
- is_finalizer (bool): If true, the logic in finalize_components is run, otherwise the logic in setup_components is called. Default: False.

Returns:

- result (CompletionState): The result of the reconcile

Source code in oper8/controller.py
def run_reconcile(
    self, session: Session, is_finalizer: bool = False
) -> CompletionState:
    """Perform a reconciliation iteration for this controller on given a session.
    This function should only be called once per session. The general logic for a
    controller reconcile is as follows:

    1. Set up the set of Components and their dependencies that will be
        managed in this reconciliation based on the CR and config
    2. Invoke the rollout to render each component and apply it to the
        cluster (if not in dry-run), then verify the DAG of components

    Args:
        session:  Session
            The full structured content of the CR manifest for this operand
        is_finalizer:  bool
            If true, the logic in finalize_components is run, otherwise the
            logic in setup_components is called

    Returns:
        result: ReconciliationResult
            The result of reconcile
    """
    # Check if session has already been reconciled
    if not session.graph.empty():
        raise RolloutError("Session has already been reconciled")

    self._manage_components(session, is_finalizer)
    completion_state = self._rollout_components(session)
    return completion_state

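A sketch of driving a reconcile directly (e.g. from a test). The session object is assumed to come from the surrounding framework or test helpers, and MyApp is the hypothetical controller from the earlier sketch.

controller = MyApp()
completion_state = controller.run_reconcile(session)
assert completion_state.verify_completed()
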
setup_components(session) abstractmethod

Given the session for an individual reconciliation, construct the set of Components that will be deployed.

Error Semantics: Child classes should throw ConfigError if config is not valid and include the portion of config where the problem occurred.

Parameters:

- session (Session): The current session containing the per-event configs. Required.
Source code in oper8/controller.py
@abc.abstractmethod
def setup_components(self, session: Session):
    """Given the session for an individual reconciliation, construct the set
    of Components that will be deployed.

    Error Semantics: Child classes should throw ConfigError if config is
    not valid and include the portion of config where the problem occurred.

    Args:
        session:  Session
            The current session containing the per-event configs
    """

should_requeue(session)

should_requeue determines if the current reconcile request should be re-queued.

Children can override the default implementation to provide custom logic. The default implementation re-queues the request if the reconciling CR status hasn't reached a stable state.

Parameters:

- session (Session): The current reconciliation session. Required.

Returns:

- requeue (bool): True if the reconciliation request should be re-queued
- config (Optional[RequeueParams]): Parameters of the requeue request. Can be None if requeue is False.

Source code in oper8/controller.py
def should_requeue(self, session: Session) -> Tuple[bool, Optional[RequeueParams]]:
    """should_requeue determines if current reconcile request should be re-queued.

    Children can override default implementation to provide custom logic.
    Default implementation re-queues the request if the reconciling CR status
    hasn't been reached stable state.

    Args:
        session: Session
            The current reconciliation session

    Returns:
        requeue: bool
            True if the reconciliation request should be re-queued
        config: RequeueParams
             Parameters of requeue request. Can be None if requeue is False.
    """
    api_version = session.api_version
    kind = session.kind
    name = session.name
    namespace = session.namespace
    requeue_params = RequeueParams()
    # Fetch the current status from the cluster
    success, current_state = session.deploy_manager.get_object_current_state(
        api_version=api_version,
        kind=kind,
        name=name,
        namespace=namespace,
    )
    if not success:
        log.warning(
            "Failed to fetch current state for %s/%s/%s", namespace, kind, name
        )
        return True, requeue_params
    # Do not requeue if resource was deleted
    if not current_state:
        log.warning("Resource not found: %s/%s/%s", namespace, kind, name)
        return False, requeue_params

    log.debug3("Current CR manifest for requeue check: %s", current_state)

    verified = verify_subsystem(current_state, session.version)
    return not verified, requeue_params

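A hedged sketch of overriding should_requeue to always requeue with the default parameters. The import location of RequeueParams is an assumption, and MyApp is the hypothetical controller from the earlier sketch.

from oper8 import RequeueParams  # import location is an assumption


class AlwaysRequeue(MyApp):
    def should_requeue(self, session):
        # Always ask the framework to requeue with default parameters
        return True, RequeueParams()
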
version()

The apiVersion version for the resource this controller manages

Source code in oper8/controller.py
@abstractclassproperty  # noqa: B027
def version(cls) -> str:
    """The apiVersion version for the resource this controller manages"""

dag

Package exports

completion_state

CompletionState holds info about how a DAG Runner completes

CompletionState

This class holds the definition of a CompletionState which manages all the information about how the nodes in a rollout Runner terminated

Source code in oper8/dag/completion_state.py
class CompletionState:
    """
    This class holds the definition of a CompletionState which manages all
    the information about how the nodes in a rollout Runner terminated
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        verified_nodes: Optional[List[Node]] = None,
        unverified_nodes: Optional[List[Node]] = None,
        failed_nodes: Optional[List[Node]] = None,
        unstarted_nodes: Optional[List[Node]] = None,
        exception: Optional[Exception] = None,
    ):
        """Construct with each node set"""
        self.verified_nodes = set(verified_nodes or [])
        self.unverified_nodes = set(unverified_nodes or [])
        self.failed_nodes = set(failed_nodes or [])
        self.unstarted_nodes = set(unstarted_nodes or [])
        self.all_nodes = (
            self.verified_nodes.union(self.unverified_nodes)
            .union(self.failed_nodes)
            .union(self.unstarted_nodes)
        )
        self.exception = exception

        # Make sure the sets are not overlapping
        sets = [
            self.verified_nodes,
            self.unverified_nodes,
            self.failed_nodes,
            self.unstarted_nodes,
        ]
        for i, node_set_a in enumerate(sets):
            for j, node_set_b in enumerate(sets):
                if i != j:
                    assert not node_set_a.intersection(node_set_b), (
                        "Programming Error: "
                        + f"CompletionState constructed with overlapping sets: {str(self)}"
                    )

    def __str__(self):
        return "\n".join(
            [
                f"[NODES] {key}: {list(sorted(nodes))}"
                for key, nodes in [
                    ("Verified", [node.get_name() for node in self.verified_nodes]),
                    ("Unverified", [node.get_name() for node in self.unverified_nodes]),
                    ("Failed", [node.get_name() for node in self.failed_nodes]),
                    ("Unstarted", [node.get_name() for node in self.unstarted_nodes]),
                ]
            ]
            + [
                f"Exception: {self.exception}",
            ]
        )

    def __eq__(self, other: "CompletionState"):
        return (
            self.verified_nodes == other.verified_nodes
            and self.unverified_nodes == other.unverified_nodes
            and self.failed_nodes == other.failed_nodes
            and self.unstarted_nodes == other.unstarted_nodes
        )

    def deploy_completed(self) -> bool:
        """Determine if the dag completed all nodes through to the deploy
        step

        NOTE: An empty node set is considered completed

        Returns:
            completed:  bool
                True if there are no failed nodes and no unstarted nodes
        """
        return not self.failed_nodes and not self.unstarted_nodes

    def verify_completed(self) -> bool:
        """Determine if the dag completed all nodes through to the verification
        step

        NOTE: An empty node set is considered verified

        Returns:
            completed:  bool
                True if there are no nodes found outside of the verified_nodes
                and there is no exception in the termination state
        """
        return (
            not self.unverified_nodes
            and not self.failed_nodes
            and not self.unstarted_nodes
            and not self.exception
        )

    def failed(self) -> bool:
        """Determine if any of the nodes failed

        Returns:
            failed:  bool
                True if there are any nodes in the failed state or there is a
                fatal error
        """
        return bool(self.failed_nodes) or self._fatal_exception()

    def _fatal_exception(self):
        """Helper to determine if there is a fatal exception in the state"""
        return self.exception is not None and getattr(
            self.exception, "is_fatal_error", True
        )
__init__(verified_nodes=None, unverified_nodes=None, failed_nodes=None, unstarted_nodes=None, exception=None)

Construct with each node set

Source code in oper8/dag/completion_state.py
def __init__(  # pylint: disable=too-many-arguments
    self,
    verified_nodes: Optional[List[Node]] = None,
    unverified_nodes: Optional[List[Node]] = None,
    failed_nodes: Optional[List[Node]] = None,
    unstarted_nodes: Optional[List[Node]] = None,
    exception: Optional[Exception] = None,
):
    """Construct with each node set"""
    self.verified_nodes = set(verified_nodes or [])
    self.unverified_nodes = set(unverified_nodes or [])
    self.failed_nodes = set(failed_nodes or [])
    self.unstarted_nodes = set(unstarted_nodes or [])
    self.all_nodes = (
        self.verified_nodes.union(self.unverified_nodes)
        .union(self.failed_nodes)
        .union(self.unstarted_nodes)
    )
    self.exception = exception

    # Make sure the sets are not overlapping
    sets = [
        self.verified_nodes,
        self.unverified_nodes,
        self.failed_nodes,
        self.unstarted_nodes,
    ]
    for i, node_set_a in enumerate(sets):
        for j, node_set_b in enumerate(sets):
            if i != j:
                assert not node_set_a.intersection(node_set_b), (
                    "Programming Error: "
                    + f"CompletionState constructed with overlapping sets: {str(self)}"
                )
deploy_completed()

Determine if the dag completed all nodes through to the deploy step

NOTE: An empty node set is considered completed

Returns:

- completed (bool): True if there are no failed nodes and no unstarted nodes

Source code in oper8/dag/completion_state.py
def deploy_completed(self) -> bool:
    """Determine if the dag completed all nodes through to the deploy
    step

    NOTE: An empty node set is considered completed

    Returns:
        completed:  bool
            True if there are no failed nodes and no unstarted nodes
    """
    return not self.failed_nodes and not self.unstarted_nodes
failed()

Determine if any of the nodes failed

Returns:

- failed (bool): True if there are any nodes in the failed state or there is a fatal error

Source code in oper8/dag/completion_state.py
def failed(self) -> bool:
    """Determine if any of the nodes failed

    Returns:
        failed:  bool
            True if there are any nodes in the failed state or there is a
            fatal error
    """
    return bool(self.failed_nodes) or self._fatal_exception()
verify_completed()

Determine if the dag completed all nodes through to the verification step

NOTE: An empty node set is considered verified

Returns:

- completed (bool): True if there are no nodes found outside of the verified_nodes and there is no exception in the termination state

Source code in oper8/dag/completion_state.py
def verify_completed(self) -> bool:
    """Determine if the dag completed all nodes through to the verification
    step

    NOTE: An empty node set is considered verified

    Returns:
        completed:  bool
            True if there are no nodes found outside of the verified_nodes
            and there is no exception in the termination state
    """
    return (
        not self.unverified_nodes
        and not self.failed_nodes
        and not self.unstarted_nodes
        and not self.exception
    )

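A short sketch of how the three completion predicates relate, assuming CompletionState and Node are importable from oper8.dag (per the package exports above). The node names are placeholders.

from oper8.dag import CompletionState, Node

state = CompletionState(
    verified_nodes=[Node("database")],
    unverified_nodes=[Node("backend")],
)

assert state.deploy_completed()      # no failed or unstarted nodes
assert not state.verify_completed()  # "backend" is not yet verified
assert not state.failed()            # no failed nodes and no fatal exception
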
graph

Graph holds information about a Directed Acyclic Graph

Graph

Class for representing an instance of a Graph. Handles adding and removing nodes as well as graph functions like flattening

Source code in oper8/dag/graph.py
class Graph:
    """Class for representing an instance of a Graph. Handles adding and removing nodes
    as well as graph functions like flattening"""

    def __init__(self) -> None:
        self.__node_dict = {}

        # Add the root node of the Graph. Every member of this graph is also a child
        # of the root node
        self.__root_node = Node()
        self.__node_dict[self.__root_node.get_name()] = self.__root_node

    ## Properties ##############################################################

    @property
    def root(self) -> Node:  # pylint: disable=invalid-name
        """The root node of the Graph"""
        return self.__root_node

    @property
    def node_dict(self) -> dict:
        """Dictionary of all node names and their nodes"""
        return self.__node_dict

    ## Modifiers ##############################################################

    def add_node(self, node: Node):
        """Add node to graph
        Args:
            node:  Node
                The node to be added to the Dag.
        """
        if not node.get_name():
            raise ValueError("None is reserved for the root node of the dag Graph")

        if node.get_name() in self.node_dict:
            raise ValueError(
                f"Only one node with id {node.get_name()} can be added to a Graph"
            )

        self.node_dict[node.get_name()] = node
        self.root.add_child(node)

    def add_node_dependency(
        self, parent_node: Node, child_node: Node, edge_fn: Optional[Callable] = None
    ):
        """Add dependency or "edge" to graph between two nodes. This is the same
        as doing parent_node.add_dependency(child_node)
        Args:
            parent_node:  Node
                The parent or dependent node aka the node that must wait
            child_node: Node
                The child or dependency node aka the node that must be deployed first
            edge_fn:
        """
        if not self.get_node(parent_node.get_name()):
            raise ValueError(f"Parent node {parent_node} is not present in Graph")

        if not self.get_node(child_node.get_name()):
            raise ValueError(f"Child node {child_node} is not present in Graph")

        # Make sure edits are applied to the nodes already present in the graph
        parent_node = self.get_node(parent_node.get_name())
        child_node = self.get_node(child_node.get_name())

        parent_node.add_child(child_node, edge_fn)

    ## Accessors ##############################################################

    def get_node(self, name: str):  # pylint: disable=invalid-name
        """Get the node with name"""
        return self.node_dict.get(name)

    def get_all_nodes(self):
        """Get list of all nodes"""
        return [node for node, _ in self.root.get_children()]

    def has_node(self, node: Node):  # pylint: disable=invalid-name
        """Check if node is in graph"""
        return self.root.has_child(node)

    def empty(self):
        """Check if a graph is empty"""
        return len(self.root.get_children()) == 0

    ## Graph Functions ##############################################################

    def topology(self) -> List["Node"]:
        """Get a list of nodes in deployment order"""
        topology = self.root.topology()
        topology.remove(self.root)
        return topology

    ## Internal Functions ##############################################################

    def __repr__(self):
        str_list = []
        for child, _ in self.root.get_children():
            child_str_list = [node.get_name() for node, _ in child.get_children()]
            str_list.append(f"{child.get_name()}:[{','.join(child_str_list)}]")

        return f"Graph({{{','.join(str_list)}}})"

    def __contains__(self, item: Node):
        return self.has_node(item)

    def __iter__(self):
        """Iterate over all child nodes"""
        return self.get_all_nodes().__iter__()
node_dict property

Dictionary of all node names and their nodes

root property

The root node of the Graph

__iter__()

Iterate over all child nodes

Source code in oper8/dag/graph.py
def __iter__(self):
    """Iterate over all child nodes"""
    return self.get_all_nodes().__iter__()
add_node(node)

Add node to graph.

Parameters:

- node (Node): The node to be added to the Dag.

Source code in oper8/dag/graph.py
def add_node(self, node: Node):
    """Add node to graph
    Args:
        node:  Node
            The node to be added to the Dag.
    """
    if not node.get_name():
        raise ValueError("None is reserved for the root node of the dag Graph")

    if node.get_name() in self.node_dict:
        raise ValueError(
            f"Only one node with id {node.get_name()} can be added to a Graph"
        )

    self.node_dict[node.get_name()] = node
    self.root.add_child(node)
add_node_dependency(parent_node, child_node, edge_fn=None)

Add a dependency or "edge" to the graph between two nodes. This is the same as doing parent_node.add_dependency(child_node).

Parameters:

- parent_node (Node): The parent or dependent node, i.e. the node that must wait
- child_node (Node): The child or dependency node, i.e. the node that must be deployed first
- edge_fn (Optional[Callable]): Optional callable stored as the edge's data

Source code in oper8/dag/graph.py
def add_node_dependency(
    self, parent_node: Node, child_node: Node, edge_fn: Optional[Callable] = None
):
    """Add dependency or "edge" to graph between two nodes. This is the same
    as doing parent_node.add_dependency(child_node)
    Args:
        parent_node:  Node
            The parent or dependent node aka the node that must wait
        child_node: Node
            The child or dependency node aka the node that must be deployed first
        edge_fn:
    """
    if not self.get_node(parent_node.get_name()):
        raise ValueError(f"Parent node {parent_node} is not present in Graph")

    if not self.get_node(child_node.get_name()):
        raise ValueError(f"Child node {child_node} is not present in Graph")

    # Make sure edits are applied to the nodes already present in the graph
    parent_node = self.get_node(parent_node.get_name())
    child_node = self.get_node(child_node.get_name())

    parent_node.add_child(child_node, edge_fn)
empty()

Check if a graph is empty

Source code in oper8/dag/graph.py
def empty(self):
    """Check if a graph is empty"""
    return len(self.root.get_children()) == 0
get_all_nodes()

Get list of all nodes

Source code in oper8/dag/graph.py
def get_all_nodes(self):
    """Get list of all nodes"""
    return [node for node, _ in self.root.get_children()]
get_node(name)

Get the node with name

Source code in oper8/dag/graph.py
def get_node(self, name: str):  # pylint: disable=invalid-name
    """Get the node with name"""
    return self.node_dict.get(name)
has_node(node)

Check if node is in graph

Source code in oper8/dag/graph.py
def has_node(self, node: Node):  # pylint: disable=invalid-name
    """Check if node is in graph"""
    return self.root.has_child(node)
topology()

Get a list of nodes in deployment order

Source code in oper8/dag/graph.py
def topology(self) -> List["Node"]:
    """Get a list of nodes in deployment order"""
    topology = self.root.topology()
    topology.remove(self.root)
    return topology

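A small sketch of building and ordering a Graph, assuming Graph and Node are exported from oper8.dag as the package exports above suggest. The node names are placeholders.

from oper8.dag import Graph, Node

graph = Graph()
database = Node("database")
backend = Node("backend")
graph.add_node(database)
graph.add_node(backend)

# backend waits on database, so database must be deployed first
graph.add_node_dependency(backend, database)

# Prints the deployment order: ['database', 'backend']
print([node.get_name() for node in graph.topology()])
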
node

This module contains a collection of classes for implementing nodes of a Graph

Node

Class for representing a node in the Graph

Source code in oper8/dag/node.py
class Node:
    """Class for representing a node in the Graph"""

    def __init__(
        self,
        name: Optional[str] = None,
        data: Optional[Any] = None,
    ) -> None:
        """Construct a new Node

        Args:
            name:  Optional[str]
                The name of the node
            data:  Optional[Any]
                Any data that should be stored with the node
        """
        self._name = name
        self._data = data
        self.children = {}

    ## Modifiers ##############################################################
    def add_child(self, node: "Node", edge_data: Optional[Any] = None):
        """Add edge from  self to node with optional edge data"""
        if node.dfs(self):
            raise ValueError("Unable to add cyclic dependency")
        self.children[node] = edge_data

    def remove_child(self, node: "Node"):
        """Remove child node from self"""
        if node in self.children:
            self.children.pop(node)

    def set_data(self, data: Any):
        """Mutator for node data"""
        self._data = data

    ## Accessors ##############################################################

    def get_data(self):
        """Accessor for the node's data"""
        return self._data

    def get_name(self):
        """Accessor for the node's name"""
        return self._name

    def has_child(self, node: "Node"):
        """Accessor for specific child"""
        return node in self.children

    def get_children(self) -> set:
        """Accessor for all children"""
        return list(self.children.items())

    ## Graph Functions ##############################################################
    def topology(self) -> List["Node"]:
        """Function to get an ordered topology of a node's children"""
        found = set()
        topology = []

        def visit(node):
            for child, _ in sorted(node.get_children()):
                visit(child)

            if node not in found:
                topology.append(node)
                found.add(node)

        visit(self)

        return topology

    def dfs(self, node: "Node", visited: List["Node"] = None) -> bool:
        """Function to determine if their is a path between two nodes. Used in acyclic check"""
        if not visited:
            visited = []
        if node == self:
            return True
        visited.append(self)
        for child, _ in self.get_children():
            if child not in visited:
                if child.dfs(node, visited):
                    return True
                visited.append(child)
        return False

    ## Internal ##
    def __eq__(self, obj):
        """Compare and sort nodes by name"""
        if not isinstance(obj, Node):
            return False
        return (self.get_name()) == (obj.get_name())

    def __lt__(self, obj):
        if not isinstance(obj, Node):
            return False
        return (self.get_name()) < (obj.get_name())

    def __repr__(self) -> str:
        # __repr__ may be called before __init__, so _name may not be set yet
        if hasattr(self, "_name"):
            return f"{self.__class__.__name__}('{self.get_name()}', {self.get_data()})"
        return super().__repr__()

    def __hash__(self) -> int:
        return self.get_name().__hash__()
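
As a quick illustration of the cycle check performed by add_child, here is a small sketch based only on the class shown above:

from oper8.dag.node import Node

a = Node("a")
b = Node("b")
a.add_child(b)

# The reverse edge would create a cycle, so add_child rejects it
try:
    b.add_child(a)
except ValueError as err:
    print(err)  # Unable to add cyclic dependency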
__eq__(obj)

Compare and sort nodes by name

Source code in oper8/dag/node.py
def __eq__(self, obj):
    """Compare and sort nodes by name"""
    if not isinstance(obj, Node):
        return False
    return (self.get_name()) == (obj.get_name())
__init__(name=None, data=None)

Construct a new Node

Parameters:

Name Type Description Default
name Optional[str]

Optional[str] The name of the node

None
data Optional[Any]

Optional[Any] Any data that should be stored with the node

None
Source code in oper8/dag/node.py
def __init__(
    self,
    name: Optional[str] = None,
    data: Optional[Any] = None,
) -> None:
    """Construct a new Node

    Args:
        name:  Optional[str]
            The name of the node
        data:  Optional[Any]
            Any data that should be stored with the node
    """
    self._name = name
    self._data = data
    self.children = {}
add_child(node, edge_data=None)

Add edge from self to node with optional edge data

Source code in oper8/dag/node.py
def add_child(self, node: "Node", edge_data: Optional[Any] = None):
    """Add edge from  self to node with optional edge data"""
    if node.dfs(self):
        raise ValueError("Unable to add cyclic dependency")
    self.children[node] = edge_data
dfs(node, visited=None)

Function to determine if there is a path between two nodes. Used in acyclic check

Source code in oper8/dag/node.py
def dfs(self, node: "Node", visited: List["Node"] = None) -> bool:
    """Function to determine if their is a path between two nodes. Used in acyclic check"""
    if not visited:
        visited = []
    if node == self:
        return True
    visited.append(self)
    for child, _ in self.get_children():
        if child not in visited:
            if child.dfs(node, visited):
                return True
            visited.append(child)
    return False
get_children()

Accessor for all children

Source code in oper8/dag/node.py
def get_children(self) -> set:
    """Accessor for all children"""
    return list(self.children.items())
get_data()

Accessor for the node's data

Source code in oper8/dag/node.py
def get_data(self):
    """Accessor for specific child"""
    return self._data
get_name()

Accessor for the node's name

Source code in oper8/dag/node.py
def get_name(self):
    """Accessor for specific child"""
    return self._name
has_child(node)

Accessor for specific child

Source code in oper8/dag/node.py
def has_child(self, node: "Node"):
    """Accessor for specific child"""
    return node in self.children
remove_child(node)

Remove child node from self

Source code in oper8/dag/node.py
def remove_child(self, node: "Node"):
    """Remove child node from self"""
    if node in self.children:
        self.children.pop(node)
set_data(data)

Mutator for node data

Source code in oper8/dag/node.py
def set_data(self, data: Any):
    """Mutator for node data"""
    self._data = data
topology()

Function to get an ordered topology of a node's children

Source code in oper8/dag/node.py
def topology(self) -> List["Node"]:
    """Function to get an ordered topology of a node's children"""
    found = set()
    topology = []

    def visit(node):
        for child, _ in sorted(node.get_children()):
            visit(child)

        if node not in found:
            topology.append(node)
            found.add(node)

    visit(self)

    return topology

ResourceNode

Bases: Node

Class for representing a kubernetes resource in the Graph with a function for verifying said resource

Source code in oper8/dag/node.py
class ResourceNode(Node):
    """Class for representing a kubernetes resource in the Graph with
    a function for verifying said resource"""

    def __init__(
        self,
        name: str,
        manifest: dict,
        verify_func: Optional[Callable] = None,
        deploy_method: Optional["DeployMethod"] = None,  # noqa: F821
    ):
        # Override init to require name/manifest parameters
        super().__init__(name, manifest)
        self._verify_function = verify_func
        self._deploy_method = deploy_method
        if not deploy_method:
            # Local
            from ..deploy_manager import DeployMethod

            self._deploy_method = DeployMethod.DEFAULT

    ## ApiObject Parameters and Functions ######################################
    @property
    def api_group(self) -> str:
        """The kubernetes apiVersion group name without the schema version"""
        return self.api_version.split("/")[0]

    @property
    def api_version(self) -> str:
        """The full kubernetes apiVersion"""
        return self.manifest.get("apiVersion")

    @property
    def kind(self) -> str:
        """The resource kind"""
        return self.manifest.get("kind")

    @property
    def metadata(self) -> dict:
        """The full resource metadata dict"""
        return self.manifest.get("metadata", {})

    @property
    def name(self) -> str:
        """The resource metadata.name"""
        return self.metadata.get("name")

    @property
    def manifest(self) -> dict:
        """The resource manifest"""
        return self.get_data()

    @property
    def verify_function(self) -> Optional[Callable]:
        """The resource manifest"""
        return self._verify_function

    @property
    def deploy_method(self) -> Optional["DeployMethod"]:  # noqa: F821
        """The resource manifest"""
        return self._deploy_method

    def add_dependency(self, node: "ResourceNode"):
        """Add a child dependency to this node"""
        self.add_child(node)
api_group property

The kubernetes apiVersion group name without the schema version

api_version property

The full kubernetes apiVersion

deploy_method property

The resource deploy method

kind property

The resource kind

manifest property

The resource manifest

metadata property

The full resource metadata dict

name property

The resource metadata.name

verify_function property

The resource verify function

add_dependency(node)

Add a child dependency to this node

Source code in oper8/dag/node.py
def add_dependency(self, node: "ResourceNode"):
    """Add a child dependency to this node"""
    self.add_child(node)
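
For illustration, a small sketch of wrapping a manifest in a ResourceNode and reading its convenience properties; the manifest content here is hypothetical:

from oper8.dag.node import ResourceNode

manifest = {
    "apiVersion": "apps/v1",
    "kind": "Deployment",
    "metadata": {"name": "backend", "namespace": "demo"},
}
node = ResourceNode("backend-deployment", manifest)

print(node.api_group)    # apps
print(node.api_version)  # apps/v1
print(node.kind)         # Deployment
print(node.name)         # backend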

runner

This module contains a collection of classes for executing functions along a DAG

DagHaltError

Bases: Exception

Custom exception used to indicate that a Runner execution should halt

Source code in oper8/dag/runner.py
class DagHaltError(Exception):
    """Custom exception used to indicate that a Runner execution should halt"""

    def __init__(
        self,
        failure: bool,
        exception: Exception = None,
    ):
        super().__init__()
        self.failure = failure
        self.exception = exception
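
As the Runner source further below shows, a node function can raise DagHaltError to stop execution; failure=True marks the node as failed, while failure=False leaves it merely unverified. A hedged sketch of a hypothetical node function using it:

from oper8.dag.runner import DagHaltError

def deploy_database():
    ready = False  # stand-in for a real readiness check
    if not ready:
        # Halt the graph without marking this node as a failure
        raise DagHaltError(failure=False)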

NonThreadPoolExecutor

Bases: Executor

This "pool" implements the Executor interfaces, but runs without any threads. This is used when running a Runner without cocurrency

Source code in oper8/dag/runner.py
class NonThreadPoolExecutor(Executor):
    """This "pool" implements the Executor interfaces, but runs without any
    threads. This is used when running a Runner without cocurrency
    """

    def __init__(self, *_, **__):
        """Swallow constructor args so that it can match ThreadPoolExecutor"""
        super().__init__()

    @staticmethod
    def submit(fn: Callable, /, *args, **kwargs):
        """Run the function immediately and return a pre-completed Future"""
        fut = Future()
        fut.set_result(fn(*args, **kwargs))
        return fut

    @staticmethod
    def shutdown(*_, **__):
        """Nothing to do since this is not a real pool"""
__init__(*_, **__)

Swallow constructor args so that it can match ThreadPoolExecutor

Source code in oper8/dag/runner.py
def __init__(self, *_, **__):
    """Swallow constructor args so that it can match ThreadPoolExecutor"""
    super().__init__()
shutdown(*_, **__) staticmethod

Nothing to do since this is not a real pool

Source code in oper8/dag/runner.py
@staticmethod
def shutdown(*_, **__):
    """Nothing to do since this is not a real pool"""
submit(fn, /, *args, **kwargs) staticmethod

Run the function immediately and return a pre-completed Future

Source code in oper8/dag/runner.py
@staticmethod
def submit(fn: Callable, /, *args, **kwargs):
    """Run the function immediately and return a pre-completed Future"""
    fut = Future()
    fut.set_result(fn(*args, **kwargs))
    return fut
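
A small sketch showing that submit runs the callable inline and hands back an already-completed Future:

from oper8.dag.runner import NonThreadPoolExecutor

pool = NonThreadPoolExecutor(max_workers=4)  # constructor arguments are ignored
future = pool.submit(sum, [1, 2, 3])
print(future.done())    # True: the call already ran synchronously
print(future.result())  # 6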

Runner

This is a very simple "keep running until done" Runner executor which uses a ThreadPoolExecutor to allow non-blocking calls to execute in parallel.

Source code in oper8/dag/runner.py
class Runner:  # pylint: disable=too-many-instance-attributes
    """This is a very simple "keep running until done" Runner executor which uses
    a ThreadPoolExecutor to allow non-blocking calls to execute in parallel.
    """

    @property
    def graph(self) -> str:  # pylint: disable=missing-function-docstring
        return self._graph

    def __init__(  # pylint: disable=too-many-arguments
        self,
        name: str = "",
        threads: Optional[int] = None,
        graph: Optional[Graph] = None,
        default_function: Optional[Callable[["Node"], bool]] = None,
        poll_time: float = 0.05,
        verify_upstream: bool = True,
    ):
        """Construct a Runner which will manage the execution of a single Graph

        Args:
            name:  str
                String name that can be used for logging to differentiate deploy
                and verify Graph executions
            threads:  Optional[int]
                Number of threads to use. If not given, the default behavior of
                ThreadPoolExecutor is to use the number of available cores.
            graph:  Optional[Graph]
                Existing graph to use, if not supplied an empty graph is created
            default_function: Optional[Callable[["Node"],None]]=None
                Function that will be called on node run if specific function is not provided
            poll_time:  float
                How often to check runner status
            verify_upstream: bool
                Conditional to control whether to check a node's upstream via its edge function
        """
        self.name = name
        # If threads are disabled, use the NonThreadPoolExecutor
        if threads == 0:
            log.debug("Running without threading")
            pool_type = NonThreadPoolExecutor
        else:
            log.debug("Running with %s threads", threads)
            pool_type = ThreadPoolExecutor
        self._pool = pool_type(max_workers=threads)
        self._graph = graph or Graph()
        self._default_node_func = default_function or (lambda _: None)

        self._failed = False
        self._exception = None
        self._verify_upstream = verify_upstream
        self._poll_time = poll_time
        self._started_nodes = []
        self._disabled_nodes = []

        # Nodes can terminate in one of three states:
        #   1. Completed and verified
        #   2. Completed, but not verified
        #   3. Failed
        self._verified_nodes = []
        self._unverified_nodes = []
        self._failed_nodes = []

    ## Public ##

    def disable_node(
        self,
        node: "Node",
    ):
        """Function to disable a node in the graph. This will skip the node in runner without
        changing the graph"""
        graph_node = self.graph.get_node(node.get_name())
        if graph_node:
            self._disabled_nodes.append(graph_node)

    def enable_node(
        self,
        node: "Node",
    ):
        """Function to reenable a node after it was disabled by Runner.disable_node"""
        graph_node = self.graph.get_node(node.get_name())
        if graph_node in self._disabled_nodes:
            self._disabled_nodes.remove(graph_node)

    def completion_state(self):
        """Get the state of which nodes completed and which failed

        Returns:
            completion_state:  CompletionState
                The state holding the full view of the termination state of each
                node
        """
        return CompletionState(
            verified_nodes=self._verified_nodes,
            unverified_nodes=self._unverified_nodes,
            failed_nodes=self._failed_nodes,
            unstarted_nodes=[
                node
                for node in self.graph.get_all_nodes()
                if node not in self._get_completed_nodes()
            ],
            exception=self._exception,
        )

    def run(self):
        """Run the Runner! This will continue until the graph has run to completion
        or halted due to an error.
        """
        node_list = self._get_runnable_nodes()
        log.debug3(
            "Started Nodes: %s, All Nodes: %s",
            self._started_nodes,
            list(node_list),
        )

        # The "node start" loop should terminate if:
        # 1. All nodes have started
        # 2. All started nodes have completed in one form or another and there
        #   are no newly ready nodes
        while len(self._started_nodes) < len(node_list):
            # Get the set of nodes that has completed already
            #
            # NOTE: It's _critically_ important that this be done before getting
            #   the ready nodes. The operation of getting ready nodes can
            #   delegate to user-defined verification functions which may be
            #   very slow and IO bound. With slow verification functions, a node
            #   running in a thread may complete and mark itself verified after
            #   a downstream dependency has checked its completion status, but
            #   before the full set of _get_ready_nodes() checks has passed. If
            #   this happens and _get_completed_nodes() is called afterwards,
            #   the short-circuit logic below will think that all started nodes
            #   have completed and there are no ready nodes, thus terminating
            #   the Runner prematurely.
            completed_nodes = self._get_completed_nodes()

            # Get the currently ready nodes
            ready_nodes = self._get_ready_nodes()

            # If there are no ready nodes and all started nodes have completed
            # in one way or another, we're in an early termination case
            log.debug4("Ready Nodes: %s", ready_nodes)
            log.debug4("Completed Nodes: %s", completed_nodes)
            if not ready_nodes and set(self._started_nodes) == set(completed_nodes):
                log.debug2(
                    "[%s] Graph exhausted all available nodes. Terminating early.",
                    self.name,
                )
                break

            # If there are new ready nodes, start them
            if ready_nodes:
                log.debug2(
                    "Ready nodes: %s. Remaining nodes: %s",
                    ready_nodes,
                    [
                        node
                        for node in node_list
                        if node not in ready_nodes and node not in completed_nodes
                    ],
                )
            for ready_node in ready_nodes:
                self._started_nodes.append(ready_node)
                self._pool.submit(self._run_node, ready_node)
            time.sleep(self._poll_time)

        # Log out the state of the graph once we've terminated, but before we've
        # waited for all nodes to terminate
        log.debug2("[NODES] Started: %s", sorted(self._started_nodes))
        log.debug2("[NODES] Verified: %s", sorted(self._verified_nodes))
        log.debug2("[NODES] Unverified: %s", sorted(self._unverified_nodes))
        log.debug2("[NODES] Failed: %s", sorted(self._failed_nodes))
        log.debug2("[NODES] All: %s", sorted(list(node_list)))

        # Wait until all started nodes have finished one way or the other
        while len(self._get_completed_nodes()) != len(self._started_nodes):
            time.sleep(self._poll_time)

        # Make sure any in-flight nodes complete before terminating
        log.debug2("Waiting for in-flight nodes to complete")
        self._pool.shutdown()
        log.debug2("All nodes complete")
        log.debug2(self.completion_state())

    ## Implementation Details ##

    def _run_node(self, node: "Node"):
        node_name = node.get_name()
        log.debug2("Starting node: %s", node_name)

        try:
            # Call node function or default
            node_func = node.get_data()
            if callable(node_func):
                node_func()
            else:
                self._default_node_func(node)

        except DagHaltError as err:
            log.debug("[%s] DagHaltError caught. Stopping Execution", self.name)
            self._failed = err.failure
            self._exception = err.exception
            if err.failure:
                self._failed_nodes.append(node)
            else:
                self._unverified_nodes.append(node)
        except Exception as err:  # pylint: disable=broad-except
            log.warning(
                "Unexpected exception caught in Runner node: %s", err, exc_info=True
            )
            self._failed = True
            self._failed_nodes.append(node)
        else:
            log.debug2("Node complete: %s", node_name)
            self._verified_nodes.append(node)

    def _dependency_satisfied(self, dep: "Node", verify_fn: Callable = None) -> bool:
        # A dependency is satisfied if
        # a) The upstream has been deployed and no verification function is
        #       given for the dependency
        # b) The upstream has been deployed and the given verification
        #       function passes
        dep_name = dep.get_name()
        if dep not in self._verified_nodes:
            log.debug4("%s not yet verified", dep_name)
            return False

        if not self._verify_upstream:
            log.debug3("%s verified without checking", dep_name)
            return True
        if verify_fn is None:
            log.debug4("%s verified with no verify_fn", dep_name)
            return True

        log.debug4("%s calling verify_fn", dep_name)
        satisfied = verify_fn()
        log.debug4("%s verify_fn() -> %s", dep_name, satisfied)
        return satisfied

    def _get_ready_nodes(self) -> List[str]:
        ready_nodes = []
        for node in [
            n for n in self._get_runnable_nodes() if n not in self._started_nodes
        ]:
            node_name = node.get_name()
            log.debug4("Checking if %s is ready", node_name)
            node_deps = node.get_children()
            satisfied_dependencies = [
                (self._dependency_satisfied(dep, verify_fn), dep)
                for dep, verify_fn in node_deps
            ]
            if all(res[0] for res in satisfied_dependencies):
                ready_nodes.append(node)
            else:
                log.debug3(
                    "[%s] waiting on upstreams: %s",
                    node_name,
                    [res[1] for res in satisfied_dependencies if not res[0]],
                )
        return ready_nodes

    def _get_completed_nodes(self) -> List[str]:
        return self._verified_nodes + self._unverified_nodes + self._failed_nodes

    def _get_runnable_nodes(self) -> List[Node]:
        return set(self.graph.get_all_nodes()) - set(self._disabled_nodes)
__init__(name='', threads=None, graph=None, default_function=None, poll_time=0.05, verify_upstream=True)

Construct a Runner which will manage the execution of a single Graph

Parameters:

Name Type Description Default
name str

str String name that can be used for logging to differentiate deploy and verify Graph executions

''
threads Optional[int]

Optional[int] Number of threads to use. If not given, the default behavior of ThreadPoolExecutor is to use the number of available cores.

None
graph Optional[Graph]

Optional[Graph] Existing graph to use, if not supplied an empty graph is created

None
default_function Optional[Callable[[Node], bool]]

Optional[Callable[["Node"],None]]=None Function that will be called on node run if specific function is not provided

None
poll_time float

float How often to check runner status

0.05
verify_upstream bool

bool Conditional to control whether to check a node's upstream via its edge function

True
Source code in oper8/dag/runner.py
def __init__(  # pylint: disable=too-many-arguments
    self,
    name: str = "",
    threads: Optional[int] = None,
    graph: Optional[Graph] = None,
    default_function: Optional[Callable[["Node"], bool]] = None,
    poll_time: float = 0.05,
    verify_upstream: bool = True,
):
    """Construct a Runner which will manage the execution of a single Graph

    Args:
        name:  str
            String name that can be used for logging to differentiate deploy
            and verify Graph executions
        threads:  Optional[int]
            Number of threads to use. If not given, the default behavior of
            ThreadPoolExecutor is to use the number of available cores.
        graph:  Optional[Graph]
            Existing graph to use, if not supplied an empty graph is created
        default_function: Optional[Callable[["Node"],None]]=None
            Function that will be called on node run if specific function is not provided
        poll_time:  float
            How often to check runner status
        verify_upstream: bool
            Conditional to control whether to check a node's upstream via its edge function
    """
    self.name = name
    # If threads are disabled, use the NonThreadPoolExecutor
    if threads == 0:
        log.debug("Running without threading")
        pool_type = NonThreadPoolExecutor
    else:
        log.debug("Running with %s threads", threads)
        pool_type = ThreadPoolExecutor
    self._pool = pool_type(max_workers=threads)
    self._graph = graph or Graph()
    self._default_node_func = default_function or (lambda _: None)

    self._failed = False
    self._exception = None
    self._verify_upstream = verify_upstream
    self._poll_time = poll_time
    self._started_nodes = []
    self._disabled_nodes = []

    # Nodes can terminate in one of three states:
    #   1. Completed and verified
    #   2. Completed, but not verified
    #   3. Failed
    self._verified_nodes = []
    self._unverified_nodes = []
    self._failed_nodes = []
completion_state()

Get the state of which nodes completed and which failed

Returns:

Name Type Description
completion_state

CompletionState The state holding the full view of the termination state of each node

Source code in oper8/dag/runner.py
def completion_state(self):
    """Get the state of which nodes completed and which failed

    Returns:
        completion_state:  CompletionState
            The state holding the full view of the termination state of each
            node
    """
    return CompletionState(
        verified_nodes=self._verified_nodes,
        unverified_nodes=self._unverified_nodes,
        failed_nodes=self._failed_nodes,
        unstarted_nodes=[
            node
            for node in self.graph.get_all_nodes()
            if node not in self._get_completed_nodes()
        ],
        exception=self._exception,
    )
disable_node(node)

Function to disable a node in the graph. This will skip the node in the runner without changing the graph

Source code in oper8/dag/runner.py
def disable_node(
    self,
    node: "Node",
):
    """Function to disable a node in the graph. This will skip the node in runner without
    changing the graph"""
    graph_node = self.graph.get_node(node.get_name())
    if graph_node:
        self._disabled_nodes.append(graph_node)
enable_node(node)

Function to reenable a node after it was disabled by Runner.disable_node

Source code in oper8/dag/runner.py
def enable_node(
    self,
    node: "Node",
):
    """Function to reenable a node after it was disabled by Runner.disable_node"""
    graph_node = self.graph.get_node(node.get_name())
    if graph_node in self._disabled_nodes:
        self._disabled_nodes.remove(graph_node)
run()

Run the Runner! This will continue until the graph has run to completion or halted due to an error.

Source code in oper8/dag/runner.py
def run(self):
    """Run the Runner! This will continue until the graph has run to completion
    or halted due to an error.
    """
    node_list = self._get_runnable_nodes()
    log.debug3(
        "Started Nodes: %s, All Nodes: %s",
        self._started_nodes,
        list(node_list),
    )

    # The "node start" loop should terminate if:
    # 1. All nodes have started
    # 2. All started nodes have completed in one form or another and there
    #   are no newly ready nodes
    while len(self._started_nodes) < len(node_list):
        # Get the set of nodes that has completed already
        #
        # NOTE: It's _critically_ important that this be done before getting
        #   the ready nodes. The operation of getting ready nodes can
        #   delegate to user-defined verification functions which may be
        #   very slow and IO bound. With slow verification functions, a node
        #   running in a thread may complete and mark itself verified after
        #   a downstream dependency has checked its completion status, but
        #   before the full set of _get_ready_nodes() checks has passed. If
        #   this happens and _get_completed_nodes() is called afterwards,
        #   the short-circuit logic below will think that all started nodes
        #   have completed and there are no ready nodes, thus terminating
        #   the Runner prematurely.
        completed_nodes = self._get_completed_nodes()

        # Get the currently ready nodes
        ready_nodes = self._get_ready_nodes()

        # If there are no ready nodes and all started nodes have completed
        # in one way or another, we're in an early termination case
        log.debug4("Ready Nodes: %s", ready_nodes)
        log.debug4("Completed Nodes: %s", completed_nodes)
        if not ready_nodes and set(self._started_nodes) == set(completed_nodes):
            log.debug2(
                "[%s] Graph exhausted all available nodes. Terminating early.",
                self.name,
            )
            break

        # If there are new ready nodes, start them
        if ready_nodes:
            log.debug2(
                "Ready nodes: %s. Remaining nodes: %s",
                ready_nodes,
                [
                    node
                    for node in node_list
                    if node not in ready_nodes and node not in completed_nodes
                ],
            )
        for ready_node in ready_nodes:
            self._started_nodes.append(ready_node)
            self._pool.submit(self._run_node, ready_node)
        time.sleep(self._poll_time)

    # Log out the state of the graph once we've terminated, but before we've
    # waited for all nodes to terminate
    log.debug2("[NODES] Started: %s", sorted(self._started_nodes))
    log.debug2("[NODES] Verified: %s", sorted(self._verified_nodes))
    log.debug2("[NODES] Unverified: %s", sorted(self._unverified_nodes))
    log.debug2("[NODES] Failed: %s", sorted(self._failed_nodes))
    log.debug2("[NODES] All: %s", sorted(list(node_list)))

    # Wait until all started nodes have finished one way or the other
    while len(self._get_completed_nodes()) != len(self._started_nodes):
        time.sleep(self._poll_time)

    # Make sure any in-flight nodes complete before terminating
    log.debug2("Waiting for in-flight nodes to complete")
    self._pool.shutdown()
    log.debug2("All nodes complete")
    log.debug2(self.completion_state())
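
To tie the pieces together, a minimal sketch of running a two-node graph without threads. As in the earlier Graph example, Graph.add_node is assumed (it is not shown in this excerpt), and the node data values are callables run by the Runner:

from oper8.dag.graph import Graph
from oper8.dag.node import Node
from oper8.dag.runner import Runner

order = []
graph = Graph()
database = Node("database", lambda: order.append("database"))
backend = Node("backend", lambda: order.append("backend"))

graph.add_node(database)  # add_node assumed; not shown above
graph.add_node(backend)
graph.add_node_dependency(backend, database, None)  # no edge verification function

# threads=0 selects the NonThreadPoolExecutor, so nodes run inline
runner = Runner(name="deploy", threads=0, graph=graph)
runner.run()

print(order)  # ['database', 'backend']
print(runner.completion_state())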

decorator

Decorator for making the authoring of "pure" components easier

component(name)

The @component decorator is the primary entrypoint for creating an oper8.Component. It ensures the wrapped type's interface matches the expected Component interface, including the "name" class attribute.

Parameters:

Name Type Description Default
name str

str The name string will be set as the class property for the wrapped class

required

Returns:

Name Type Description
decorator Callable[[Type], Type]

Callable[[Type[Component]], Type[Component]] The decorator function that will be invoked on construction of decorated classes

Source code in oper8/decorator.py
def component(name: str) -> Callable[[Type], Type]:
    """The @component decorator is the primary entrypoint for creating an
    oper8.Component. It ensures the wrapped type's interface matches the expected
    Component interface, including the "name" class attribute.

    Args:
        name:  str
            The name string will be set as the class property for the wrapped
            class

    Returns:
        decorator:  Callable[[Type[Component]], Type[Component]]
            The decorator function that will be invoked on construction of
            decorated classes
    """

    def decorator(cls: Type[Component]) -> Type[Component]:
        cls.name = name
        return cls

    return decorator
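
A minimal sketch of the decorator in isolation; in real use the decorated class would subclass oper8.Component, which is omitted here:

from oper8.decorator import component

@component("my-database")
class DatabaseComponent:  # would normally subclass oper8.Component
    pass

assert DatabaseComponent.name == "my-database"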

controller(group, version, kind, finalizer=None, extra_properties=None)

The @controller decorator is the primary entrypoint for creating an oper8.Controller. It ensures the wrapped type's interface matches the required Controller interface, including class properties.

The extra_properties argument is an entrypoint for loosely coupled Controller-specific configuration that is tied to the specific WatchManager implementation being used. The current list of useful properties is:

  • disable_vcs: This can be used to tell the AnsibleWatchManager that the Controller will not use ansible-vcs, even if other Controllers managed by the same operator do.
  • pwm_filters: This can be used to tell the PythonWatchManager of any additional watch filters. If the value is a list then the filters are added to all watches including dependent watches. If the value is a dict then it expects the keys to be the resource global id with the values being a list of filters for that resource
  • pwm_subsystems: This can be used to tell the PythonWatchManager of any subsystem relations. This allows a "subsystem" controller to be run during the reconciliation of another, similar to the DryRunWatchManager

Parameters:

Name Type Description Default
group str

str The apiVersion group for the resource this controller manages

required
version str

str The apiVersion version for the resource this controller manages

required
kind str

str The kind for the resource this controller manages

required
extra_properties Optional[Dict[str, any]]

Optional[Dict[str, any]] Extra properties that should be defined as class-properties for this controller

None

Returns:

Name Type Description
decorator Callable[[Type[Controller]], Type[Controller]]

Callable[[Type[Controller]], Type[Controller]] The decorator function that will be invoked on construction of decorated classes

Source code in oper8/decorator.py
def controller(  # pylint: disable=too-many-arguments
    group: str,
    version: str,
    kind: str,
    finalizer: str = None,
    extra_properties: Optional[Dict[str, any]] = None,
) -> Callable[[Type[Controller]], Type[Controller]]:
    """The @controller decorator is the primary entrypoint for creating an
    oper8.Controller. It ensures the wrapped type's interface matches the
    required Controller interface, including class properties.

    NOTE: The `extra_properties` argument is an entrypoint for loosely coupled
        Controller-specific configuration that is tied to the specific
        WatchManager implementation being used. The current list of useful
        properties is:

        * disable_vcs: This can be used to tell the AnsibleWatchManager
            that the Controller will not use ansible-vcs, even if other
            Controllers managed by the same operator do.
        * pwm_filters: This can be used to tell the PythonWatchManager of any
            additional watch filters. If value is a list then the filters are added
            to all watches including dependent watches. If the value is a dict, then
            it expects the keys to be the resource global id with the values being a list
            of filters for that resource
        * pwm_subsystems: This can be used to tell the PythonWatchManager of any
            subsystem relations. This allows a "subsystem" controller to be run during
            the reconciliation of another, similar to the DryRunWatchManager

    Args:
        group:  str
            The apiVersion group for the resource this controller manages
        version:  str
            The apiVersion version for the resource this controller manages
        kind:  str
            The kind for the resource this controller manages
        extra_properties:  Optional[Dict[str, any]]
            Extra properties that should be defined as class-properties for this
            controller

    Returns:
        decorator:  Callable[[Type[Controller]], Type[Controller]]
            The decorator function that will be invoked on construction of
            decorated classes
    """

    def decorator(cls: Type[Controller]) -> Type[Controller]:
        cls.group = group
        cls.version = version
        cls.kind = kind
        for key, val in (extra_properties or {}).items():
            setattr(cls, key, val)
        if finalizer is not None:
            cls.finalizer = finalizer
        return cls

    return decorator
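
Similarly, a sketch of the class properties the decorator sets; a real controller would subclass oper8.Controller:

from oper8.decorator import controller

@controller(
    group="example.org",
    version="v1",
    kind="MyApp",
    finalizer="example.org/my-finalizer",
    extra_properties={"disable_vcs": True},
)
class MyAppController:  # would normally subclass oper8.Controller
    pass

assert MyAppController.group == "example.org"
assert MyAppController.finalizer == "example.org/my-finalizer"
assert MyAppController.disable_vcs is True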

deploy_manager

The DeployManager is the abstraction in charge of interacting with the kubernetes cluster to deploy, look up, and delete resources.

base

This defines the base class for all DeployManager types.

DeployManagerBase

Bases: ABC

Base class for deploy managers which will be responsible for carrying out the actual deploy of an Application/Component.

Source code in oper8/deploy_manager/base.py
class DeployManagerBase(abc.ABC):
    """
    Base class for deploy managers which will be responsible for carrying out
    the actual deploy of an Application/Component.
    """

    @abc.abstractmethod
    def deploy(
        self,
        resource_definitions: List[dict],
        manage_owner_references: bool = True,
        method: DeployMethod = DeployMethod.DEFAULT,
    ) -> Tuple[bool, bool]:
        """The deploy function ensures that the resources defined in the list of
        definitions are deployed in the cluster.

        Args:
            resource_definitions:  list(dict)
                List of resource object dicts to apply to the cluster
            manage_owner_references:  bool
                If true, ownerReferences for the parent CR will be applied to
                the deployed object

        Returns:
            success:  bool
                Whether or not the deploy succeeded
            changed:  bool
                Whether or not the deployment resulted in changes
        """

    @abc.abstractmethod
    def disable(self, resource_definitions: List[dict]) -> Tuple[bool, bool]:
        """The disable function ensures that the resources defined in the list of
        definitions are deleted from the cluster

        Args:
            resource_definitions:  list(dict)
                List of resource object dicts to apply to the cluster

        Returns:
            success:  bool
                Whether or not the delete succeeded
            changed:  bool
                Whether or not the delete resulted in changes
        """

    @abc.abstractmethod
    def get_object_current_state(
        self,
        kind: str,
        name: str,
        namespace: Optional[str] = None,
        api_version: Optional[str] = None,
    ) -> Tuple[bool, dict]:
        """The get_current_objects function fetches the current state of a given
        object by name

        Args:
            kind:  str
                The kind of the object to fetch
            name:  str
                The full name of the object to fetch
            namespace:  str
                The namespace to search for the object
            api_version:  str
                The api_version of the resource kind to fetch

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  dict or None
                The dict representation of the current object's configuration,
                or None if not present
        """

    @abc.abstractmethod
    def watch_objects(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        api_version: Optional[str] = None,
        namespace: Optional[str] = None,
        name: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
        resource_version: Optional[str] = None,
    ) -> Iterator[KubeWatchEvent]:
        """The watch_objects function listens for changes in the cluster and returns a
        stream of KubeWatchEvents

        Args:
            kind:  str
                The kind of the object to fetch
            namespace:  str
                The namespace to search for the object
            name:  str
                The name to search for the object
            api_version:  str
                The api_version of the resource kind to fetch
            label_selector:  str
                The label_selector to filter the resources
            field_selector:  str
                The field_selector to filter the resources
            resource_version:  str
                The resource_version the resource must be newer than

        Returns:
            watch_stream: Generator[KubeWatchEvent]
                A stream of KubeWatchEvents generated while watching
        """

    @abc.abstractmethod
    def filter_objects_current_state(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        namespace: Optional[str] = None,
        api_version: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
    ) -> Tuple[bool, List[dict]]:
        """The filter_objects_current_state function fetches a list of objects
        that match either/both the label or field selector

        Args:
            kind:  str
                The kind of the object to fetch
            namespace:  str
                The namespace to search for the object
            api_version:  str
                The api_version of the resource kind to fetch
            label_selector:  str
                The label_selector to filter the resources
            field_selector:  str
                The field_selector to filter the resources

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  List[dict]
                A list of  dict representations for the objects configuration,
                or an empty list if no objects match
        """

    @abc.abstractmethod
    def set_status(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        name: str,
        namespace: Optional[str],
        status: dict,
        api_version: Optional[str] = None,
    ) -> Tuple[bool, bool]:
        """Set the status for an object managed by oper8

        Args:
            kind:  str
                The kind of the object to fetch
            name:  str
                The full name of the object to fetch
            namespace:  Optional[str]
                The namespace to search for the object. If None search cluster wide
            status:  dict
                The status object to set onto the given object
            api_version:  str
                The api_version of the resource to update

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            changed:  bool
                Whether or not the status update resulted in a change
        """
deploy(resource_definitions, manage_owner_references=True, method=DeployMethod.DEFAULT) abstractmethod

The deploy function ensures that the resources defined in the list of definitions are deployed in the cluster.

Parameters:

Name Type Description Default
resource_definitions List[dict]

list(dict) List of resource object dicts to apply to the cluster

required
manage_owner_references bool

bool If true, ownerReferences for the parent CR will be applied to the deployed object

True

Returns:

Name Type Description
success bool

bool Whether or not the deploy succeeded

changed bool

bool Whether or not the deployment resulted in changes

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def deploy(
    self,
    resource_definitions: List[dict],
    manage_owner_references: bool = True,
    method: DeployMethod = DeployMethod.DEFAULT,
) -> Tuple[bool, bool]:
    """The deploy function ensures that the resources defined in the list of
    definitions are deployed in the cluster.

    Args:
        resource_definitions:  list(dict)
            List of resource object dicts to apply to the cluster
        manage_owner_references:  bool
            If true, ownerReferences for the parent CR will be applied to
            the deployed object

    Returns:
        success:  bool
            Whether or not the deploy succeeded
        changed:  bool
            Whether or not the deployment resulted in changes
    """
disable(resource_definitions) abstractmethod

The disable function ensures that the resources defined in the list of definitions are deleted from the cluster

Parameters:

Name Type Description Default
resource_definitions List[dict]

list(dict) List of resource object dicts to apply to the cluster

required

Returns:

Name Type Description
success bool

bool Whether or not the delete succeeded

changed bool

bool Whether or not the delete resulted in changes

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def disable(self, resource_definitions: List[dict]) -> Tuple[bool, bool]:
    """The disable function ensures that the resources defined in the list of
    definitions are deleted from the cluster

    Args:
        resource_definitions:  list(dict)
            List of resource object dicts to apply to the cluster

    Returns:
        success:  bool
            Whether or not the delete succeeded
        changed:  bool
            Whether or not the delete resulted in changes
    """
filter_objects_current_state(kind, namespace=None, api_version=None, label_selector=None, field_selector=None) abstractmethod

The filter_objects_current_state function fetches a list of objects that match either/both the label or field selector

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
namespace Optional[str]

str The namespace to search for the object

None
api_version Optional[str]

str The api_version of the resource kind to fetch

None
label_selector Optional[str]

str The label_selector to filter the resources

None
field_selector Optional[str]

str The field_selector to filter the resources

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state List[dict]

List[dict] A list of dict representations for the objects configuration, or an empty list if no objects match

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def filter_objects_current_state(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    namespace: Optional[str] = None,
    api_version: Optional[str] = None,
    label_selector: Optional[str] = None,
    field_selector: Optional[str] = None,
) -> Tuple[bool, List[dict]]:
    """The filter_objects_current_state function fetches a list of objects
    that match either/both the label or field selector

    Args:
        kind:  str
            The kind of the object to fetch
        namespace:  str
            The namespace to search for the object
        api_version:  str
            The api_version of the resource kind to fetch
        label_selector:  str
            The label_selector to filter the resources
        field_selector:  str
            The field_selector to filter the resources

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  List[dict]
            A list of  dict representations for the objects configuration,
            or an empty list if no objects match
    """
get_object_current_state(kind, name, namespace=None, api_version=None) abstractmethod

The get_current_objects function fetches the current state of a given object by name

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
name str

str The full name of the object to fetch

required
namespace Optional[str]

str The namespace to search for the object

None
api_version Optional[str]

str The api_version of the resource kind to fetch

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state dict

dict or None The dict representation of the current object's configuration, or None if not present

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def get_object_current_state(
    self,
    kind: str,
    name: str,
    namespace: Optional[str] = None,
    api_version: Optional[str] = None,
) -> Tuple[bool, dict]:
    """The get_current_objects function fetches the current state of a given
    object by name

    Args:
        kind:  str
            The kind of the object to fetch
        name:  str
            The full name of the object to fetch
        namespace:  str
            The namespace to search for the object
        api_version:  str
            The api_version of the resource kind to fetch

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  dict or None
            The dict representation of the current object's configuration,
            or None if not present
    """
set_status(kind, name, namespace, status, api_version=None) abstractmethod

Set the status for an object managed by oper8

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
name str

str The full name of the object to fetch

required
namespace Optional[str]

Optional[str] The namespace to search for the object. If None search cluster wide

required
status dict

dict The status object to set onto the given object

required
api_version Optional[str]

str The api_version of the resource to update

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

changed bool

bool Whether or not the status update resulted in a change

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def set_status(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    name: str,
    namespace: Optional[str],
    status: dict,
    api_version: Optional[str] = None,
) -> Tuple[bool, bool]:
    """Set the status for an object managed by oper8

    Args:
        kind:  str
            The kind of the object to fetch
        name:  str
            The full name of the object to fetch
        namespace:  Optional[str]
            The namespace to search for the object. If None search cluster wide
        status:  dict
            The status object to set onto the given object
        api_version:  str
            The api_version of the resource to update

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        changed:  bool
            Whether or not the status update resulted in a change
    """
watch_objects(kind, api_version=None, namespace=None, name=None, label_selector=None, field_selector=None, resource_version=None) abstractmethod

The watch_objects function listens for changes in the cluster and returns a stream of KubeWatchEvents

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
namespace Optional[str]

str The namespace to search for the object

None
name Optional[str]

str The name to search for the object

None
api_version Optional[str]

str The api_version of the resource kind to fetch

None
label_selector Optional[str]

str The label_selector to filter the resources

None
field_selector Optional[str]

str The field_selector to filter the resources

None
resource_version Optional[str]

str The resource_version the resource must be newer than

None

Returns:

Name Type Description
watch_stream Iterator[KubeWatchEvent]

Generator[KubeWatchEvent] A stream of KubeWatchEvents generated while watching

Source code in oper8/deploy_manager/base.py
@abc.abstractmethod
def watch_objects(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    api_version: Optional[str] = None,
    namespace: Optional[str] = None,
    name: Optional[str] = None,
    label_selector: Optional[str] = None,
    field_selector: Optional[str] = None,
    resource_version: Optional[str] = None,
) -> Iterator[KubeWatchEvent]:
    """The watch_objects function listens for changes in the cluster and returns a
    stream of KubeWatchEvents

    Args:
        kind:  str
            The kind of the object to fetch
        namespace:  str
            The namespace to search for the object
        name:  str
            The name to search for the object
        api_version:  str
            The api_version of the resource kind to fetch
        label_selector:  str
            The label_selector to filter the resources
        field_selector:  str
            The field_selector to filter the resources
        resource_version:  str
            The resource_version the resource must be newer than

    Returns:
        watch_stream: Generator[KubeWatchEvent]
            A stream of KubeWatchEvents generated while watching
    """

dry_run_deploy_manager

The DryRunDeployManager implements the DeployManager interface but does not actually interact with the cluster and instead holds the state of the cluster in a local map.
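
Based on the constructor and methods shown below, a rough usage sketch; the resource content is hypothetical and the stored state may include extra bookkeeping fields:

from oper8.deploy_manager.dry_run_deploy_manager import DryRunDeployManager

dm = DryRunDeployManager()
config_map = {
    "apiVersion": "v1",
    "kind": "ConfigMap",
    "metadata": {"name": "demo-config", "namespace": "demo"},
    "data": {"key": "value"},
}

# "Deploy" into the in-memory cluster state
success, changed = dm.deploy([config_map], manage_owner_references=False)

# Read the resource back as if from a real cluster
found, state = dm.get_object_current_state(
    kind="ConfigMap", name="demo-config", namespace="demo", api_version="v1"
)
print(success, changed, found, state["data"])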

DryRunDeployManager

Bases: DeployManagerBase

Deploy manager which doesn't actually deploy!

Source code in oper8/deploy_manager/dry_run_deploy_manager.py
class DryRunDeployManager(DeployManagerBase):
    """
    Deploy manager which doesn't actually deploy!
    """

    def __init__(
        self,
        resources=None,
        owner_cr=None,
        strict_resource_version=False,
        generate_resource_version=True,
    ):
        """Construct with a static value to use for whether or not the functions
        should report change.
        """
        self._owner_cr = owner_cr
        self._cluster_content = {}
        self.strict_resource_version = strict_resource_version
        self.generate_resource_version = generate_resource_version

        # Dicts of registered watches and watchers
        self._watches = {}
        self._finalizers = {}

        # Deploy provided resources
        self._deploy(resources or [], call_watches=False, manage_owner_references=False)

    ## Interface ###############################################################

    def deploy(
        self,
        resource_definitions,
        manage_owner_references=True,
        method: DeployMethod = DeployMethod.DEFAULT,
        **_,
    ):
        log.info("DRY RUN deploy")
        return self._deploy(
            resource_definitions,
            manage_owner_references=manage_owner_references,
            method=method,
        )

    def disable(self, resource_definitions):
        log.info("DRY RUN disable")
        changed = False
        for resource in resource_definitions:
            api_version = resource.get("apiVersion")
            kind = resource.get("kind")
            name = resource.get("metadata", {}).get("name")
            namespace = resource.get("metadata", {}).get("namespace")
            _, content = self.get_object_current_state(
                kind=kind, api_version=api_version, namespace=namespace, name=name
            )
            if content is not None:
                changed = True

                # Set resource finalizers
                with DRY_RUN_CLUSTER_LOCK:
                    self._cluster_content[namespace][kind][api_version][name][
                        "metadata"
                    ]["deletionTimestamp"] = datetime.now().strftime(
                        "%Y-%m-%dT%H:%M:%SZ"
                    )
                    self._cluster_content[namespace][kind][api_version][name][
                        "metadata"
                    ]["deletionGracePeriodSeconds"] = 0

                # Call any registered finalizers
                for key, callback in self._get_registered_watches(
                    api_version, kind, namespace, name, finalizer=True
                ):
                    log.debug2(
                        "Calling registered finalizer [%s] for [%s]", callback, key
                    )
                    callback(self._cluster_content[namespace][kind][api_version][name])

                # If finalizers have been cleared and object hasn't already been deleted then
                # remove the key
                current_obj = (
                    self._cluster_content.get(namespace, {})
                    .get(kind, {})
                    .get(api_version, {})
                    .get(name, {})
                )
                if current_obj and not current_obj.get("metadata", {}).get(
                    "finalizers", []
                ):
                    with DRY_RUN_CLUSTER_LOCK:
                        self._delete_key(namespace, kind, api_version, name)

        return True, changed

    def get_object_current_state(self, kind, name, namespace=None, api_version=None):
        log.info(
            "DRY RUN get_object_current_state of [%s/%s] in [%s]", kind, name, namespace
        )

        # Look in the cluster state
        matches = []
        kind_entries = self._cluster_content.get(namespace, {}).get(kind, {})
        log.debug3("Kind entries: %s", kind_entries)
        for api_ver, entries in kind_entries.items():
            log.debug3("Checking api_version [%s // %s]", api_ver, api_version)
            if name in entries and (api_ver == api_version or api_version is None):
                matches.append(entries[name])
        log.debug(
            "Found %d matches for [%s/%s] in %s", len(matches), kind, name, namespace
        )
        if len(matches) == 1:
            return True, copy.deepcopy(matches[0])
        return True, None

    def filter_objects_current_state(
        self,
        kind,
        namespace=None,
        api_version=None,
        label_selector=None,
        field_selector=None,
    ):  # pylint: disable=too-many-arguments
        log.info(
            "DRY RUN filter_objects_current_state of [%s] in [%s]", kind, namespace
        )
        # Look in the cluster state
        matches = []
        kind_entries = self._cluster_content.get(namespace, {}).get(kind, {})
        log.debug3("Kind entries: %s", kind_entries)
        for api_ver, entries in kind_entries.items():
            # Make sure api version matches
            log.debug3("Checking api_version [%s // %s]", api_ver, api_version)
            if api_ver != api_version and api_version is not None:
                continue

            for resource in entries.values():
                # Make sure Labels Match
                log.debug3("Resource: %s", resource)

                labels = resource.get("metadata", {}).get("labels", {})
                log.debug3("Checking label_selector [%s // %s]", labels, label_selector)
                if label_selector is not None and not _match_selector(
                    labels, label_selector
                ):
                    continue

                # Only do the work for field selector if one exists
                log.debug3("Checking field_selector [%s]", field_selector)
                if field_selector is not None and not _match_selector(
                    _convert_dict_to_dot(resource),
                    field_selector,
                ):
                    continue

                # Add deep copy of entry to matches list
                matches.append(copy.deepcopy(resource))

        return True, matches

    def set_status(
        self,
        kind,
        name,
        namespace,
        status,
        api_version=None,
    ):  # pylint: disable=too-many-arguments
        log.info(
            "DRY RUN set_status of [%s.%s/%s] in %s: %s",
            api_version,
            kind,
            name,
            namespace,
            status,
        )
        object_content = self.get_object_current_state(
            kind, name, namespace, api_version
        )[1]
        if object_content is None:
            log.debug("Did not find [%s/%s] in %s", kind, name, namespace)
            return False, False
        prev_status = object_content.get("status")
        object_content["status"] = status
        self._deploy([object_content], call_watches=False)
        return True, prev_status != status

    def watch_objects(  # pylint: disable=too-many-arguments,too-many-locals,unused-argument
        self,
        kind: str,
        api_version: Optional[str] = None,
        namespace: Optional[str] = None,
        name: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
        resource_version: Optional[str] = None,
        timeout: Optional[int] = 15,
        **kwargs,
    ) -> Iterator[KubeWatchEvent]:
        """Watch the DryRunDeployManager for resource changes by registering
        callbacks"""

        event_queue = Queue()
        resource_map = {}

        def add_event(resource_map: dict, manifest: dict):
            """Callback triggered when resources are deployed"""
            resource = ManagedObject(manifest)
            event_type = KubeEventType.ADDED

            watch_key = self._watch_key(
                api_version=resource.api_version,
                kind=resource.kind,
                namespace=resource.namespace,
                name=resource.name,
            )
            if watch_key in resource_map:
                log.debug4("Watch key detected, setting Modified event type")
                event_type = KubeEventType.MODIFIED

            resource_map[watch_key] = resource
            event = KubeWatchEvent(
                type=event_type,
                resource=resource,
            )
            event_queue.put(event)

        def delete_event(resource_map: dict, manifest: dict):
            """Callback triggered when resources are disabled"""
            resource = ManagedObject(manifest)
            watch_key = self._watch_key(
                api_version=resource.api_version,
                kind=resource.kind,
                namespace=resource.namespace,
                name=resource.name,
            )
            if watch_key in resource_map:
                del resource_map[watch_key]

            event = KubeWatchEvent(
                type=KubeEventType.DELETED,
                resource=resource,
            )
            event_queue.put(event)

        # Get initial resources
        _, manifests = self.filter_objects_current_state(
            kind=kind,
            api_version=api_version,
            namespace=namespace,
            label_selector=label_selector,
            field_selector=field_selector,
        )
        for manifest in manifests:
            resource = ManagedObject(manifest)
            watch_key = self._watch_key(
                kind=resource.kind,
                api_version=resource.api_version,
                name=resource.name,
                namespace=resource.namespace,
            )
            resource_map[watch_key] = resource

            event = KubeWatchEvent(type=KubeEventType.ADDED, resource=resource)
            log.debug2("Yielding initial event %s", event)
            yield event

        end_time = datetime.max
        if timeout:
            end_time = datetime.now() + timedelta(seconds=timeout)

        # Register callbacks
        self.register_watch(
            api_version=api_version,
            kind=kind,
            namespace=namespace,
            name=name,
            callback=partial(add_event, resource_map),
        )
        self.register_finalizer(
            api_version=api_version,
            kind=kind,
            namespace=namespace,
            name=name,
            callback=partial(delete_event, resource_map),
        )

        # Yield any events from the callback queue
        log.debug2("Waiting till %s", end_time)
        while True:
            sec_till_end = (end_time - datetime.now()).seconds or 1
            try:
                event = event_queue.get(timeout=sec_till_end)
                log.debug2("Yielding event %s", event)
                yield event
            except Empty:
                pass

            if datetime.now() > end_time:
                return

    ## Dry Run Methods #########################################################
    def register_watch(  # pylint: disable=too-many-arguments
        self,
        api_version: str,
        kind: str,
        callback: Callable[[dict], None],
        namespace="",
        name="",
    ):
        """Register a callback to watch for deploy events on a given
        api_version/kind
        """
        watch_key = self._watch_key(
            api_version=api_version, kind=kind, namespace=namespace, name=name
        )
        log.debug("Registering watch for %s", watch_key)
        self._watches.setdefault(watch_key, []).append(callback)

    def register_finalizer(  # pylint: disable=too-many-arguments
        self,
        api_version: str,
        kind: str,
        callback: Callable[[dict], None],
        namespace="",
        name="",
    ):
        """Register a callback to call on deletion events on a given
        api_version/kind
        """
        watch_key = self._watch_key(
            api_version=api_version, kind=kind, namespace=namespace, name=name
        )
        log.debug("Registering finalizer for %s", watch_key)
        self._finalizers.setdefault(watch_key, []).append(callback)

    ## Implementation Details ##################################################

    @staticmethod
    def _watch_key(api_version="", kind="", namespace="", name=""):
        return ":".join([api_version or "", kind or "", namespace or "", name or ""])

    def _get_registered_watches(  # pylint: disable=too-many-arguments
        self,
        api_version: str = "",
        kind: str = "",
        namespace: str = "",
        name: str = "",
        finalizer: bool = False,
    ) -> List[Tuple[str, Callable]]:
        # Get the scoped watch key
        resource_watch_key = self._watch_key(
            api_version=api_version, kind=kind, namespace=namespace, name=name
        )
        namespaced_watch_key = self._watch_key(
            api_version=api_version, kind=kind, namespace=namespace
        )
        global_watch_key = self._watch_key(api_version=api_version, kind=kind)

        # Get which watch list we're pulling from
        callback_map = self._watches
        if finalizer:
            callback_map = self._finalizers

        output_list = []
        log.debug3(
            "Looking for resourced key: %s namespace key %s global key %s",
            resource_watch_key,
            namespaced_watch_key,
            global_watch_key,
        )
        for key, callback_list in callback_map.items():
            if key in [resource_watch_key, namespaced_watch_key, global_watch_key]:
                log.debug3("%d Callbacks found for key %s", len(callback_list), key)
                for callback in callback_list:
                    output_list.append((key, callback))

        return output_list

    def _delete_key(self, namespace, kind, api_version, name):
        del self._cluster_content[namespace][kind][api_version][name]
        if not self._cluster_content[namespace][kind][api_version]:
            del self._cluster_content[namespace][kind][api_version]
        if not self._cluster_content[namespace][kind]:
            del self._cluster_content[namespace][kind]
        if not self._cluster_content[namespace]:
            del self._cluster_content[namespace]

    def _deploy(
        self,
        resource_definitions,
        call_watches=True,
        manage_owner_references=True,
        method: DeployMethod = DeployMethod.DEFAULT,
    ):
        log.info("DRY RUN deploy")
        changes = False
        for resource in resource_definitions:
            api_version = resource.get("apiVersion")
            kind = resource.get("kind")
            name = resource.get("metadata", {}).get("name")
            namespace = resource.get("metadata", {}).get("namespace")
            log.debug(
                "DRY RUN deploy [%s/%s/%s/%s]", namespace, kind, api_version, name
            )
            log.debug4(resource)

            # If owner CR configured, add ownerReferences
            if self._owner_cr and manage_owner_references:
                log.debug2("Adding dry-run owner references")
                update_owner_references(self, self._owner_cr, resource)
                log.debug3(
                    "All owner references: %s", resource["metadata"]["ownerReferences"]
                )

            with DRY_RUN_CLUSTER_LOCK:
                entries = (
                    self._cluster_content.setdefault(namespace, {})
                    .setdefault(kind, {})
                    .setdefault(api_version, {})
                )
                current = copy.deepcopy(entries.get(name, {}))
                old_resource_version = current.get("metadata", {}).pop(
                    "resourceVersion", None
                )
                changes = changes or (current != resource)

                if "metadata" not in resource:
                    resource["metadata"] = {}

                if (
                    self.strict_resource_version
                    and resource["metadata"].get("resourceVersion")
                    and old_resource_version
                    and resource["metadata"].get("resourceVersion")
                    != old_resource_version
                ):
                    log.warning(
                        "Unable to deploy resource. resourceVersion is out of date"
                    )
                    return False, False

                resource["metadata"]["creationTimestamp"] = entries.get(
                    "metadata", {}
                ).get("creationTimestamp", datetime.now().isoformat())
                resource["metadata"]["uid"] = entries.get("metadata", {}).get(
                    "uid", str(uuid.uuid4())
                )

                if self.generate_resource_version:
                    resource["metadata"]["resourceVersion"] = str(
                        random.randint(1, 1000)
                    ).zfill(5)

                # Depending on the deploy method either update or fully replace the object
                if method == DeployMethod.DEFAULT or method == DeployMethod.REPLACE:
                    entries[name] = resource
                else:
                    if name in entries:
                        entries[name] = merge_configs(entries[name], resource)
                    # If the object doesn't already exist then just add it
                    else:
                        entries[name] = resource

            # Call any registered watches
            if call_watches:
                for key, callback in self._get_registered_watches(
                    api_version, kind, namespace, name
                ):
                    log.debug2("Calling registered watch [%s] for [%s]", callback, key)
                    callback(resource)

            # Delete Key if it has already been disabled and doesn't have finalizers
            if self._cluster_content[namespace][kind][api_version][name].get(
                "metadata", {}
            ).get("deletionTimestamp") and not self._cluster_content[namespace][kind][
                api_version
            ][
                name
            ].get(
                "metadata", {}
            ).get(
                "finalizers"
            ):
                with DRY_RUN_CLUSTER_LOCK:
                    self._delete_key(namespace, kind, api_version, name)

        return True, changes
__init__(resources=None, owner_cr=None, strict_resource_version=False, generate_resource_version=True)

Construct with a static value to use for whether or not the functions should report change.

Source code in oper8/deploy_manager/dry_run_deploy_manager.py
def __init__(
    self,
    resources=None,
    owner_cr=None,
    strict_resource_version=False,
    generate_resource_version=True,
):
    """Construct with a static value to use for whether or not the functions
    should report change.
    """
    self._owner_cr = owner_cr
    self._cluster_content = {}
    self.strict_resource_version = strict_resource_version
    self.generate_resource_version = generate_resource_version

    # Dicts of registered watches and watchers
    self._watches = {}
    self._finalizers = {}

    # Deploy provided resources
    self._deploy(resources or [], call_watches=False, manage_owner_references=False)
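
A brief sketch of pre-seeding the simulated cluster at construction time; the Secret manifest is illustrative:

seed = [
    {
        "apiVersion": "v1",
        "kind": "Secret",
        "metadata": {"name": "seed-secret", "namespace": "test-ns"},
    }
]
dm = DryRunDeployManager(resources=seed)
# The seeded resource is immediately visible to state queries
_, content = dm.get_object_current_state("Secret", "seed-secret", namespace="test-ns")
assert content["metadata"]["name"] == "seed-secret"
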
register_finalizer(api_version, kind, callback, namespace='', name='')

Register a callback to call on deletion events on a given api_version/kind

Source code in oper8/deploy_manager/dry_run_deploy_manager.py
def register_finalizer(  # pylint: disable=too-many-arguments
    self,
    api_version: str,
    kind: str,
    callback: Callable[[dict], None],
    namespace="",
    name="",
):
    """Register a callback to call on deletion events on a given
    api_version/kind
    """
    watch_key = self._watch_key(
        api_version=api_version, kind=kind, namespace=namespace, name=name
    )
    log.debug("Registering finalizer for %s", watch_key)
    self._finalizers.setdefault(watch_key, []).append(callback)
register_watch(api_version, kind, callback, namespace='', name='')

Register a callback to watch for deploy events on a given api_version/kind

Source code in oper8/deploy_manager/dry_run_deploy_manager.py
def register_watch(  # pylint: disable=too-many-arguments
    self,
    api_version: str,
    kind: str,
    callback: Callable[[dict], None],
    namespace="",
    name="",
):
    """Register a callback to watch for deploy events on a given
    api_version/kind
    """
    watch_key = self._watch_key(
        api_version=api_version, kind=kind, namespace=namespace, name=name
    )
    log.debug("Registering watch for %s", watch_key)
    self._watches.setdefault(watch_key, []).append(callback)
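
A sketch of wiring test callbacks through register_watch and register_finalizer; the Pod manifest and the callback bodies are illustrative:

events = []
dm = DryRunDeployManager()
dm.register_watch(
    api_version="v1", kind="Pod",
    callback=lambda manifest: events.append(("deployed", manifest["metadata"]["name"])),
)
dm.register_finalizer(
    api_version="v1", kind="Pod",
    callback=lambda manifest: events.append(("deleted", manifest["metadata"]["name"])),
)

pod = {"apiVersion": "v1", "kind": "Pod", "metadata": {"name": "p", "namespace": "ns"}}
dm.deploy([pod])   # fires the registered watch callback
dm.disable([pod])  # fires the registered finalizer callback
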
watch_objects(kind, api_version=None, namespace=None, name=None, label_selector=None, field_selector=None, resource_version=None, timeout=15, **kwargs)

Watch the DryRunDeployManager for resource changes by registering callbacks

Source code in oper8/deploy_manager/dry_run_deploy_manager.py
def watch_objects(  # pylint: disable=too-many-arguments,too-many-locals,unused-argument
    self,
    kind: str,
    api_version: Optional[str] = None,
    namespace: Optional[str] = None,
    name: Optional[str] = None,
    label_selector: Optional[str] = None,
    field_selector: Optional[str] = None,
    resource_version: Optional[str] = None,
    timeout: Optional[int] = 15,
    **kwargs,
) -> Iterator[KubeWatchEvent]:
    """Watch the DryRunDeployManager for resource changes by registering
    callbacks"""

    event_queue = Queue()
    resource_map = {}

    def add_event(resource_map: dict, manifest: dict):
        """Callback triggered when resources are deployed"""
        resource = ManagedObject(manifest)
        event_type = KubeEventType.ADDED

        watch_key = self._watch_key(
            api_version=resource.api_version,
            kind=resource.kind,
            namespace=resource.namespace,
            name=resource.name,
        )
        if watch_key in resource_map:
            log.debug4("Watch key detected, setting Modified event type")
            event_type = KubeEventType.MODIFIED

        resource_map[watch_key] = resource
        event = KubeWatchEvent(
            type=event_type,
            resource=resource,
        )
        event_queue.put(event)

    def delete_event(resource_map: dict, manifest: dict):
        """Callback triggered when resources are disabled"""
        resource = ManagedObject(manifest)
        watch_key = self._watch_key(
            api_version=resource.api_version,
            kind=resource.kind,
            namespace=resource.namespace,
            name=resource.name,
        )
        if watch_key in resource_map:
            del resource_map[watch_key]

        event = KubeWatchEvent(
            type=KubeEventType.DELETED,
            resource=resource,
        )
        event_queue.put(event)

    # Get initial resources
    _, manifests = self.filter_objects_current_state(
        kind=kind,
        api_version=api_version,
        namespace=namespace,
        label_selector=label_selector,
        field_selector=field_selector,
    )
    for manifest in manifests:
        resource = ManagedObject(manifest)
        watch_key = self._watch_key(
            kind=resource.kind,
            api_version=resource.api_version,
            name=resource.name,
            namespace=resource.namespace,
        )
        resource_map[watch_key] = resource

        event = KubeWatchEvent(type=KubeEventType.ADDED, resource=resource)
        log.debug2("Yielding initial event %s", event)
        yield event

    end_time = datetime.max
    if timeout:
        end_time = datetime.now() + timedelta(seconds=timeout)

    # Register callbacks
    self.register_watch(
        api_version=api_version,
        kind=kind,
        namespace=namespace,
        name=name,
        callback=partial(add_event, resource_map),
    )
    self.register_finalizer(
        api_version=api_version,
        kind=kind,
        namespace=namespace,
        name=name,
        callback=partial(delete_event, resource_map),
    )

    # Yield any events from the callback queue
    log.debug2("Waiting till %s", end_time)
    while True:
        sec_till_end = (end_time - datetime.now()).seconds or 1
        try:
            event = event_queue.get(timeout=sec_till_end)
            log.debug2("Yielding event %s", event)
            yield event
        except Empty:
            pass

        if datetime.now() > end_time:
            return
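
A minimal sketch of running the dry-run watch with a short timeout, which keeps test loops bounded; the manifest and the 2 second timeout are illustrative:

dm = DryRunDeployManager()
dm.deploy([
    {"apiVersion": "v1", "kind": "ConfigMap",
     "metadata": {"name": "example", "namespace": "ns"}}
])
# Yields the initial ADDED event, then returns once the timeout elapses
for event in dm.watch_objects(kind="ConfigMap", api_version="v1", namespace="ns", timeout=2):
    print(event.type, event.resource.name)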

kube_event

Helper module to define shared types related to Kube Events

KubeEventType

Bases: Enum

Enum for all possible kubernetes event types

Source code in oper8/deploy_manager/kube_event.py
class KubeEventType(Enum):
    """Enum for all possible kubernetes event types"""

    DELETED = "DELETED"
    MODIFIED = "MODIFIED"
    ADDED = "ADDED"

KubeWatchEvent dataclass

DataClass containing the type, resource, and timestamp of a particular event

Source code in oper8/deploy_manager/kube_event.py
@dataclass
class KubeWatchEvent:
    """DataClass containing the type, resource, and timestamp of a
    particular event"""

    type: KubeEventType
    resource: ManagedObject
    timestamp: datetime = field(default_factory=datetime.now)
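
A small dispatch sketch, assuming the types are imported from this module; the handler itself is illustrative:

from oper8.deploy_manager.kube_event import KubeEventType, KubeWatchEvent

def handle_event(event: KubeWatchEvent):
    # Branch on the event type carried by the watch event
    if event.type is KubeEventType.ADDED:
        print("created:", event.resource.name, "at", event.timestamp)
    elif event.type is KubeEventType.MODIFIED:
        print("updated:", event.resource.name)
    elif event.type is KubeEventType.DELETED:
        print("removed:", event.resource.name)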

openshift_deploy_manager

This DeployManager delegates cluster operations to the openshift library. It is the manager used when the operator runs inside the cluster, or outside the cluster while making live changes.
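
A hedged construction sketch; it assumes the class is imported from this module, that cluster credentials are already available to the kubernetes client (in-cluster or via kubeconfig), and the ConfigMap manifest is illustrative:

from oper8.deploy_manager.openshift_deploy_manager import OpenshiftDeployManager

dm = OpenshiftDeployManager(manage_ansible_status=False, owner_cr=None)
success, changed = dm.deploy([
    {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": {"name": "example-config", "namespace": "example-ns"},
        "data": {"key": "value"},
    }
])
# Fetch the live state of the object that was just applied
found, state = dm.get_object_current_state(
    kind="ConfigMap", name="example-config", namespace="example-ns", api_version="v1"
)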

OpenshiftDeployManager

Bases: DeployManagerBase

This DeployManager uses the openshift DynamicClient to interact with the cluster

Source code in oper8/deploy_manager/openshift_deploy_manager.py
class OpenshiftDeployManager(DeployManagerBase):
    """This DeployManager uses the openshift DynamicClient to interact with the
    cluster
    """

    def __init__(
        self,
        manage_ansible_status: bool = False,
        owner_cr: Optional[dict] = None,
    ):
        """
        Args:
            manage_ansible_status:  bool
                If true, oper8 will emulate the status management done natively
                by ansible based on the readiness values of oper8's native status
                management
            owner_cr:  Optional[dict]
                The dict content of the CR that triggered this reconciliation.
                If given, deployed objects will have an ownerReference added to
                assign ownership to this CR instance.
        """
        self.manage_ansible_status = manage_ansible_status
        self._owner_cr = owner_cr

        # Set up the client
        log.debug("Initializing openshift client")
        self._client = None

        # Keep a threading lock for performing status updates. This is necessary
        # to avoid running into 409 Conflict errors if concurrent threads are
        # trying to perform status updates
        self._status_lock = threading.Lock()

    @property
    def client(self):
        """Lazy property access to the client"""
        if self._client is None:
            self._client = self._setup_client()
        return self._client

    @alog.logged_function(log.debug)
    def deploy(
        self,
        resource_definitions: List[dict],
        manage_owner_references: bool = True,
        retry_operation: bool = True,
        method: DeployMethod = DeployMethod.DEFAULT,
        **_,  # Accept any kwargs to compatibility
    ) -> Tuple[bool, bool]:
        """Deploy using the openshift client

        Args:
            resource_definitions:  list(dict)
                List of resource object dicts to apply to the cluster
            manage_owner_references:  bool
                If true, ownerReferences for the parent CR will be applied to
                the deployed object

        Returns:
            success:  bool
                True if deploy succeeded, False otherwise
            changed:  bool
                Whether or not the deployment resulted in changes
        """
        return self._retried_operation(
            resource_definitions,
            self._apply,
            max_retries=config.deploy_retries if retry_operation else 0,
            manage_owner_references=manage_owner_references,
            method=method,
        )

    @alog.logged_function(log.debug)
    def disable(self, resource_definitions: List[dict]) -> Tuple[bool, bool]:
        """The disable process is the same as the deploy process, but the child
        module params are set to 'state: absent'

        Args:
            resource_definitions:  list(dict)
                List of resource object dicts to apply to the cluster

        Returns:
            success:  bool
                True if deploy succeeded, False otherwise
            changed:  bool
                Whether or not the delete resulted in changes
        """
        return self._retried_operation(
            resource_definitions,
            self._disable,
            max_retries=config.deploy_retries,
            manage_owner_references=False,
        )

    def get_object_current_state(
        self,
        kind: str,
        name: str,
        namespace: Optional[str] = None,
        api_version: Optional[str] = None,
    ) -> Tuple[bool, dict]:
        """The get_current_objects function fetches the current state using
        calls directly to the api client

        Args:
            kind:  str
                The kind of the object to fetch
            name:  str
                The full name of the object to fetch
            namespace:  Optional[str]
                The namespace to search for the object or None for no namespace
            api_version:  Optional[str]
                The api_version of the resource kind to fetch

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  dict or None
                The dict representation of the current object's configuration,
                or None if not present
        """

        # Use the lazy discovery tool to first get all objects of the given type
        # in the given namespace, then look for the specific resource by name
        resources = self._get_resource_handle(kind, api_version)
        if not resources:
            return True, None

        if not namespace:
            resources.namespaced = False

        try:
            resource = resources.get(name=name, namespace=namespace)
        except ForbiddenError:
            log.debug(
                "Fetching objects of kind [%s] forbidden in namespace [%s]",
                kind,
                namespace,
            )
            return False, None
        except NotFoundError:
            log.debug(
                "No object named [%s/%s] found in namespace [%s]", kind, name, namespace
            )
            return True, None

        # If the resource was found, return it's dict representation
        return True, resource.to_dict()

    def watch_objects(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        api_version: Optional[str] = None,
        namespace: Optional[str] = None,
        name: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
        resource_version: Optional[str] = None,
        watch_manager: Optional[Watch] = None,
    ) -> Iterator[KubeWatchEvent]:
        watch_manager = watch_manager if watch_manager else Watch()
        resource_handle = self._get_resource_handle(kind, api_version)
        assert_cluster(
            resource_handle,
            (
                "Failed to fetch resource handle for "
                + f"{namespace}/{api_version}/{kind}"
            ),
        )

        resource_version = resource_version if resource_version else 0

        while True:
            try:
                for event_obj in watch_manager.stream(
                    resource_handle.get,
                    resource_version=resource_version,
                    namespace=namespace,
                    name=name,
                    label_selector=label_selector,
                    field_selector=field_selector,
                    serialize=False,
                    timeout_seconds=SERVER_WATCH_TIMEOUT,
                    _request_timeout=CLIENT_WATCH_TIMEOUT,
                ):
                    event_type = KubeEventType(event_obj["type"])
                    event_resource = ManagedObject(event_obj["object"])
                    yield KubeWatchEvent(event_type, event_resource)
            except client.exceptions.ApiException as exception:
                if exception.status == 410:
                    log.debug2(
                        f"Resource age expired, restarting watch {kind}/{api_version}"
                    )
                    resource_version = None
                else:
                    log.info("Unknown ApiException received, re-raising")
                    raise exception
            except urllib3.exceptions.ReadTimeoutError:
                log.debug4(
                    f"Watch Socket closed, restarting watch {kind}/{api_version}"
                )
            except urllib3.exceptions.ProtocolError:
                log.debug2(
                    f"Invalid Chunk from server, restarting watch {kind}/{api_version}"
                )

            # This is hidden attribute so probably not best to check
            if watch_manager._stop:  # pylint: disable=protected-access
                log.debug(
                    "Internal watch stopped. Stopping deploy manager watch for %s/%s",
                    kind,
                    api_version,
                )
                return

    def filter_objects_current_state(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        namespace: Optional[str] = None,
        api_version: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
    ) -> Tuple[bool, List[dict]]:
        """The filter_objects_current_state function fetches a list of objects
        that match either/both the label or field selector
        Args:
            kind:  str
                The kind of the object to fetch
            namespace:  str
                The namespace to search for the object
            api_version:  str
                The api_version of the resource kind to fetch
            label_selector:  str
                The label_selector to filter the resources
            field_selector:  str
                The field_selector to filter the resources

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  List[dict]
                A list of  dict representations for the objects configuration,
                or an empty list if no objects match
        """
        # Use the lazy discovery tool to first get all objects of the given type
        # in the given namespace, then look for the specific resource by name
        resources = self._get_resource_handle(kind, api_version)
        if not resources:
            return True, []

        if not namespace:
            resources.namespaced = False

        try:
            list_obj = resources.get(
                label_selector=label_selector,
                field_selector=field_selector,
                namespace=namespace,
            )
        except ForbiddenError:
            log.debug(
                "Fetching objects of kind [%s] forbidden in namespace [%s]",
                kind,
                namespace,
            )
            return False, []
        except NotFoundError:
            log.debug(
                "No objects of kind [%s] found in namespace [%s]", kind, namespace
            )
            return True, []

        # If the resource was found, get it's dict representation
        resource_list = list_obj.to_dict().get("items", [])
        return True, resource_list

    def set_status(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        name: str,
        namespace: Optional[str],
        status: dict,
        api_version: Optional[str] = None,
    ) -> Tuple[bool, bool]:
        """Set the status in the cluster manifest for an object managed by this
        operator

        Args:
            kind:  str
                The kind of the object to fetch
            name:  str
                The full name of the object to fetch
            namespace:  Optional[str]
                The namespace to search for the object.
            status:  dict
                The status object to set onto the given object
            api_version:  Optional[str]
                The api_version of the resource to update

        Returns:
            success:  bool
                Whether or not the status update operation succeeded
            changed:  bool
                Whether or not the status update resulted in a change
        """
        # Create a dummy resource to use in the common retry function
        resource_definitions = [
            {
                "kind": kind,
                "apiVersion": api_version,
                "metadata": {
                    "name": name,
                    "namespace": namespace,
                },
            }
        ]

        # Run it with retries
        return self._retried_operation(
            resource_definitions,
            self._set_status,
            max_retries=config.deploy_retries,
            status=status,
            manage_owner_references=False,
        )

    ## Implementation Helpers ##################################################

    @staticmethod
    def _setup_client():
        """Create a DynamicClient that will work based on where the operator is
        running
        """
        # Try in-cluster config
        try:
            log.debug2("Running with in-cluster config")

            # Create Empty Config and load in-cluster information
            kube_config = kubernetes.client.Configuration()
            kubernetes.config.load_incluster_config(client_configuration=kube_config)

            # Generate ApiClient and return Openshift DynamicClient
            api_client = kubernetes.client.ApiClient(kube_config)
            return DynamicClient(api_client)

        # Fall back to out-of-cluster config
        except kubernetes.config.ConfigException:
            log.debug2("Running with out-of-cluster config")
            return DynamicClient(kubernetes.config.new_client_from_config())

    @staticmethod
    def _strip_last_applied(resource_definitions):
        """Make sure that the last-applied annotation is not present in any of
        the resources. This can lead to recursive nesting!
        """
        for resource_definition in resource_definitions:
            last_applied = (
                resource_definition.get("metadata", {})
                .get("annotations", {})
                .get(LAST_APPLIED_CONFIG_ANNOTATION)
            )
            if last_applied:
                log.debug3("Removing [%s]", LAST_APPLIED_CONFIG_ANNOTATION)
                del resource_definition["metadata"]["annotations"][
                    LAST_APPLIED_CONFIG_ANNOTATION
                ]
                if not resource_definition["metadata"]["annotations"]:
                    del resource_definition["metadata"]["annotations"]

    def _get_resource_handle(self, kind: str, api_version: str) -> Optional[Resource]:
        """Get the openshift resource handle for a specified kind and api_version"""
        resources = None
        try:
            resources = self.client.resources.get(kind=kind, api_version=api_version)
        except (ResourceNotFoundError, ResourceNotUniqueError):
            try:
                resources = self.client.resources.get(
                    short_names=[kind], api_version=api_version
                )
            except (ResourceNotFoundError, ResourceNotUniqueError):
                log.debug(
                    "No objects of kind [%s] found or multiple objects matching request found",
                    kind,
                )
        return resources

    def _update_owner_references(self, resource_definitions):
        """If configured to do so, add owner references to the given resources"""
        if self._owner_cr:
            for resource_definition in resource_definitions:
                update_owner_references(self, self._owner_cr, resource_definition)

    def _retried_operation(
        self,
        resource_definitions,
        operation,
        max_retries,
        manage_owner_references,
        **kwargs,
    ):
        """Shared wrapper for executing a client operation with retries"""

        # Make sure the resource_definitions is a list
        assert isinstance(
            resource_definitions, list
        ), "Programming Error: resource_definitions is not a list"
        log.debug3("Running module with %d retries", max_retries)

        # If there are no resource definitions given, consider it a success with
        # no change
        if not resource_definitions:
            log.debug("Nothing to do for an empty list of resources")
            return True, False

        # Strip out last-applied annotations from all resources to avoid nested
        # annotations
        self._strip_last_applied(resource_definitions)

        # Add owner references if configured to do so
        if manage_owner_references:
            self._update_owner_references(resource_definitions)

        # Run each resource individually so that we can track partial completion
        success = True
        changed = False
        for resource_definition in resource_definitions:
            # Perform the operation and update the aggregate changed status
            try:
                changed = (
                    self._run_individual_operation_with_retries(
                        operation,
                        max_retries,
                        resource_definition=resource_definition,
                        **kwargs,
                    )
                    or changed
                )

            # On failure, mark it and stop processing the rest of the resources.
            # This is done because the resources in the file are assumed to be
            # in an intentional sequence and resources later in the file may
            # depend on resources earlier in the file.
            except Exception as err:  # pylint: disable=broad-except
                log.warning(
                    "Operation [%s] failed to execute: %s",
                    operation,
                    err,
                    exc_info=True,
                )
                success = False
                break

        # Return the aggregate success and change values
        return success, changed

    def _run_individual_operation_with_retries(
        self,
        operation: Callable,
        remaining_retries: int,
        resource_definition: dict,
        **kwargs,
    ):
        """Helper to execute a single helper operation with retries

        Args:
            operation:  Callable
                The operation function to run
            remaining_retries:  int
                The number of remaining retries
            resource_definition:  dict
                The dict representation of the resource being applied
            **kwargs:  dict
                Keyword args to pass to the operation beyond resource_definition

        Returns:
            changed:  bool
                Whether or not the operation resulted in meaningful change
        """
        try:
            return operation(resource_definition=resource_definition, **kwargs)
        except ConflictError as err:
            log.debug2("Handling ConflictError: %s", err)

            # If we have retries left, try again
            if remaining_retries:
                # Sleep for the backoff duration
                backoff_duration = config.retry_backoff_base_seconds * (
                    config.deploy_retries - remaining_retries + 1
                )
                log.debug3("Retrying in %fs", backoff_duration)
                time.sleep(backoff_duration)

                # Fetch the current resourceVersion and update in the
                # resource definition
                # NOTE: This can overwrite changes made external to the operator
                #   but that's an acceptable case since resources managed by
                #   oper8 should only be managed by oper8. In the rare case where
                #   oper8 shares ownership of a resource, any conflicts should
                #   be resolved cleanly on the next reconciliation.
                res_id = self._get_resource_identifiers(resource_definition)
                api_version = res_id.api_version
                kind = res_id.kind
                name = res_id.name
                namespace = res_id.namespace
                success, content = self.get_object_current_state(
                    kind=kind,
                    name=name,
                    namespace=namespace,
                    api_version=api_version,
                )
                assert_cluster(
                    success and content is not None,
                    (
                        "Failed to fetch updated resourceVersion for "
                        + f"{namespace}/{api_version}/{kind}/{name}"
                    ),
                )
                updated_resource_version = content.get("metadata", {}).get(
                    "resourceVersion"
                )
                assert_cluster(
                    updated_resource_version is not None,
                    "No updated resource version found!",
                )
                log.debug3(
                    "Updating resourceVersion from %s -> %s",
                    resource_definition.get("metadata", {}).get("resourceVersion"),
                    updated_resource_version,
                )
                resource_definition.setdefault("metadata", Config({}))[
                    "resourceVersion"
                ] = updated_resource_version

                # Run the retry
                log.debug3("Retrying")
                return self._run_individual_operation_with_retries(
                    operation, remaining_retries - 1, resource_definition, **kwargs
                )
            raise

    _ANSIBLE_COND_TYPE = "Running"
    _ANSIBLE_COND_RES_READY = {"ok": 1, "changed": 0, "skipped": 0, "failures": 0}
    _ANSIBLE_COND_RES_UNREADY = {"ok": 0, "changed": 0, "skipped": 0, "failures": 0}

    def _inject_ansible_status(self, status, previous_status):
        """If manage_ansible_status is enabled, this will inject the right
        ansible status values to emulate the format that ansible natively
        supports
        """
        previous_status = previous_status or {}

        # Check if the oper8 status indicates readiness
        is_ready = verify_subsystem(
            {"status": status}, desired_version=oper8_status.get_version(status)
        )
        prev_is_ready = verify_subsystem(
            {"status": previous_status},
            desired_version=oper8_status.get_version(previous_status),
        )
        log.debug3(
            "Status shows ready? %s. Previous ready? %s", is_ready, prev_is_ready
        )

        # Create the ansible status blob
        ansible_result = (
            self._ANSIBLE_COND_RES_READY if is_ready else self._ANSIBLE_COND_RES_UNREADY
        )
        log.debug3("Ansible Result: %s", ansible_result)

        # Determine if the condition has changed to know whether this is a
        # transition time
        current_ready_timestamp = oper8_status.get_condition(
            oper8_status.READY_CONDITION, status
        ).get(oper8_status.TIMESTAMP_KEY)
        prev_ready_timestamp = oper8_status.get_condition(
            oper8_status.READY_CONDITION, previous_status
        ).get(oper8_status.TIMESTAMP_KEY)
        if prev_ready_timestamp is not None and prev_is_ready == is_ready:
            log.debug3("No readiness change. Not a transition.")
            transition_time = prev_ready_timestamp
        else:
            log.debug3(
                "Transitioning from Ready(%s) -> Ready(%s)", prev_is_ready, is_ready
            )
            transition_time = current_ready_timestamp

        # Inject the final ansible condition
        conditions = [
            cond
            for cond in status.get("conditions", [])
            if cond.get("type") != self._ANSIBLE_COND_TYPE
        ]
        conditions.append(
            {
                "type": self._ANSIBLE_COND_TYPE,
                "ansibleResult": ansible_result,
                "lastTransitionTime": transition_time,
            }
        )
        status["conditions"] = conditions
        log.debug4("Status With Ansible: %s", status)

    @classmethod
    def _clean_manifest(cls, manifest_a: dict, manifest_b: dict) -> Tuple[dict, dict]:
        """Clean two manifests before being compared. This removes fields that
        change every reconcile

        Returns:
            Tuple[dict, dict]: The cleaned manifests
        """
        manifest_a = copy.deepcopy(manifest_a)
        manifest_b = copy.deepcopy(manifest_b)
        for metadata_field in [
            "resourceVersion",
            "generation",
            "managedFields",
            "uid",
            "creationTimestamp",
        ]:
            manifest_a.get("metadata", {}).pop(metadata_field, None)
            manifest_b.get("metadata", {}).pop(metadata_field, None)
        return (manifest_a, manifest_b)

    @classmethod
    def _manifest_diff(cls, manifest_a, manifest_b) -> bool:
        """Helper to compare two manifests for meaningful diff while ignoring
        fields that always change.

        Returns:
            [bool, bool]: The first bool identifies if the resource changed while the
        """

        manifest_a, manifest_b = cls._clean_manifest(manifest_a, manifest_b)

        cls._strip_last_applied([manifest_b, manifest_a])
        diff = recursive_diff(
            manifest_a,
            manifest_b,
        )
        change = bool(diff)
        log.debug2("Found change? %s", change)
        log.debug3("A: %s", manifest_a)
        log.debug3("B: %s", manifest_b)
        return change

    @classmethod
    def _retain_kubernetes_annotations(cls, current: dict, desired: dict) -> dict:
        """Helper to update a desired manifest with certain annotations from the existing
        resource. This stops other controllers from re-reconciling this resource

        Returns:
            dict: updated resource
        """

        identifiers = cls._get_resource_identifiers(desired)

        for annotation, annotation_value in (
            current.get("metadata", {}).get("annotations", {}).items()
        ):
            for cluster_annotation in config.cluster_passthrough_annotations:
                if cluster_annotation in annotation and annotation not in desired[
                    "metadata"
                ].get("annotations", {}):
                    log.debug4(
                        "Retaining annotation %s for [%s/%s/%s]",
                        annotation,
                        identifiers.kind,
                        identifiers.api_version,
                        identifiers.name,
                    )
                    desired["metadata"].setdefault("annotations", {})[
                        annotation
                    ] = annotation_value
        return desired

    @classmethod
    def _requires_replace(cls, manifest_a, manifest_b) -> bool:
        """Helper to compare two manifests to see if they require
        replace

        Returns:
            bool: If the resource requires a replace operation
        """

        manifest_a, manifest_b = cls._clean_manifest(manifest_a, manifest_b)

        change = bool(requires_replace(manifest_a, manifest_b))
        log.debug2("Requires Replace? %s", change)
        return change

    # Internal struct to hold the key resource identifier elements
    _ResourceIdentifiers = namedtuple(
        "ResourceIdentifiers", ["api_version", "kind", "name", "namespace"]
    )

    @classmethod
    def _get_resource_identifiers(cls, resource_definition, require_api_version=True):
        """Helper for getting the required parts of a single resource definition"""
        api_version = resource_definition.get("apiVersion")
        kind = resource_definition.get("kind")
        name = resource_definition.get("metadata", {}).get("name")
        namespace = resource_definition.get("metadata", {}).get("namespace")
        assert None not in [
            kind,
            name,
        ], "Cannot apply resource without kind or name"
        assert (
            not require_api_version or api_version is not None
        ), "Cannot apply resource without apiVersion"
        return cls._ResourceIdentifiers(api_version, kind, name, namespace)

    ################
    ## Operations ##
    ################

    def _replace_resource(self, resource_definition: dict) -> dict:
        """Helper function to forcibly replace a resource on the cluster"""
        # Get the key elements of the resource
        res_id = self._get_resource_identifiers(resource_definition)
        api_version = res_id.api_version
        kind = res_id.kind
        name = res_id.name
        namespace = res_id.namespace

        # Strip out managedFields to let the server set them
        resource_definition["metadata"]["managedFields"] = None

        # Get the resource handle
        log.debug2("Fetching resource handle [%s/%s]", api_version, kind)
        resource_handle = self._get_resource_handle(api_version=api_version, kind=kind)
        assert_cluster(
            resource_handle,
            (
                "Failed to fetch resource handle for "
                + f"{namespace}/{api_version}/{kind}"
            ),
        )

        log.debug2(
            "Attempting to put [%s/%s/%s] in %s",
            api_version,
            kind,
            name,
            namespace,
        )
        return resource_handle.replace(
            resource_definition,
            name=name,
            namespace=namespace,
            field_manager="oper8",
        ).to_dict()

    def _apply_resource(self, resource_definition: dict) -> dict:
        """Helper function to apply a single resource to the cluster"""
        # Get the key elements of the resource
        res_id = self._get_resource_identifiers(resource_definition)
        api_version = res_id.api_version
        kind = res_id.kind
        name = res_id.name
        namespace = res_id.namespace

        # Strip out managedFields to let the server set them
        resource_definition["metadata"]["managedFields"] = None

        # Get the resource handle
        log.debug2("Fetching resource handle [%s/%s]", api_version, kind)
        resource_handle = self._get_resource_handle(api_version=api_version, kind=kind)
        assert_cluster(
            resource_handle,
            (
                "Failed to fetch resource handle for "
                + f"{namespace}/{api_version}/{kind}"
            ),
        )

        log.debug2(
            "Attempting to apply [%s/%s/%s] in %s",
            api_version,
            kind,
            name,
            namespace,
        )
        try:
            return resource_handle.server_side_apply(
                resource_definition,
                name=name,
                namespace=namespace,
                field_manager="oper8",
            ).to_dict()
        except ConflictError:
            log.debug(
                "Overriding field manager conflict for [%s/%s/%s] in %s ",
                api_version,
                kind,
                name,
                namespace,
            )
            return resource_handle.server_side_apply(
                resource_definition,
                name=name,
                namespace=namespace,
                field_manager="oper8",
                force_conflicts=True,
            ).to_dict()

    def _apply(self, resource_definition, method: DeployMethod):
        """Apply a single resource to the cluster

        Args:
            resource_definition:  dict
                The resource manifest to apply

        Returns:
            changed:  bool
                Whether or not the apply resulted in a meaningful change
        """
        changed = False

        res_id = self._get_resource_identifiers(resource_definition)
        api_version = res_id.api_version
        kind = res_id.kind
        name = res_id.name
        namespace = res_id.namespace

        # Get the current resource state
        success, current = self.get_object_current_state(
            kind=kind,
            name=name,
            namespace=namespace,
            api_version=api_version,
        )
        assert_cluster(
            success,
            (
                "Failed to fetch current state for "
                + f"{namespace}/{api_version}/{kind}/{name}"
            ),
        )
        if not current:
            current = {}

        # Determine if there will be a meaningful change (ignoring fields that
        # always change)
        changed = self._manifest_diff(current, resource_definition)

        # If there is meaningful change, apply this instance
        if changed:

            resource_definition = self._retain_kubernetes_annotations(
                current, resource_definition
            )

            req_replace = False
            if method is DeployMethod.DEFAULT:
                req_replace = self._requires_replace(current, resource_definition)

            log.debug2(
                "Attempting to deploy [%s/%s/%s] in %s with %s",
                api_version,
                kind,
                name,
                namespace,
                method,
            )
            # If the resource requires a replace operation then use put. Otherwise use
            # server side apply
            if (
                (req_replace or method is DeployMethod.REPLACE)
                and method != DeployMethod.UPDATE
                and current != {}
            ):
                apply_res = self._replace_resource(
                    resource_definition,
                )
            else:
                try:
                    apply_res = self._apply_resource(resource_definition)
                except UnprocessibleEntityError as err:
                    log.debug3("Caught 422 error: %s", err, exc_info=True)
                    if config.deploy_unprocessable_put_fallback:
                        log.debug("Falling back to PUT on 422: %s", err)
                        apply_res = self._replace_resource(
                            resource_definition,
                        )
                    else:
                        raise

            # Recompute the diff to determine if the apply actually caused a
            # meaningful change. This may have a different result than the check
            # above because the applied manifest does not always result in the
            # resource looking identical (e.g. removing a field from the applied
            # manifest does not delete it from the resource).
            changed = self._manifest_diff(current, apply_res)

        return changed

    def _disable(self, resource_definition):
        """Disable a single resource to the cluster if it exists

        Args:
            resource_definition:  dict
                The resource manifest to disable

        Returns:
            changed:  bool
                Whether or not the disable resulted in a meaningful change
        """
        changed = False

        # Get the key elements of the resource
        res_id = self._get_resource_identifiers(resource_definition)
        api_version = res_id.api_version
        kind = res_id.kind
        name = res_id.name
        namespace = res_id.namespace

        # Get the resource handle, handling missing kinds as success without
        # change
        log.debug2("Fetching resource [%s/%s]", api_version, kind)
        try:
            # Get a handle to the kind. This may fail with ResourceNotFoundError
            resource_handle = self.client.resources.get(
                api_version=api_version, kind=kind
            )

            # If resource is not namespaced set kubernetes api namespaced to false
            if not namespace:
                resource_handle.namespaced = False

            # Attempt to delete this instance. This may fail with NotFoundError
            log.debug2(
                "Attempting to delete [%s/%s/%s] from %s",
                api_version,
                kind,
                name,
                namespace,
            )
            resource_handle.delete(name=name, namespace=namespace)
            changed = True

        # If the kind or instance is not found, that's a success without change
        except (ResourceNotFoundError, NotFoundError) as err:
            log.debug2("Valid error caught when disabling [%s/%s]: %s", kind, name, err)

        return changed

    def _set_status(self, resource_definition, status):
        """Disable a single resource to the cluster if it exists

        Args:
            resource_definition:  dict
                A dummy manifest holding the resource identifiers
            status:  dict
                The status to apply

        Returns:
            changed:  bool
                Whether or not the status update resulted in a meaningful change
        """
        changed = False

        # Get the key elements of the resource
        res_id = self._get_resource_identifiers(
            resource_definition, require_api_version=False
        )
        api_version = res_id.api_version
        kind = res_id.kind
        name = res_id.name
        namespace = res_id.namespace
        resource_handle = self.client.resources.get(api_version=api_version, kind=kind)

        # If resource is not namespaced set kubernetes api namespaced to false
        if not namespace:
            resource_handle.namespaced = False

        with self._status_lock:
            # Get the resource itself
            resource = resource_handle.get(name=name, namespace=namespace).to_dict()

            # Get the previous status and compare with the proposed status
            log.debug2(
                "Resource version: %s",
                resource.get("metadata", {}).get("resourceVersion"),
            )
            previous_status = resource.get("status")
            if previous_status == status:
                log.debug("Status has not changed. No update")

            else:
                # Inject the ansible status if enabled
                if self.manage_ansible_status:
                    log.debug2("Injecting ansible status")
                    self._inject_ansible_status(status, previous_status)

                # Overwrite the status
                resource["status"] = status
                resource_handle.status.replace(body=resource).to_dict()
                log.debug2(
                    "Successfully set the status for [%s/%s] in %s",
                    kind,
                    name,
                    namespace,
                )
                changed = True

            return changed
client property

Lazy property access to the client

__init__(manage_ansible_status=False, owner_cr=None)

Parameters:

Name Type Description Default
manage_ansible_status bool

bool If true, oper8 will emulate the status management done natively by ansible based on the readiness values of oper8's native status management

False
owner_cr Optional[dict]

Optional[dict] The dict content of the CR that triggered this reconciliation. If given, deployed objects will have an ownerReference added to assign ownership to this CR instance.

None
Source code in oper8/deploy_manager/openshift_deploy_manager.py
def __init__(
    self,
    manage_ansible_status: bool = False,
    owner_cr: Optional[dict] = None,
):
    """
    Args:
        manage_ansible_status:  bool
            If true, oper8 will emulate the status management done natively
            by ansible based on the readiness values of oper8's native status
            management
        owner_cr:  Optional[dict]
            The dict content of the CR that triggered this reconciliation.
            If given, deployed objects will have an ownerReference added to
            assign ownership to this CR instance.
    """
    self.manage_ansible_status = manage_ansible_status
    self._owner_cr = owner_cr

    # Set up the client
    log.debug("Initializing openshift client")
    self._client = None

    # Keep a threading lock for performing status updates. This is necessary
    # to avoid running into 409 Conflict errors if concurrent threads are
    # trying to perform status updates
    self._status_lock = threading.Lock()
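
A minimal construction sketch, assuming the class is exported as oper8.deploy_manager.OpenshiftDeployManager and that a kubernetes/openshift config is reachable; the CR content is illustrative only:

from oper8.deploy_manager import OpenshiftDeployManager  # assumed import path

# Hypothetical CR that triggered the reconcile
owner_cr = {
    "apiVersion": "example.org/v1",
    "kind": "MyApp",
    "metadata": {"name": "my-app", "namespace": "demo", "uid": "1234-abcd-5678"},
}
dm = OpenshiftDeployManager(manage_ansible_status=False, owner_cr=owner_cr)
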
deploy(resource_definitions, manage_owner_references=True, retry_operation=True, method=DeployMethod.DEFAULT, **_)

Deploy using the openshift client

Parameters:

Name Type Description Default
resource_definitions List[dict]

list(dict) List of resource object dicts to apply to the cluster

required
manage_owner_references bool

bool If true, ownerReferences for the parent CR will be applied to the deployed object

True

Returns:

Name Type Description
success bool

bool True if deploy succeeded, False otherwise

changed bool

bool Whether or not the deployment resulted in changes

Source code in oper8/deploy_manager/openshift_deploy_manager.py
@alog.logged_function(log.debug)
def deploy(
    self,
    resource_definitions: List[dict],
    manage_owner_references: bool = True,
    retry_operation: bool = True,
    method: DeployMethod = DeployMethod.DEFAULT,
    **_,  # Accept any kwargs for compatibility
) -> Tuple[bool, bool]:
    """Deploy using the openshift client

    Args:
        resource_definitions:  list(dict)
            List of resource object dicts to apply to the cluster
        manage_owner_references:  bool
            If true, ownerReferences for the parent CR will be applied to
            the deployed object

    Returns:
        success:  bool
            True if deploy succeeded, False otherwise
        changed:  bool
            Whether or not the deployment resulted in changes
    """
    return self._retried_operation(
        resource_definitions,
        self._apply,
        max_retries=config.deploy_retries if retry_operation else 0,
        manage_owner_references=manage_owner_references,
        method=method,
    )
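
A hedged usage sketch (dm is the hypothetical OpenshiftDeployManager instance constructed above; the ConfigMap is illustrative and the client must be able to reach a cluster):

configmap = {
    "apiVersion": "v1",
    "kind": "ConfigMap",
    "metadata": {"name": "my-app-config", "namespace": "demo"},
    "data": {"log_level": "debug"},
}
success, changed = dm.deploy([configmap])
assert success, "deploy failed"
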
disable(resource_definitions)

The disable process is the same as the deploy process, but the child module params are set to 'state: absent'

Parameters:

Name Type Description Default
resource_definitions List[dict]

list(dict) List of resource object dicts to apply to the cluster

required

Returns:

Name Type Description
success bool

bool True if deploy succeeded, False otherwise

changed bool

bool Whether or not the delete resulted in changes

Source code in oper8/deploy_manager/openshift_deploy_manager.py
@alog.logged_function(log.debug)
def disable(self, resource_definitions: List[dict]) -> Tuple[bool, bool]:
    """The disable process is the same as the deploy process, but the child
    module params are set to 'state: absent'

    Args:
        resource_definitions:  list(dict)
            List of resource object dicts to apply to the cluster

    Returns:
        success:  bool
            True if deploy succeeded, False otherwise
        changed:  bool
            Whether or not the delete resulted in changes
    """
    return self._retried_operation(
        resource_definitions,
        self._disable,
        max_retries=config.deploy_retries,
        manage_owner_references=False,
    )
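
Continuing the hypothetical sketch above, the same manifest list can later be removed; missing kinds or instances count as success without change:

success, changed = dm.disable([configmap])
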
filter_objects_current_state(kind, namespace=None, api_version=None, label_selector=None, field_selector=None)

The filter_objects_current_state function fetches a list of objects that match either or both of the label and field selectors

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
namespace Optional[str]

Optional[str] The namespace to search for the object

None
api_version Optional[str]

Optional[str] The api_version of the resource kind to fetch

None
label_selector Optional[str]

Optional[str] The label_selector to filter the resources

None
field_selector Optional[str]

Optional[str] The field_selector to filter the resources

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state List[dict]

List[dict] A list of dict representations of the objects' configuration, or an empty list if no objects match

Source code in oper8/deploy_manager/openshift_deploy_manager.py
def filter_objects_current_state(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    namespace: Optional[str] = None,
    api_version: Optional[str] = None,
    label_selector: Optional[str] = None,
    field_selector: Optional[str] = None,
) -> Tuple[bool, List[dict]]:
    """The filter_objects_current_state function fetches a list of objects
    that match either/both the label or field selector
    Args:
        kind:  str
            The kind of the object to fetch
        namespace:  str
            The namespace to search for the object
        api_version:  str
            The api_version of the resource kind to fetch
        label_selector:  str
            The label_selector to filter the resources
        field_selector:  str
            The field_selector to filter the resources

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  List[dict]
            A list of dict representations of the objects' configuration,
            or an empty list if no objects match
    """
    # Use the lazy discovery tool to first get all objects of the given type
    # in the given namespace, then look for the specific resource by name
    resources = self._get_resource_handle(kind, api_version)
    if not resources:
        return True, []

    if not namespace:
        resources.namespaced = False

    try:
        list_obj = resources.get(
            label_selector=label_selector,
            field_selector=field_selector,
            namespace=namespace,
        )
    except ForbiddenError:
        log.debug(
            "Fetching objects of kind [%s] forbidden in namespace [%s]",
            kind,
            namespace,
        )
        return False, []
    except NotFoundError:
        log.debug(
            "No objects of kind [%s] found in namespace [%s]", kind, namespace
        )
        return True, []

    # If the resource was found, get its dict representation
    resource_list = list_obj.to_dict().get("items", [])
    return True, resource_list
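
A hedged example of listing resources by label selector (dm, namespace, and the selector are illustrative):

success, pods = dm.filter_objects_current_state(
    kind="Pod",
    namespace="demo",
    api_version="v1",
    label_selector="app=my-app",
)
if success:
    names = [pod["metadata"]["name"] for pod in pods]
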
get_object_current_state(kind, name, namespace=None, api_version=None)

The get_object_current_state function fetches the current state using calls directly to the api client

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
name str

str The full name of the object to fetch

required
namespace Optional[str]

Optional[str] The namespace to search for the object or None for no namespace

None
api_version Optional[str]

Optional[str] The api_version of the resource kind to fetch

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state dict

dict or None The dict representation of the current object's configuration, or None if not present

Source code in oper8/deploy_manager/openshift_deploy_manager.py
def get_object_current_state(
    self,
    kind: str,
    name: str,
    namespace: Optional[str] = None,
    api_version: Optional[str] = None,
) -> Tuple[bool, dict]:
    """The get_current_objects function fetches the current state using
    calls directly to the api client

    Args:
        kind:  str
            The kind of the object to fetch
        name:  str
            The full name of the object to fetch
        namespace:  Optional[str]
            The namespace to search for the object or None for no namespace
        api_version:  Optional[str]
            The api_version of the resource kind to fetch

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  dict or None
            The dict representation of the current object's configuration,
            or None if not present
    """

    # Use the lazy discovery tool to first get all objects of the given type
    # in the given namespace, then look for the specific resource by name
    resources = self._get_resource_handle(kind, api_version)
    if not resources:
        return True, None

    if not namespace:
        resources.namespaced = False

    try:
        resource = resources.get(name=name, namespace=namespace)
    except ForbiddenError:
        log.debug(
            "Fetching objects of kind [%s] forbidden in namespace [%s]",
            kind,
            namespace,
        )
        return False, None
    except NotFoundError:
        log.debug(
            "No object named [%s/%s] found in namespace [%s]", kind, name, namespace
        )
        return True, None

    # If the resource was found, return its dict representation
    return True, resource.to_dict()
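
A hedged example of fetching a single object's current state (dm and the names are illustrative):

success, deployment = dm.get_object_current_state(
    kind="Deployment",
    name="my-app",
    namespace="demo",
    api_version="apps/v1",
)
if success and deployment is not None:
    ready = deployment.get("status", {}).get("readyReplicas", 0)
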
set_status(kind, name, namespace, status, api_version=None)

Set the status in the cluster manifest for an object managed by this operator

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
name str

str The full name of the object to fetch

required
namespace Optional[str]

Optional[str] The namespace to search for the object.

required
status dict

dict The status object to set onto the given object

required
api_version Optional[str]

Optional[str] The api_version of the resource to update

None

Returns:

Name Type Description
success bool

bool Whether or not the status update operation succeeded

changed bool

bool Whether or not the status update resulted in a change

Source code in oper8/deploy_manager/openshift_deploy_manager.py
def set_status(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    name: str,
    namespace: Optional[str],
    status: dict,
    api_version: Optional[str] = None,
) -> Tuple[bool, bool]:
    """Set the status in the cluster manifest for an object managed by this
    operator

    Args:
        kind:  str
            The kind of the object to fetch
        name:  str
            The full name of the object to fetch
        namespace:  Optional[str]
            The namespace to search for the object.
        status:  dict
            The status object to set onto the given object
        api_version:  Optional[str]
            The api_version of the resource to update

    Returns:
        success:  bool
            Whether or not the status update operation succeeded
        changed:  bool
            Whether or not the status update resulted in a change
    """
    # Create a dummy resource to use in the common retry function
    resource_definitions = [
        {
            "kind": kind,
            "apiVersion": api_version,
            "metadata": {
                "name": name,
                "namespace": namespace,
            },
        }
    ]

    # Run it with retries
    return self._retried_operation(
        resource_definitions,
        self._set_status,
        max_retries=config.deploy_retries,
        status=status,
        manage_owner_references=False,
    )
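
A hedged example of writing a status onto the owning CR (kind, names, and condition content are illustrative):

status = {"conditions": [{"type": "Ready", "status": "True", "reason": "Stable"}]}
success, changed = dm.set_status(
    kind="MyApp",
    name="my-app",
    namespace="demo",
    status=status,
    api_version="example.org/v1",
)
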

owner_references

This module holds common functionality that the DeployManager implementations can use to manage ownerReferences on deployed resources

update_owner_references(deploy_manager, owner_cr, child_obj)

Fetch current ownerReferences and merge a reference for this CR into the child object

Source code in oper8/deploy_manager/owner_references.py
def update_owner_references(
    deploy_manager: DeployManagerBase,
    owner_cr: dict,
    child_obj: dict,
):
    """Fetch current ownerReferences and merge a reference for this CR into
    the child object
    """

    # Validate the shape of the owner CR and the child object
    _validate_object_struct(owner_cr)
    _validate_object_struct(child_obj)

    # Fetch the current state of this object
    kind = child_obj["kind"]
    api_version = child_obj["apiVersion"]
    name = child_obj["metadata"]["name"]
    namespace = child_obj["metadata"]["namespace"]
    uid = child_obj["metadata"].get("uid")

    success, content = deploy_manager.get_object_current_state(
        kind=kind, name=name, api_version=api_version, namespace=namespace
    )
    assert_cluster(
        success, f"Failed to fetch current state of {api_version}.{kind}/{name}"
    )

    # Get the current ownerReferences
    owner_refs = []
    if content is not None:
        owner_refs = content.get("metadata", {}).get("ownerReferences", [])
        log.debug3("Current owner refs: %s", owner_refs)

    # If the current CR is not represented and current CR is in the same
    # namespace as the child object, add it
    current_uid = owner_cr["metadata"]["uid"]
    log.debug3("Current CR UID: %s", current_uid)
    current_namespace = owner_cr["metadata"]["namespace"]
    log.debug3("Current CR namespace: %s", current_namespace)

    if current_uid == uid:
        log.debug2("Owner is same as child; Not adding owner ref")
        return

    if (namespace == current_namespace) and (
        current_uid not in [ref["uid"] for ref in owner_refs]
    ):
        log.debug2(
            "Adding current CR owner reference for %s.%s/%s",
            api_version,
            kind,
            name,
        )
        owner_refs.append(_make_owner_reference(owner_cr))

    # Add the ownerReferences to the object that will be applied to the
    # cluster
    log.debug4("Final owner refs: %s", owner_refs)
    child_obj["metadata"]["ownerReferences"] = owner_refs

replace_utils

This file contains common utilities for detecting if a replace operation is required for a resource

modified_lists(current_manifest, desired_manifest, in_list=False)

Helper function to check if there are any differences in the lists of the desired manifest. This is required because Kubernetes combines lists, which is often not the desired behavior

Source code in oper8/deploy_manager/replace_utils.py
def modified_lists(
    current_manifest: dict, desired_manifest: dict, in_list: bool = False
) -> bool:
    """Helper function to check if there are any differences in the lists of the desired manifest.
    This is required because Kubernetes combines lists, which is often not the desired behavior
    """
    # If type mismatch then assume replace
    if (
        desired_manifest
        and current_manifest
        and type(desired_manifest) is not type(current_manifest)
    ):
        log.debug4("Requires replace due to type mismatch")
        return True

    if isinstance(current_manifest, list) and isinstance(desired_manifest, list):
        # If the desired list has fewer entries than the current one, return True.
        # Removing entries from a list requires a PUT
        if len(current_manifest) > len(desired_manifest):
            log.debug4("Requires replace due to list deletion")
            return True
        # Iterate over the desired manifest
        for recurse_a, recurse_b in zip(current_manifest, desired_manifest):
            if modified_lists(recurse_a, recurse_b, in_list=True):
                return True
    if isinstance(current_manifest, dict) and isinstance(desired_manifest, dict):
        key_intersection = set(current_manifest.keys()).intersection(
            set(desired_manifest.keys())
        )
        # If there are no common keys and we're in a list then return True
        # this means we have a new object
        if in_list and not key_intersection:
            log.debug4("Requires replace due to no common key in list")
            return True

        # Track whether at least one key has the same value. This is
        # required for kubernetes merges
        at_least_one_common = False
        for k in key_intersection:
            # Check if two objects are the same for their value operations
            changed = False
            if isinstance(current_manifest[k], list):
                changed = bool(
                    recursive_list_diff(current_manifest[k], desired_manifest[k])
                )
            elif isinstance(current_manifest[k], dict):
                changed = bool(recursive_diff(current_manifest[k], desired_manifest[k]))
            else:
                changed = current_manifest[k] != desired_manifest[k]

            if not changed:
                at_least_one_common = True

            # Recurse on modified lists
            if modified_lists(current_manifest[k], desired_manifest[k]):
                return True
        if in_list and not at_least_one_common:
            log.debug4("Requires replace due to no common key/value in list")
            return True
    return False
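
For illustration, a shortened desired list forces a replace because a merge alone cannot remove the extra element (example values are hypothetical):

current = {"spec": {"containers": [{"name": "app"}, {"name": "sidecar"}]}}
desired = {"spec": {"containers": [{"name": "app"}]}}
modified_lists(current, desired)  # True: the desired list dropped an element
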

modified_value_from(current_manifest, desired_manifest)

Helper function to check if a manifest switched from value to valueFrom. These are mutually exclusive thus they require a replace command.

Source code in oper8/deploy_manager/replace_utils.py
def modified_value_from(current_manifest: Any, desired_manifest: Any) -> bool:
    """Helper function to check if a manifest switched from value to valueFrom. These are mutually
    exclusive thus they require a replace command.
    """
    # If type mismatch then assume replace
    if (
        desired_manifest
        and current_manifest
        and type(desired_manifest) is not type(current_manifest)
    ):
        log.debug4("Requires replace due to type mismatch")
        return True

    if isinstance(current_manifest, list) and isinstance(desired_manifest, list):
        for recurse_a, recurse_b in zip(current_manifest, desired_manifest):
            if modified_value_from(recurse_a, recurse_b):
                return True
    if isinstance(current_manifest, dict) and isinstance(desired_manifest, dict):
        if ("value" in current_manifest and "valueFrom" in desired_manifest) or (
            "valueFrom" in current_manifest and "value" in desired_manifest
        ):
            log.debug4("Requires replace due to value/valueFrom change")
            return True
        else:
            for k in set(current_manifest.keys()).intersection(
                set(desired_manifest.keys())
            ):
                if modified_value_from(current_manifest[k], desired_manifest[k]):
                    return True
    return False
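
For illustration, switching an env var from a literal value to valueFrom is detected (example values are hypothetical):

current = {"env": [{"name": "LOG_LEVEL", "value": "info"}]}
desired = {
    "env": [
        {"name": "LOG_LEVEL", "valueFrom": {"configMapKeyRef": {"name": "cfg", "key": "level"}}}
    ]
}
modified_value_from(current, desired)  # True: value and valueFrom are mutually exclusive
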

requires_replace(current_manifest, desired_manifest)

Function to determine if a resource requires a replace operation instead of apply. This can occur due to list merging, or updating envVars

Parameters:

Name Type Description Default
current_manifest dict

The current manifest in the cluster

required
desired_manifest dict

The desired manifest that should be applied

required

Returns:

Name Type Description
bool bool

If the current manifest requires a replace operation

Source code in oper8/deploy_manager/replace_utils.py
def requires_replace(current_manifest: dict, desired_manifest: dict) -> bool:
    """Function to determine if a resource requires a replace operation instead
    of apply. This can occur due to list merging, or updating envVars

    Args:
        current_manifest (dict): The current manifest in the cluster
        desired_manifest (dict): The desired manifest that should be applied

    Returns:
        bool: If the current manifest requires a replace operation
    """
    for func in _REPLACE_FUNCS:
        if func(current_manifest, desired_manifest):
            log.debug4("Manifest requires replace", desired_manifest)
            return True
    return False
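
A hedged example combining the checks above (manifests are illustrative):

current = {"spec": {"ports": [{"port": 80}, {"port": 443}]}}
desired = {"spec": {"ports": [{"port": 80}]}}
requires_replace(current, desired)  # True: removing a list entry cannot be expressed as an apply
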

exceptions

This module implements custom exceptions

ClusterError

Bases: Oper8FatalError

Exception caused during chart construction when a cluster operation fails in an unexpected way.

Source code in oper8/exceptions.py
class ClusterError(Oper8FatalError):
    """Exception caused during chart construction when a cluster operation fails
    in an unexpected way.
    """

ConfigError

Bases: Oper8FatalError

Exception caused during usage of user-provided configuration

Source code in oper8/exceptions.py
class ConfigError(Oper8FatalError):
    """Exception caused during usage of user-provided configuration"""

Oper8DeprecationWarning

Bases: DeprecationWarning

This warning is issued for deprecated APIs

Source code in oper8/exceptions.py
class Oper8DeprecationWarning(DeprecationWarning):
    """This warning is issued for deprecated APIs"""

Oper8Error

Bases: Exception

Base class for all oper8 exceptions

Source code in oper8/exceptions.py
class Oper8Error(Exception):
    """Base class for all oper8 exceptions"""

    def __init__(self, message: str, is_fatal_error: bool):
        """Construct with a flag indicating whether this is a fatal error. This
        will be a static property of all children.
        """
        super().__init__(message)
        self._is_fatal_error = is_fatal_error

    @property
    def is_fatal_error(self):
        """Property indicating whether or not this error should signal a fatal
        state in the rollout
        """
        return self._is_fatal_error

is_fatal_error property

Property indicating whether or not this error should signal a fatal state in the rollout

__init__(message, is_fatal_error)

Construct with a flag indicating whether this is a fatal error. This will be a static property of all children.

Source code in oper8/exceptions.py
def __init__(self, message: str, is_fatal_error: bool):
    """Construct with a flag indicating whether this is a fatal error. This
    will be a static property of all children.
    """
    super().__init__(message)
    self._is_fatal_error = is_fatal_error

Oper8ExpectedError

Bases: Oper8Error

An Oper8ExpectedError is one that indicates an expected failure condition that should cause a reconciliation to terminate, but is expected to resolve in a subsequent reconciliation.

Source code in oper8/exceptions.py
class Oper8ExpectedError(Oper8Error):
    """An Oper8ExpectedError is one that indicates an expected failure condition
    that should cause a reconciliation to terminate, but is expected to resolve
    in a subsequent reconciliation.
    """

    def __init__(self, message: str = ""):
        super().__init__(message=message, is_fatal_error=False)

Oper8FatalError

Bases: Oper8Error

An Oper8FatalError is one that indicates an unexpected, and likely unrecoverable, failure during a reconciliation.

Source code in oper8/exceptions.py
class Oper8FatalError(Oper8Error):
    """An Oper8FatalError is one that indicates an unexpected, and likely
    unrecoverable, failure during a reconciliation.
    """

    def __init__(self, message: str = ""):
        super().__init__(message=message, is_fatal_error=True)

Oper8PendingDeprecationWarning

Bases: PendingDeprecationWarning

This warning is issued for APIs that are still supported but will be removed eventually

Source code in oper8/exceptions.py
class Oper8PendingDeprecationWarning(PendingDeprecationWarning):
    """This warning is issued for APIs that are still supported but will be removed eventually"""

PreconditionError

Bases: Oper8ExpectedError

Exception caused during chart construction when an expected precondition is not met.

Source code in oper8/exceptions.py
class PreconditionError(Oper8ExpectedError):
    """Exception caused during chart construction when an expected precondition
    is not met.
    """

RolloutError

Bases: Oper8FatalError

Exception indicating a failure during application rollout

Source code in oper8/exceptions.py
class RolloutError(Oper8FatalError):
    """Exception indicating a failure during application rollout"""

    def __init__(self, message: str = "", completion_state=None):
        self.completion_state = completion_state
        super().__init__(message)

VerificationError

Bases: Oper8ExpectedError

Exception caused during resource verification when a desired verification state is not reached.

Source code in oper8/exceptions.py
class VerificationError(Oper8ExpectedError):
    """Exception caused during resource verification when a desired verification
    state is not reached.
    """

assert_cluster(condition, message='')

Replacement for assert() which will throw a ClusterError. This should be used when building a chart which requires that an operation in the cluster (such as fetching an existing secret) succeeds.

Source code in oper8/exceptions.py
def assert_cluster(condition: bool, message: str = ""):
    """Replacement for assert() which will throw a ClusterError. This should
    be used when building a chart which requires that an operation in the
    cluster (such as fetching an existing secret) succeeds.
    """
    if not condition:
        raise ClusterError(message)

assert_config(condition, message='')

Replacement for assert() which will throw a ConfigError. This should be used when building a chart which requires that certain conditions be true in the deploy_config or app_config.

Source code in oper8/exceptions.py
def assert_config(condition: bool, message: str = ""):
    """Replacement for assert() which will throw a ConfigError. This should be
    used when building a chart which requires that certain conditions be true in
    the deploy_config or app_config.
    """
    if not condition:
        raise ConfigError(message)

assert_precondition(condition, message='')

Replacement for assert() which will throw a PreconditionError. This should be used when building a chart which requires that a precondition is met before continuing.

Source code in oper8/exceptions.py
def assert_precondition(condition: bool, message: str = ""):
    """Replacement for assert() which will throw a PreconditionError. This
    should be used when building a chart which requires that a precondition is
    met before continuing.
    """
    if not condition:
        raise PreconditionError(message)

assert_verified(condition, message='')

Replacement for assert() which will throw a VerificationError. This should be used when verifying the state of a resource in the cluster.

Source code in oper8/exceptions.py
def assert_verified(condition: bool, message: str = ""):
    """Replacement for assert() which will throw a VerificationError. This
    should be used when verifying the state of a resource in the cluster.
    """
    if not condition:
        raise VerificationError(message)
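
A hedged sketch of how these helpers might be combined when building or verifying a chart; the deploy manager instance, secret name, and namespace are illustrative:

success, secret = deploy_manager.get_object_current_state(
    kind="Secret", name="my-app-tls", namespace="demo", api_version="v1"
)
assert_cluster(success, "Failed to fetch my-app-tls from the cluster")
assert_precondition(secret is not None, "Waiting for my-app-tls to be created")
assert_verified("tls.crt" in secret.get("data", {}), "my-app-tls does not hold tls.crt yet")
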

log_format

Custom logging formats that contain more detailed oper8 logs

Oper8JsonFormatter

Bases: AlogJsonFormatter

Custom Log Format that extends AlogJsonFormatter to add multiple oper8 specific fields to the json. This includes things like identifiers of the resource being reconciled, reconciliationId, and thread information

Source code in oper8/log_format.py
class Oper8JsonFormatter(AlogJsonFormatter):
    """Custom Log Format that extends AlogJsonFormatter to add multiple
    oper8 specific fields to the json. This includes things like identifiers
    of the resource being reconciled, reconciliationId, and thread information
    """

    _FIELDS_TO_PRINT = AlogJsonFormatter._FIELDS_TO_PRINT + [
        "process",
        "thread",
        "threadName",
        "kind",
        "apiVersion",
        "resourceVersion",
        "resourceName",
        "reconciliationId",
    ]

    def __init__(self, manifest=None, reconciliation_id=None):
        super().__init__()
        self.manifest = manifest
        self.reconciliation_id = reconciliation_id

    def format(self, record):
        if self.reconciliation_id:
            record.reconciliationId = self.reconciliation_id

        if resource := getattr(record, "resource", self.manifest):
            record.kind = resource.get("kind")
            record.apiVersion = resource.get("apiVersion")

            metadata = resource.get("metadata", {})
            record.resourceVersion = metadata.get("resourceVersion")
            record.resourceName = metadata.get("name")

        return super().format(record)
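
A hedged sketch of attaching the formatter to a standard logging handler; the reconciliation id is illustrative, and this assumes AlogJsonFormatter behaves as a normal logging.Formatter (it does in the alog package):

import logging

handler = logging.StreamHandler()
handler.setFormatter(Oper8JsonFormatter(reconciliation_id="recon-0001"))
logging.getLogger("oper8").addHandler(handler)
# Records that carry a `resource` attribute (e.g. passed via `extra=`) also get
# kind, apiVersion, resourceVersion, and resourceName fields in the JSON output
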

managed_object

Helper object to represent a kubernetes object that is managed by the operator

ManagedObject

Basic struct to represent a managed kubernetes object

Source code in oper8/managed_object.py
class ManagedObject:  # pylint: disable=too-many-instance-attributes
    """Basic struct to represent a managed kubernetes object"""

    def __init__(
        self,
        definition: dict,
        verify_function: Optional[Callable] = None,
        deploy_method: Optional["DeployMethod"] = None,  # noqa: F821
    ):
        self.kind = definition.get("kind")
        self.metadata = definition.get("metadata", {})
        self.name = self.metadata.get("name")
        self.namespace = self.metadata.get("namespace")
        self.uid = self.metadata.get("uid", uuid.uuid4())
        self.resource_version = self.metadata.get("resourceVersion")
        self.api_version = definition.get("apiVersion")
        self.definition = definition
        self.verify_function = verify_function
        self.deploy_method = deploy_method

        # If resource is not list then check name
        if KUBE_LIST_IDENTIFIER not in self.kind:
            assert self.name is not None, "No name found"

        assert self.kind is not None, "No kind found"
        assert self.api_version is not None, "No apiVersion found"

    def get(self, *args, **kwargs):
        """Pass get calls to the objects definition"""
        return self.definition.get(*args, **kwargs)

    def __str__(self):
        return f"{self.api_version}/{self.kind}/{self.name}"

    def __repr__(self):
        return str(self)

    def __hash__(self):
        """Hash explicitly excludes the definition so that the object's
        identifier in a map can be based only on the unique identifier of the
        resource in the cluster. If the original resource did not provide a unique
        identifier then use the apiVersion, kind, and name
        """
        return hash(self.metadata.get("uid", str(self)))

    def __eq__(self, other):
        return hash(self) == hash(other)
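
A hedged usage sketch (the manifest content is illustrative; ManagedObject is defined in oper8/managed_object.py as shown above):

cm = ManagedObject(
    {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": {"name": "my-app-config", "namespace": "demo"},
    }
)
print(cm)                   # v1/ConfigMap/my-app-config
data = cm.get("data", {})   # get() is delegated to the underlying definition dict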

__hash__()

Hash explicitly excludes the definition so that the object's identifier in a map can be based only on the unique identifier of the resource in the cluster. If the original resource did not provide a unique identifier then use the apiVersion, kind, and name

Source code in oper8/managed_object.py
def __hash__(self):
    """Hash explicitly excludes the definition so that the object's
    identifier in a map can be based only on the unique identifier of the
    resource in the cluster. If the original resource did not provide a unique
    identifier then use the apiVersion, kind, and name
    """
    return hash(self.metadata.get("uid", str(self)))

get(*args, **kwargs)

Pass get calls to the objects definition

Source code in oper8/managed_object.py
def get(self, *args, **kwargs):
    """Pass get calls to the objects definition"""
    return self.definition.get(*args, **kwargs)

patch

This module holds shared semantics for patching resources using temporary_patch

apply_patches(internal_name, resource_definition, temporary_patches)

Apply all temporary patches to the given resource from the given list. The patches are applied in-place.

Parameters:

Name Type Description Default
internal_name str

str The name given to the internal node of the object. This is used to identify which patches apply to this object.

required
resource_definition dict

dict The dict representation of the object to patch

required
temporary_patches List[dict]

List[dict] The list of temporary patches that apply to this rollout

required

Returns:

Name Type Description
patched_definition

dict The dict representation of the object with patches applied

Source code in oper8/patch.py
def apply_patches(
    internal_name: str,
    resource_definition: dict,
    temporary_patches: List[dict],
):
    """Apply all temporary patches to the given resource from the given list.
    The patches are applied in-place.

    Args:
        internal_name:  str
            The name given to the internal node of the object. This is used to
            identify which patches apply to this object.
        resource_definition:  dict
            The dict representation of the object to patch
        temporary_patches:  List[dict]
            The list of temporary patches that apply to this rollout

    Returns:
        patched_definition:  dict
            The dict representation of the object with patches applied
    """
    log.debug2(
        "Looking for patches for %s/%s (%s)",
        resource_definition.get("kind"),
        resource_definition.get("metadata", {}).get("name"),
        internal_name,
    )
    resource_definition = copy.deepcopy(resource_definition)
    for patch_content in temporary_patches:
        log.debug4("Checking patch: << %s >>", patch_content)

        # Look to see if this patch contains a match for the internal name
        internal_name_parts = internal_name.split(".")
        internal_name_parts.reverse()
        patch = patch_content.spec.patch
        log.debug4("Full patch section: %s", patch)
        while internal_name_parts and isinstance(patch, dict):
            patch_level = internal_name_parts.pop()
            log.debug4("Getting patch level [%s]", patch_level)
            patch = patch.get(patch_level, {})
            log.debug4("Patch level: %s", patch)
        log.debug4("Checking patch: %s", patch)

        # If the patch matches, apply the right merge
        if patch and not internal_name_parts:
            log.debug3("Found matching patch: %s", patch_content.metadata.name)

            # Dispatch the right patch type
            if patch_content.spec.patchType == STRATEGIC_MERGE_PATCH:
                resource_definition = _apply_patch_strategic_merge(
                    resource_definition, patch
                )
            elif patch_content.spec.patchType == JSON_PATCH_6902:
                resource_definition = _apply_json_patch(resource_definition, patch)
            else:
                raise ValueError(
                    f"Unsupported patch type [{patch_content.spec.patchType}]"
                )
    return resource_definition
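
A hedged sketch of applying a temporary patch. The patch objects are read with attribute syntax (patch_content.spec.patch), so a plain dict is wrapped in aconfig.Config here; the STRATEGIC_MERGE_PATCH constant and import path are assumptions based on the source shown above, and all names are illustrative:

import aconfig
from oper8.patch import STRATEGIC_MERGE_PATCH, apply_patches  # assumed import path

deployment = {"kind": "Deployment", "metadata": {"name": "my-app"}, "spec": {"replicas": 3}}
patch_cr = aconfig.Config(
    {
        "metadata": {"name": "scale-down"},
        "spec": {
            "patchType": STRATEGIC_MERGE_PATCH,
            # Nested under the internal name "app.deployment" used below
            "patch": {"app": {"deployment": {"spec": {"replicas": 1}}}},
        },
    },
    override_env_vars=False,
)
patched = apply_patches("app.deployment", deployment, [patch_cr])
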

patch_strategic_merge

This module implements Patch Strategic Merge following the semantics in:

  • kustomize: https://kubectl.docs.kubernetes.io/references/kustomize/glossary/#patchstrategicmerge
  • kubernetes: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-api-machinery/strategic-merge-patch.md

patch_strategic_merge(resource_definition, patch, merge_patch_keys=None)

Apply a Strategic Merge Patch based on JSON Merge Patch (rfc 7386)

Parameters:

Name Type Description Default
resource_definition dict

dict The dict representation of the kubernetes resource

required
patch dict

dict The formatted patch to apply

required
merge_patch_keys Dict[str, str]

Dict[str, str] The mapping from paths to merge keys used to perform merge semantics for list elements

None

Returns:

Name Type Description
patched_resource_definition dict

dict The patched version of the resource_definition

Source code in oper8/patch_strategic_merge.py
def patch_strategic_merge(
    resource_definition: dict,
    patch: dict,
    merge_patch_keys: Dict[str, str] = None,
) -> dict:
    """Apply a Strategic Merge Patch based on JSON Merge Patch (rfc 7386)

    Args:
        resource_definition:  dict
            The dict representation of the kubernetes resource
        patch:  dict
            The formatted patch to apply
        merge_patch_keys:  Dict[str, str]
            The mapping from paths to merge keys used to perform merge semantics
            for list elements

    Returns:
        patched_resource_definition:  dict
            The patched version of the resource_definition
    """
    if merge_patch_keys is None:
        merge_patch_keys = STRATEGIC_MERGE_PATCH_KEYS
    return _strategic_merge(
        current=copy.deepcopy(resource_definition),
        desired=copy.deepcopy(patch),
        position=resource_definition.get("kind"),
        merge_patch_keys=merge_patch_keys,
    )
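
A hedged example of the merge semantics, assuming the default STRATEGIC_MERGE_PATCH_KEYS merges container lists on their name key as the upstream strategic merge spec does; the manifests are illustrative:

deployment = {
    "kind": "Deployment",
    "spec": {
        "template": {
            "spec": {
                "containers": [
                    {"name": "app", "image": "my-app:1.0"},
                    {"name": "sidecar", "image": "proxy:1.0"},
                ]
            }
        }
    },
}
patch = {"spec": {"template": {"spec": {"containers": [{"name": "app", "image": "my-app:1.1"}]}}}}
patched = patch_strategic_merge(deployment, patch)
# Only the "app" container image changes; "sidecar" is preserved by the merge key
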

reconcile

The ReconcileManager class manages an individual reconcile of a controller. It sets up the session, constructs the controller, and runs its reconcile

ReconcileManager

This class manages reconciliations for an instance of Oper8. Its primary function is to run reconciles given a CR manifest, Controller, and the current cluster state via a DeployManager.

Source code in oper8/reconcile.py
class ReconcileManager:  # pylint: disable=too-many-lines
    """This class manages reconciliations for an instance of Oper8. It's
    primary function is to run reconciles given a CR manifest, Controller,
    and the current cluster state via a DeployManager.
    """

    ## Construction ############################################################

    def __init__(
        self,
        home_dir: str = None,
        deploy_manager: Optional[DeployManagerBase] = None,
        enable_vcs: Optional[bool] = None,
        reimport_controller: Optional[bool] = True,
    ):
        """The constructor sets up the properties used across every
        reconcile and checks that the current config is valid.

        Args:
            home_dir:  Optional[str]=None
                The root directory for importing controllers or VCS checkout
            deploy_manager:  Optional[DeployManager]=None
                Deploy manager to use. If not given, a new DeployManager will
                be created for each reconcile.
            enable_vcs:  Optional[bool]=None
                Parameter to manually control the state of VCS on a per-instance
                basis. Defaults to config.vcs.enabled when not provided.
            reimport_controller:  Optional[bool]=True
                Parameter to manually control whether the controller needs to be
                reimported on each reconcile.
        """

        if home_dir:
            self.home_dir = home_dir
        elif config.vcs.enabled:
            self.home_dir = config.vcs.repo
        else:
            self.home_dir = os.getcwd()

        self.vcs = None

        # If enable_vcs is not provided, then default to
        # the config value
        if enable_vcs is None:
            enable_vcs = config.vcs.enabled

        if enable_vcs:
            assert_config(
                config.vcs.repo,
                "Can not enable vcs without supply source repo at vcs.repo",
            )
            assert_config(
                config.vcs.dest,
                "Cannot require enable vcs without providing a destination",
            )
            vcs_checkout_methods = [method.value for method in VCSCheckoutMethod]
            assert_config(
                config.vcs.checkout_method in vcs_checkout_methods,
                f"VCS checkout method must be one of the following {vcs_checkout_methods}",
            )

            self.vcs = VCS(self.home_dir)

        # Ensure config is setup correctly for strict_versioning
        if config.strict_versioning:
            assert_config(
                config.supported_versions is not None,
                "Must provide supported_versions with strict_versioning=True",
            )
            assert_config(
                config.vcs.field is not None,
                "Must provide vcs.field with strict_versioning=True",
            )

        self.deploy_manager = deploy_manager
        self.reimport_controller = reimport_controller

    ## Reconciliation ############################################################

    @alog.logged_function(log.info)
    @alog.timed_function(log.info, "Reconcile finished in: ")
    def reconcile(
        self,
        controller_info: CONTROLLER_INFO,
        resource: Union[dict, aconfig.Config],
        is_finalizer: bool = False,
    ) -> ReconciliationResult:
        """This is the main entrypoint for reconciliations and contains the
        core implementation. The general reconcile path is as follows:

            1. Parse the raw CR manifest
            2. Setup logging based on config with overrides from CR
            3. Check if the CR is paused and for strict versioning
            4. Setup directory if VCS is enabled
            5. Import and construct the Controller
            6. Setup the DeployManager and Session objects
            7. Run the Controller reconcile

        Args:
            controller_info: CONTROLLER_INFO
                The description of a controller. See CONTROLLER_INFO for
                more information
            resource: Union[dict, aconfig.Config]
                A raw representation of the resource to be reconciled
            is_finalizer: bool=False
                Whether the resource is being deleted

        Returns:
            reconcile_result:  ReconciliationResult
                The result of the reconcile
        """

        # Parse the full CR content
        cr_manifest = self.parse_manifest(resource)

        # generate id unique to this session
        reconcile_id = self.generate_id()

        # Initialize logging prior to any other work
        self.configure_logging(cr_manifest, reconcile_id)

        # If paused, do nothing and don't requeue
        if self._is_paused(cr_manifest):
            log.info("CR is paused. Exiting reconciliation")
            result = ReconciliationResult(requeue=False, requeue_params=RequeueParams())
            return result

        # Check strict versioning before continuing
        if config.strict_versioning:
            self._check_strict_versioning(cr_manifest)

        # Check if VCS is enabled and then attempt to checkout
        if config.vcs.enabled:
            self.setup_vcs(cr_manifest)

        # Import controller and setup the instance
        controller = self.setup_controller(controller_info)

        # Configure deploy manager on a per reconcile basis for
        # owner references unless a manager is provided on initialization
        deploy_manager = self.setup_deploy_manager(cr_manifest)

        # Setup Session
        session = self.setup_session(
            controller, cr_manifest, deploy_manager, reconcile_id
        )

        # Run the controller reconcile
        result = self.run_controller(controller, session, is_finalizer)

        return result

    def safe_reconcile(
        self,
        controller_info: CONTROLLER_INFO,
        resource: dict,
        is_finalizer: bool = False,
    ) -> ReconciliationResult:
        """
        This function calls out to reconcile but catches any errors thrown. This
        function guarantees a safe result which is needed by some Watch Managers

        Args:
            controller_info: CONTROLLER_INFO
                The description of a controller. See CONTROLLER_INFO for
                more information
            resource: Union[dict, aconfig.Config]
                A raw representation of the resource to be reconciled
            is_finalizer: bool=False
                Whether the resource is being deleted

        Returns:
            reconcile_result:  ReconciliationResult
                The result of the reconcile

        """

        try:
            return self.reconcile(controller_info, resource, is_finalizer)

        # VCSMultiProcessError is an expected error caused by oper8 which should
        # not be handled by the exception handling code
        except VCSMultiProcessError as exc:
            # Requeue after ~7.5 seconds. Add randomness to avoid
            # repeated conflicts
            requeue_time = 5 + random.uniform(0, 5)
            params = RequeueParams(
                requeue_after=datetime.timedelta(seconds=requeue_time)
            )
            log.debug("VCS Multiprocessing Error Detected: {%s}", exc, exc_info=True)
            log.warning(
                "VCS Setup failed due to other process. Requeueing in %ss",
                requeue_time,
            )
            return ReconciliationResult(
                requeue=True, requeue_params=params, exception=exc
            )

        # Capture all generic exceptions
        except Exception as exc:  # pylint: disable=broad-except
            log.warning("Handling caught error in reconcile: %s", exc, exc_info=True)
            error = exc

        if config.manage_status:
            try:
                self._update_error_status(resource, error)
                log.debug("Update CR status with error message")
            except Exception as exc:  # pylint: disable=broad-except
                log.error("Failed to update status: %s", exc, exc_info=True)

        # If we got to this return it means there was an
        # exception during reconcile and we should requeue
        # with the default backoff period
        log.info("Requeuing CR due to error during reconcile")
        return ReconciliationResult(
            requeue=True, requeue_params=RequeueParams(), exception=error
        )

    ## Reconciliation Stages ############################################################

    @classmethod
    def parse_manifest(cls, resource: Union[dict, aconfig.Config]) -> aconfig.Config:
        """Parse a raw resource into an aconfig Config

        Args:
            resource: Union[dict, aconfig.Config])
                The resource to be parsed into a manifest

        Returns:
            cr_manifest: aconfig.Config
                The parsed and validated config
        """
        try:
            cr_manifest = aconfig.Config(resource, override_env_vars=False)
        except (ValueError, SyntaxError, AttributeError) as exc:
            raise ValueError("Failed to parse full_cr") from exc

        return cr_manifest

    @classmethod
    def configure_logging(cls, cr_manifest: aconfig.Config, reconciliation_id: str):
        """Configure the logging for a given reconcile

        Args:
            cr_manifest: aconfig.Config
                The resource to get annotation overrides from
            reconciliation_id: str
                The unique id for the reconciliation
        """

        # Fetch the annotations for logging
        # NOTE: We use safe fetching here because this happens before CR
        #   verification in the Session constructor
        annotations = cr_manifest.get("metadata", {}).get("annotations", {})
        default_level = annotations.get(
            constants.LOG_DEFAULT_LEVEL_NAME, config.log_level
        )

        filters = annotations.get(constants.LOG_FILTERS_NAME, config.log_filters)
        log_json = annotations.get(constants.LOG_JSON_NAME, str(config.log_json))
        log_thread_id = annotations.get(
            constants.LOG_THREAD_ID_NAME, str(config.log_thread_id)
        )

        # Convert boolean args
        log_json = (log_json or "").lower() == "true"
        log_thread_id = (log_thread_id or "").lower() == "true"

        # Keep the old handler. This is useful if running with ansible as
        # it will preserve the handler generator set up to log to a file
        # since ansible captures all logging output
        handler_generator = None
        if logging.root.handlers:
            old_handler = logging.root.handlers[0]

            def handler_generator():
                return old_handler

        alog.configure(
            default_level=default_level,
            filters=filters,
            formatter=(
                Oper8JsonFormatter(cr_manifest, reconciliation_id)
                if log_json
                else "pretty"
            ),
            thread_id=log_thread_id,
            handler_generator=handler_generator,
        )

    @classmethod
    def generate_id(cls) -> str:
        """Generates a unique human readable id for this reconciliation

        Returns:
            id: str
                A unique base32 encoded id
        """
        uuid4 = uuid.uuid4()
        base32_str = base64.b32encode(uuid4.bytes).decode("utf-8")
        reconcile_id = base32_str[:22]
        log.debug("Generated reconcile id: %s", reconcile_id)
        return reconcile_id

    def setup_vcs(self, cr_manifest: aconfig.Config):
        """Setups the VCS directory and sys.path for a reconcile.
        This function also ensures that the version is valid if
        config.strict_versioning is enabled.

        Args:
            cr_manifest: aconfig.Config
                The cr manifest to pull the requested version from.
        """
        version = get_manifest_version(cr_manifest)
        if not version:
            raise ValueError("CR Manifest has no version")

        log.debug(
            "Setting up working directory with src: %s and version: %s",
            self.home_dir,
            version,
        )
        working_dir = self._setup_directory(cr_manifest, version)

        # Construct working dir path from vcs and git directory
        if config.vcs.module_dir:
            module_path = pathlib.Path(config.vcs.module_dir)
            working_dir = working_dir / module_path

        if not working_dir.is_dir():
            log.error(
                "Working directory %s could not be found. Invalid module path",
                working_dir,
            )
            raise ConfigError(
                f"Module path: '{module_path}' could not be found in repository"
            )

        log.debug4("Changing working directory to %s", working_dir)
        os.chdir(working_dir)
        sys.path.insert(0, str(working_dir))

    def setup_controller(
        self, controller_info: CONTROLLER_INFO
    ) -> CONTROLLER_CLASS_TYPE:
        """
        Import the requested Controller class and enable any compatibility layers

        Args:
            controller_info:CONTROLLER_INFO
                The description of a controller. See CONTROLLER_INFO for
                more information
        Returns:
            controller:
                The required Controller Class
        """

        # Local
        from .controller import (  # pylint: disable=import-outside-toplevel, cyclic-import
            Controller,
        )

        # If controller info is already a constructed controller then
        # skip importing
        if isinstance(controller_info, Controller):
            return controller_info

        controller_class = self._import_controller(controller_info)
        return self._configure_controller(controller_class)

    def setup_deploy_manager(self, cr_manifest: aconfig.Config) -> DeployManagerBase:
        """
        Configure a deploy_manager for a reconcile given a manifest

        Args:
            cr_manifest: aconfig.Config
                The resource to be used as an owner_ref

        Returns:
            deploy_manager: DeployManagerBase
                The deploy_manager to be used during reconcile
        """
        if self.deploy_manager:
            return self.deploy_manager

        if config.dry_run:
            log.debug("Using DryRunDeployManager")
            return DryRunDeployManager()

        log.debug("Using OpenshiftDeployManager")
        return OpenshiftDeployManager(owner_cr=cr_manifest)

    def setup_session(
        self,
        controller: CONTROLLER_TYPE,
        cr_manifest: aconfig.Config,
        deploy_manager: DeployManagerBase,
        reconciliation_id: str,
    ) -> Session:
        """Construct the session, including gathering the backend config and any temp patches

        Args:
            controller: Controller
                The controller class being reconciled
            cr_manifest: aconfig.Config
                The resource being reconciled
            deploy_manager: DeployManagerBase
                The deploy manager used in the cluster
            reconciliation_id: str
                The id for the reconcile

        Return:
            session: Session
                The session for reconcile
        """
        # Get backend config for reconciliation
        controller_defaults = controller.get_config_defaults()
        reconciliation_config = self._get_reconcile_config(
            cr_manifest=cr_manifest,
            deploy_manager=deploy_manager,
            controller_defaults=controller_defaults,
        )
        log.debug4("Gathered Config: %s", reconciliation_config)

        # Get Temporary patches
        patches = self._get_temp_patches(deploy_manager, cr_manifest)
        log.debug3("Found %d patches", len(patches))

        # Get the complete CR Manifest including defaults
        cr_manifest_defaults = controller.get_cr_manifest_defaults()
        full_cr_manifest = merge_configs(
            aconfig.Config(cr_manifest_defaults),
            cr_manifest,
        )

        return Session(
            reconciliation_id=reconciliation_id,
            cr_manifest=full_cr_manifest,
            config=reconciliation_config,
            deploy_manager=deploy_manager,
            temporary_patches=patches,
        )

    def run_controller(
        self, controller: CONTROLLER_TYPE, session: Session, is_finalizer: bool
    ) -> ReconciliationResult:
        """Run the Controller's reconciliation or finalizer with the constructed Session.
        This function also updates the CR status and handles requeue logic.

        Args:
            controller: Controller
                The Controller being reconciled
            session: Session
                The current Session state
            is_finalizer:
                Whether the resource is being deleted

        Returns:
            reconciliation_result: ReconciliationResult
                The result of the reconcile
        """
        log.info(
            "%s resource %s/%s/%s",
            "Finalizing" if is_finalizer else "Reconciling",
            session.kind,
            session.namespace,
            session.name,
        )

        # Ensure the resource has the proper finalizers
        if controller.has_finalizer:
            add_finalizer(session, controller.finalizer)

        # Update the Resource status
        if config.manage_status:
            self._update_reconcile_start_status(session)

        # Reconcile the controller
        completion_state = controller.run_reconcile(
            session,
            is_finalizer=is_finalizer,
        )

        if config.manage_status:
            self._update_reconcile_completion_status(session, completion_state)

        # Check if the controller session should requeue
        requeue, requeue_params = controller.should_requeue(session)
        if not requeue_params:
            requeue_params = RequeueParams()

        # Remove managed finalizers if not requeuing
        if not requeue and is_finalizer and controller.has_finalizer:
            remove_finalizer(session, controller.finalizer)

        return ReconciliationResult(requeue=requeue, requeue_params=requeue_params)

    ## Implementation Details ############################################################

    @classmethod
    def _is_paused(cls, cr_manifest: aconfig.Config) -> bool:
        """Check if a manifest has a paused annotation

        Args:
            cr_manifest: aconfig.Config
                The manifest being checked

        Returns:
            is_paused: bool
                If the manifest contains the paused annotation
        """
        annotations = cr_manifest.metadata.get("annotations", {})
        paused = annotations.get(constants.PAUSE_ANNOTATION_NAME)
        return paused and paused.lower() == "true"

    def _check_strict_versioning(self, cr_manifest: aconfig.Config):
        """Check the version against config and vcs directory

        Args:
            cr_manifest: aconfig.Config
                The manifest whose version should be checked against config and VCS
        """
        version = get_manifest_version(cr_manifest)
        if not version:
            raise ValueError("CR Manifest has no version")

        # Ensure version is in list of supported versions
        assert_config(
            version in config.supported_versions,
            f"Unsupported version: {version}",
        )

        # If VCS is enabled ensure the branch or tag exists
        if self.vcs:
            repo_versions = self.vcs.list_refs()
            assert_config(
                version in repo_versions,
                f"Version not found in repo: {version}",
            )
            log.debug3("Supported VCS Versions: %s", repo_versions)

    def _setup_directory(
        self, cr_manifest: aconfig.Config, version: str
    ) -> pathlib.Path:
        """Construct the VCS directory from the cr_manifest and version. Then
        checkout the directory

        Args:
            cr_manifest: aconfig.Config
                The manifest to be used for the checkout path
            version: str
                The version to checkout

        Returns:
            destination_directory: pathlib.Path
                The destination directory for the checkout
        """

        # Generate checkout directory and ensure path exists
        def sanitize_for_path(path):
            keepcharacters = (" ", ".", "_")
            return "".join(
                c for c in path if c.isalnum() or c in keepcharacters
            ).rstrip()

        # Setup destination template to allow for CR specific checkout paths
        # The entirety of the cr_manifest is included as a dict as well as some
        # custom keys
        template_mappings = {
            # Include the entire dict first so that the sanitized default values
            # take precedence
            **cr_manifest,
            "version": version,
            "kind": sanitize_for_path(cr_manifest.kind),
            "apiVersion": sanitize_for_path(cr_manifest.apiVersion),
            "namespace": sanitize_for_path(cr_manifest.metadata.namespace),
            "name": sanitize_for_path(cr_manifest.metadata.name),
        }

        # Get the checkout directory and method
        try:
            formatted_path = config.vcs.dest.format(**template_mappings)
        except KeyError as exc:
            log.warning(
                "Invalid key: %s found in vcs destination template", exc, exc_info=True
            )
            raise ConfigError("Invalid Key found in vcs destination template") from exc

        checkout_dir = pathlib.Path(formatted_path)
        checkout_method = VCSCheckoutMethod(config.vcs.checkout_method)

        log.debug2(
            "Checking out into directory %s with method %s",
            checkout_dir,
            checkout_method.value,
        )
        self.vcs.checkout_ref(version, checkout_dir, checkout_method)
        return checkout_dir

    @staticmethod
    def _unimport_controller_module(module_name: str) -> Set[str]:
        """Helper to un-import the given module and its parents/siblings/
        children

        Args:
            module_name: str
                The name of the module that holds the Controller

        Returns:
            reimport_modules: Set[str]
                All modules that were unimported and will need to be reimported
        """
        reimport_modules = set()
        if module_name in sys.modules:
            log.debug2("UnImporting controller module: %s", module_name)
            sys.modules.pop(module_name)
            reimport_modules.add(module_name)

        # UnImport the controller and any parent/sibling/child modules so
        # controller can be reimported from the most recent sys path
        module_parts = module_name.split(".")
        for i in range(1, len(module_parts)):
            parent_module = ".".join(module_parts[:-i])
            if parent_module in sys.modules:
                log.debug3("UnImporting module: %s", parent_module)
                if sys.modules.pop(parent_module, None):
                    reimport_modules.add(parent_module)
        for child_module in [
            mod_name
            for mod_name in sys.modules
            if mod_name.startswith(f"{module_parts[0]}.")
        ]:
            log.debug3("UnImporting child module: %s", child_module)
            if sys.modules.pop(child_module, None):
                reimport_modules.add(child_module)
        return reimport_modules

    def _import_controller(
        self, controller_info: CONTROLLER_INFO
    ) -> CONTROLLER_CLASS_TYPE:
        """Parse the controller info and reimport the controller

        Args:
            controller_info:CONTROLLER_INFO
                The description of a controller. See CONTROLLER_INFO for
                more information
        Returns:
            controller_class: Type[Controller]
                The reimported Controller

        """
        log.debug2("Parsing controller_info")
        if isinstance(controller_info, str):
            class_module_parts = controller_info.rsplit(".", maxsplit=1)
            assert_config(
                len(class_module_parts) == 2,
                f"Invalid controller_class [{controller_info}]. Format is <module>.<class>",
            )
            module_name, class_name = class_module_parts
        else:
            class_name = controller_info.__name__
            module_name = controller_info.__module__

        # Reimport module if reimporting is enabled and if it already exists
        log.debug3(
            "Running controller %s from module %s [reimport? %s, in sys.modules? %s]",
            class_name,
            module_name,
            self.reimport_controller,
            module_name in sys.modules,
        )
        reimport_modules = {module_name}
        if self.reimport_controller:
            reimport_modules = reimport_modules.union(
                self._unimport_controller_module(module_name)
            )

        # Attempt to import the modules
        log.debug2("Attempting to import [%s.%s]", module_name, class_name)
        for reimport_name in reimport_modules:
            try:
                app_module = importlib.import_module(reimport_name)
                if reimport_name == module_name:
                    if not hasattr(app_module, class_name):
                        raise ConfigError(
                            f"Invalid controller_class [{class_name}]."
                            f" Class not found in module [{reimport_name}]"
                        )
                    controller_class = getattr(app_module, class_name)

                    # Import controller in function to avoid circular imports
                    # Local
                    from .controller import (  # pylint: disable=import-outside-toplevel
                        Controller,
                    )

                    if not issubclass(controller_class, Controller):
                        raise ConfigError(
                            f"Invalid controller_class [{module_name}.{class_name}]."
                            f" [{class_name}] is not a Controller"
                        )

            except ImportError as exc:
                # If this is the module that holds the controller, it _needs_ to
                # be reimported
                if reimport_name == module_name:
                    log.error(
                        "Failed to import [%s.%s]. Failed to import [%s]",
                        reimport_name,
                        class_name,
                        reimport_name,
                        exc_info=True,
                    )
                    raise ConfigError("Invalid Controller Class Specified") from exc
                # Otherwise, it's ok for import to fail
                else:
                    log.debug("Not able to reimport %s", reimport_name)

        log.debug(
            "Imported Controller %s from file %s",
            controller_class,
            sys.modules[controller_class.__module__].__file__,
        )

        return controller_class

    def _configure_controller(
        self, controller_class: CONTROLLER_CLASS_TYPE
    ) -> CONTROLLER_TYPE:
        """Construct the Controller Class

        Args:
            controller_class: CONTROLLER_CLASS_TYPE
                The Controller class to be reconciled

        Returns:
            controller: Controller
                The constructed Controller

        """
        log.debug3("Constructing controller")
        controller = controller_class()
        return controller

    def _get_reconcile_config(
        self,
        cr_manifest: aconfig.Config,
        deploy_manager: DeployManagerBase,
        controller_defaults: aconfig.Config,
    ) -> aconfig.Config:
        """Construct the flattened backend config for this reconciliation
        starting with a deepcopy of the base and merging in overrides from the CR

        Args:
            cr_manifest: aconfig.Config:
                The manifest to get overrides from
            deploy_manager: DeployManagerBase:
                The deploy manager to get the default configmap config
            controller_defaults: aconfig.Config:
                The config defaults provided by the controller class

        Returns:
            reconcile_config: aconfig.Config
                The reconciliation config
        """
        metadata = cr_manifest.get("metadata", {})
        annotations = metadata.get("annotations", {})
        namespace = metadata.get("namespace")
        cr_config_defaults = cr_manifest.get(constants.CONFIG_OVERRIDES, {})
        annotation_config_defaults = {}
        if constants.CONFIG_DEFAULTS_ANNOTATION_NAME in annotations:
            log.debug("Pulling config_defaults based on annotation")
            config_defaults_name = annotations[
                constants.CONFIG_DEFAULTS_ANNOTATION_NAME
            ]

            # Allow sub-keys to be delineated by "/"
            parts = config_defaults_name.split("/")
            config_defaults_cm_name = parts[0]

            log.debug2(
                "Reading config_defaults from ConfigMap [%s]", config_defaults_cm_name
            )
            success, content = deploy_manager.get_object_current_state(
                kind="ConfigMap",
                name=config_defaults_cm_name,
                namespace=namespace,
                api_version="v1",
            )
            assert_cluster(success, "Failed to look up config defaults form ConfigMap")
            assert_config(
                content is not None,
                f"Did not find configured config defaults ConfigMap: {config_defaults_cm_name}",
            )
            assert_config("data" in content, "Got ConfigMap content with out 'data'")
            config_defaults_content = content["data"]
            assert_config(
                isinstance(config_defaults_content, dict),
                f"Incorrectly formatted config_defaults ConfigMap: {config_defaults_cm_name}",
            )

            # Parse as a Config
            log.debug2("Parsing app config dict")
            annotation_config_defaults = aconfig.Config(
                config_defaults_content, override_env_vars=False
            )

        return merge_configs(
            copy.deepcopy(controller_defaults),
            merge_configs(annotation_config_defaults, cr_config_defaults),
        )

    def _get_temp_patches(  # pylint: disable=too-many-locals
        self, deploy_manager: DeployManagerBase, cr_manifest: aconfig.Config
    ) -> List[aconfig.Config]:
        """Fetch the ordered list of temporary patches that should apply to this
        rollout.

        Args:
            deploy_manager: DeployManagerBase
                The DeployManager used to get the current temporary patches
            cr_manifest: aconfig.Config
                The manifest of this reconciliation
        """

        # Look for patch annotations on the CR
        patch_annotation = (
            cr_manifest.get("metadata", {})
            .get("annotations", {})
            .get(constants.TEMPORARY_PATCHES_ANNOTATION_NAME, "{}")
        )
        log.debug3("Raw patch annotation: %s", patch_annotation)
        try:
            raw_patches = json.loads(patch_annotation)
            if not isinstance(raw_patches, dict):
                msg = f"Patches annotation not a dict: {raw_patches}"
                log.error(msg)
                raise RolloutError(msg)
            patches = {}
            for patch_name, patch_meta in raw_patches.items():
                patch_meta["timestamp"] = dateutil.parser.parse(patch_meta["timestamp"])
                patches[patch_name] = patch_meta
                if "api_version" not in patch_meta:
                    raise KeyError("api_version")
        except json.decoder.JSONDecodeError as err:
            msg = f"Could not parse patches from annotation [{patch_annotation}]"
            log.error(msg)
            raise RolloutError(msg) from err
        except dateutil.parser.ParserError as err:
            msg = f"Failed to parse patch timestamp [{patch_annotation}]"
            log.error(msg)
            raise RolloutError(msg) from err
        except KeyError as err:
            msg = f"Patch meta incorrectly formatted [{patch_annotation}]"
            log.error(msg)
            raise RolloutError(msg) from err

        # Fetch the state of each patch and add it to the output, sorted by
        # timestamp with the earliest first
        temporary_patches = []
        for patch_name, patch_meta in sorted(
            list(patches.items()), key=lambda x: x[1]["timestamp"]
        ):
            # Do the fetch
            log.debug2("Fetching patch [%s/%s]", patch_name, patch_meta["timestamp"])
            namespace = cr_manifest.get("metadata", {}).get("namespace")
            patch_api_version = patch_meta["api_version"]
            patch_kind = patch_meta.get("kind", "TemporaryPatch")
            success, content = deploy_manager.get_object_current_state(
                kind=patch_kind,
                name=patch_name,
                api_version=patch_api_version,
                namespace=namespace,
            )
            assert_cluster(success, f"Failed to fetch patch content for [{patch_name}]")
            assert_config(content is not None, f"Patch not found [{patch_name}]")

            # Pull the patch spec and add it to the list
            assert_config(
                content.get("spec") is not None,
                f"No spec found in patch [{patch_name}]",
            )
            temporary_patches.append(aconfig.Config(content, override_env_vars=False))

        return temporary_patches

    ## Status Details ############################################################

    def _update_resource_status(
        self, deploy_manager: DeployManagerBase, manifest: aconfig.Config, **kwargs
    ) -> dict:
        """Helper function to update the status of a resource given a deploy_manager, manifest
        and status kwargs

        Args:
            deploy_manager: DeployManagerBase
                The DeployManager used to update the resource
            manifest: aconfig.Config
                The manifest of the resource being updated
            **kwargs:
                The key word arguments passed to update_resource_status

        Returns:
            updated_status: dict
                The updated status applied to the resource
        """
        return status.update_resource_status(
            deploy_manager,
            manifest.kind,
            manifest.apiVersion,
            manifest.metadata.name,
            manifest.metadata.namespace,
            **kwargs,
        )

    def _update_reconcile_start_status(self, session: Session):
        """Update the status for a resource at the start of a reconciliation

        Args:
            session: Session
                The session of the reconcile which includes the DeployManager and resource

        """
        ready_condition = status.get_condition(status.READY_CONDITION, session.status)
        ready_reason = ready_condition.get("reason")
        if ready_reason is None or session.current_version is None:
            ready_reason = status.ReadyReason.INITIALIZING

        optional_kwargs = {}
        if session.current_version and session.version != session.current_version:
            log.debug(
                "Version change detected: %s -> %s",
                session.current_version,
                session.version,
            )
            optional_kwargs = {
                "updating_reason": status.UpdatingReason.VERSION_CHANGE,
                "updating_message": "Version Change Started: "
                f"[{session.current_version}] -> [{session.version}]",
            }
            ready_reason = status.ReadyReason.IN_PROGRESS

        self._update_resource_status(
            session.deploy_manager,
            session.cr_manifest,
            ready_reason=ready_reason,
            ready_message=ready_condition.get("message", "Initial Rollout Started"),
            supported_versions=config.supported_versions,
            **optional_kwargs,
        )

    def _update_reconcile_completion_status(
        self, session: Session, completion_state: CompletionState
    ):
        """Perform CR status updates based on the results of the rollout steps. The status logic is
        as follows:
          1. Initial Rollout: Ready-INITIALIZING, Updating-VERIFY_WAIT
          2. Everything complete: Ready-STABLE, Updating-STABLE
          3. Everything except after_verify: Ready-IN_PROGRESS, Updating-STABLE
          4. other: Updating-VERIFY_WAIT

          Args:
            session: Session
                The session of the reconcile which includes the DeployManager and resource
            completion_state: CompletionState
                The result of the rollout
        """
        status_update = {"component_state": completion_state}

        # If everything completed and verified, set ready and updating to STABLE
        # and set the status's reconciled version to the desired version
        if completion_state.verify_completed():
            status_update["ready_reason"] = status.ReadyReason.STABLE
            status_update["ready_message"] = "Verify Complete"
            status_update["updating_reason"] = status.UpdatingReason.STABLE
            status_update["updating_message"] = "Rollout Complete"
            status_update["version"] = session.version

        # If the completion_state didn't fail then update the ready condition with
        # in_progress and the updating condition with verification incomplete
        else:
            current_status = session.get_status()

            # If not initializing then update the ready condition with in_progress
            current_ready_cond = status.get_condition(
                status.READY_CONDITION, current_status
            )
            if (
                current_ready_cond.get("reason")
                != status.ReadyReason.INITIALIZING.value
            ):
                status_update["ready_reason"] = status.ReadyReason.IN_PROGRESS
                status_update["ready_message"] = "Verify InProgress"

            status_update["updating_reason"] = status.UpdatingReason.VERIFY_WAIT
            status_update["updating_message"] = "Component verification incomplete"

        log.debug3("Updating status after reconcile: %s", status_update)
        self._update_resource_status(
            session.deploy_manager, session.cr_manifest, **status_update
        )

    def _update_error_status(
        self, resource: Union[dict, aconfig.Config], error: Exception
    ) -> dict:
        """Update the status of a resource after an error occurred. This function
        sets up its own deploy manager and parses the resource so that errors at any
        phase of reconciliation can still be reflected in the CR status

        Args:
            resource: Union[dict, aconfig.Config]
                The resource whose status is being updated
            error: Exception
                The exception that stopped the reconciliation

        Returns:
            status: dict
                The updated status after the error message
        """
        cr_manifest = self.parse_manifest(resource)
        deploy_manager = self.setup_deploy_manager(resource)

        # Get the completion state if possible
        component_state = getattr(error, "completion_state", None)

        # Expected Oper8 Errors
        if isinstance(error, PreconditionError):
            status_update = {
                "updating_reason": status.UpdatingReason.PRECONDITION_WAIT,
                "updating_message": str(error),
                "component_state": component_state,
            }
        elif isinstance(error, (VerificationError, Oper8ExpectedError)):
            status_update = {
                "updating_reason": status.UpdatingReason.VERIFY_WAIT,
                "updating_message": str(error),
                "component_state": component_state,
            }
        elif isinstance(error, ConfigError):
            status_update = {
                "ready_reason": status.ReadyReason.CONFIG_ERROR,
                "ready_message": str(error),
                "updating_reason": status.UpdatingReason.ERRORED,
                "updating_message": str(error),
                "component_state": component_state,
            }
        elif isinstance(error, ClusterError):
            status_update = {
                "updating_reason": status.UpdatingReason.CLUSTER_ERROR,
                "updating_message": str(error),
                "component_state": component_state,
            }

        elif isinstance(error, (RolloutError, Oper8FatalError)):
            status_update = {
                "ready_reason": status.ReadyReason.ERRORED,
                "ready_message": str(error),
                "updating_reason": status.UpdatingReason.ERRORED,
                "updating_message": str(error),
                "component_state": component_state,
            }

        # Catchall for non oper8 errors
        else:
            status_update = {
                "ready_reason": status.ReadyReason.ERRORED,
                "ready_message": str(error),
                "updating_reason": status.UpdatingReason.ERRORED,
                "updating_message": str(error),
            }

        return self._update_resource_status(
            deploy_manager, cr_manifest, **status_update
        )
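
Example (illustrative sketch only; "my_operator.controller.MyController" and the CR dict are placeholders, and the import path follows the oper8.reconcile module shown above):

from oper8.reconcile import ReconcileManager

# A raw CR manifest as it would be handed to the operator (placeholder values)
cr = {
    "apiVersion": "example.org/v1",
    "kind": "MyApp",
    "metadata": {"name": "demo", "namespace": "default"},
    "spec": {},
}

manager = ReconcileManager()

# controller_info may be a "<module>.<class>" string, a Controller class, or an
# already-constructed Controller instance
result = manager.reconcile("my_operator.controller.MyController", cr)
print("requeue requested:", result.requeue)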

__init__(home_dir=None, deploy_manager=None, enable_vcs=None, reimport_controller=True)

The constructor sets up the properties used across every reconcile and checks that the current config is valid.

Parameters:

    home_dir: Optional[str] = None
        The root directory for importing controllers or VCS checkout
    deploy_manager: Optional[DeployManagerBase] = None
        Deploy manager to use. If not given, a new DeployManager will be
        created for each reconcile.
    enable_vcs: Optional[bool] = None
        Parameter to manually control the state of VCS on a per-instance basis.
        Defaults to config.vcs.enabled when not provided.
    reimport_controller: Optional[bool] = True
        Parameter to manually control whether the controller needs to be
        reimported on each reconcile.
Source code in oper8/reconcile.py
def __init__(
    self,
    home_dir: str = None,
    deploy_manager: Optional[DeployManagerBase] = None,
    enable_vcs: Optional[bool] = None,
    reimport_controller: Optional[bool] = True,
):
    """The constructor sets up the properties used across every
    reconcile and checks that the current config is valid.

    Args:
        home_dir:  Optional[str]=None
            The root directory for importing controllers or VCS checkout
        deploy_manager:  Optional[DeployManager]=None
            Deploy manager to use. If not given, a new DeployManager will
            be created for each reconcile.
        enable_vcs:  Optional[bool]=None
            Parameter to manually control the state of VCS on a per-instance
            basis. Defaults to config.vcs.enabled when not provided.
        reimport_controller:  Optional[bool]=True
            Parameter to manually control whether the controller needs to be
            reimported on each reconcile.
    """

    if home_dir:
        self.home_dir = home_dir
    elif config.vcs.enabled:
        self.home_dir = config.vcs.repo
    else:
        self.home_dir = os.getcwd()

    self.vcs = None

    # If enable_vcs is not provided, then default to
    # the config value
    if enable_vcs is None:
        enable_vcs = config.vcs.enabled

    if enable_vcs:
        assert_config(
            config.vcs.repo,
            "Can not enable vcs without supply source repo at vcs.repo",
        )
        assert_config(
            config.vcs.dest,
            "Cannot require enable vcs without providing a destination",
        )
        vcs_checkout_methods = [method.value for method in VCSCheckoutMethod]
        assert_config(
            config.vcs.checkout_method in vcs_checkout_methods,
            f"VCS checkout method must be one of the following {vcs_checkout_methods}",
        )

        self.vcs = VCS(self.home_dir)

    # Ensure config is setup correctly for strict_versioning
    if config.strict_versioning:
        assert_config(
            config.supported_versions is not None,
            "Must provide supported_versions with strict_versioning=True",
        )
        assert_config(
            config.vcs.field is not None,
            "Must provide vcs.field with strict_versioning=True",
        )

    self.deploy_manager = deploy_manager
    self.reimport_controller = reimport_controller
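
For example, a manager intended for tests might reuse a single dry-run deploy manager and skip controller re-imports (a sketch; the DryRunDeployManager import path is assumed):

from oper8.deploy_manager import DryRunDeployManager  # import path assumed
from oper8.reconcile import ReconcileManager

# Reuse one in-memory deploy manager for every reconcile and avoid
# re-importing the controller module on each call
manager = ReconcileManager(
    deploy_manager=DryRunDeployManager(),
    reimport_controller=False,
)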

configure_logging(cr_manifest, reconciliation_id) classmethod

Configure the logging for a given reconcile

Parameters:

    cr_manifest: aconfig.Config (required)
        The resource to get annotation overrides from
    reconciliation_id: str (required)
        The unique id for the reconciliation
Source code in oper8/reconcile.py
@classmethod
def configure_logging(cls, cr_manifest: aconfig.Config, reconciliation_id: str):
    """Configure the logging for a given reconcile

    Args:
        cr_manifest: aconfig.Config
            The resource to get annotation overrides from
        reconciliation_id: str
            The unique id for the reconciliation
    """

    # Fetch the annotations for logging
    # NOTE: We use safe fetching here because this happens before CR
    #   verification in the Session constructor
    annotations = cr_manifest.get("metadata", {}).get("annotations", {})
    default_level = annotations.get(
        constants.LOG_DEFAULT_LEVEL_NAME, config.log_level
    )

    filters = annotations.get(constants.LOG_FILTERS_NAME, config.log_filters)
    log_json = annotations.get(constants.LOG_JSON_NAME, str(config.log_json))
    log_thread_id = annotations.get(
        constants.LOG_THREAD_ID_NAME, str(config.log_thread_id)
    )

    # Convert boolean args
    log_json = (log_json or "").lower() == "true"
    log_thread_id = (log_thread_id or "").lower() == "true"

    # Keep the old handler. This is useful if running with ansible as
    # it will preserve the handler generator set up to log to a file
    # since ansible captures all logging output
    handler_generator = None
    if logging.root.handlers:
        old_handler = logging.root.handlers[0]

        def handler_generator():
            return old_handler

    alog.configure(
        default_level=default_level,
        filters=filters,
        formatter=(
            Oper8JsonFormatter(cr_manifest, reconciliation_id)
            if log_json
            else "pretty"
        ),
        thread_id=log_thread_id,
        handler_generator=handler_generator,
    )
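
A sketch of configuring logging for a single reconcile; the annotation key is whatever string oper8.constants.LOG_DEFAULT_LEVEL_NAME holds (the import path for constants is assumed):

import aconfig
from oper8 import constants  # import path assumed
from oper8.reconcile import ReconcileManager

# Per-CR override of the default log level via the logging annotation
cr_manifest = aconfig.Config(
    {"metadata": {"annotations": {constants.LOG_DEFAULT_LEVEL_NAME: "debug3"}}},
    override_env_vars=False,
)

reconcile_id = ReconcileManager.generate_id()
ReconcileManager.configure_logging(cr_manifest, reconcile_id)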

generate_id() classmethod

Generates a unique human readable id for this reconciliation

Returns:

    id: str
        A unique base32-encoded id

Source code in oper8/reconcile.py
@classmethod
def generate_id(cls) -> str:
    """Generates a unique human readable id for this reconciliation

    Returns:
        id: str
            A unique base32 encoded id
    """
    uuid4 = uuid.uuid4()
    base32_str = base64.b32encode(uuid4.bytes).decode("utf-8")
    reconcile_id = base32_str[:22]
    log.debug("Generated reconcile id: %s", reconcile_id)
    return reconcile_id

parse_manifest(resource) classmethod

Parse a raw resource into an aconfig Config

Parameters:

    resource: Union[dict, aconfig.Config] (required)
        The resource to be parsed into a manifest

Returns:

    cr_manifest: aconfig.Config
        The parsed and validated config

Source code in oper8/reconcile.py
@classmethod
def parse_manifest(cls, resource: Union[dict, aconfig.Config]) -> aconfig.Config:
    """Parse a raw resource into an aconfig Config

    Args:
        resource: Union[dict, aconfig.Config])
            The resource to be parsed into a manifest

    Returns:
        cr_manifest: aconfig.Config
            The parsed and validated config
    """
    try:
        cr_manifest = aconfig.Config(resource, override_env_vars=False)
    except (ValueError, SyntaxError, AttributeError) as exc:
        raise ValueError("Failed to parse full_cr") from exc

    return cr_manifest
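
A quick sketch of the conversion; attribute-style access on the result comes from aconfig.Config:

from oper8.reconcile import ReconcileManager

cr_manifest = ReconcileManager.parse_manifest(
    {"kind": "MyApp", "metadata": {"name": "demo", "namespace": "default"}}
)

# The parsed manifest supports attribute access in addition to dict access
print(cr_manifest.kind, cr_manifest.metadata.name)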

reconcile(controller_info, resource, is_finalizer=False)

This is the main entrypoint for reconciliations and contains the core implementation. The general reconcile path is as follows:

1. Parse the raw CR manifest
2. Setup logging based on config with overrides from CR
3. Check if the CR is paused and for strict versioning
4. Setup directory if VCS is enabled
5. Import and construct the Controller
6. Setup the DeployManager and Session objects
7. Run the Controller reconcile

Parameters:

    controller_info: CONTROLLER_INFO (required)
        The description of a controller. See CONTROLLER_INFO for more information
    resource: Union[dict, aconfig.Config] (required)
        A raw representation of the resource to be reconciled
    is_finalizer: bool = False
        Whether the resource is being deleted

Returns:

    reconcile_result: ReconciliationResult
        The result of the reconcile

Source code in oper8/reconcile.py
@alog.logged_function(log.info)
@alog.timed_function(log.info, "Reconcile finished in: ")
def reconcile(
    self,
    controller_info: CONTROLLER_INFO,
    resource: Union[dict, aconfig.Config],
    is_finalizer: bool = False,
) -> ReconciliationResult:
    """This is the main entrypoint for reconciliations and contains the
    core implementation. The general reconcile path is as follows:

        1. Parse the raw CR manifest
        2. Setup logging based on config with overrides from CR
        3. Check if the CR is paused and for strict versioning
        4. Setup directory if VCS is enabled
        5. Import and construct the Controller
        6. Setup the DeployManager and Session objects
        7. Run the Controller reconcile

    Args:
        controller_info: CONTROLLER_INFO
            The description of a controller. See CONTROLLER_INFO for
            more information
        resource: Union[dict, aconfig.Config]
            A raw representation of the resource to be reconciled
        is_finalizer: bool=False
            Whether the resource is being deleted

    Returns:
        reconcile_result:  ReconciliationResult
            The result of the reconcile
    """

    # Parse the full CR content
    cr_manifest = self.parse_manifest(resource)

    # generate id unique to this session
    reconcile_id = self.generate_id()

    # Initialize logging prior to any other work
    self.configure_logging(cr_manifest, reconcile_id)

    # If paused, do nothing and don't requeue
    if self._is_paused(cr_manifest):
        log.info("CR is paused. Exiting reconciliation")
        result = ReconciliationResult(requeue=False, requeue_params=RequeueParams())
        return result

    # Check strict versioning before continuing
    if config.strict_versioning:
        self._check_strict_versioning(cr_manifest)

    # Check if VCS is enabled and then attempt to checkout
    if config.vcs.enabled:
        self.setup_vcs(cr_manifest)

    # Import controller and setup the instance
    controller = self.setup_controller(controller_info)

    # Configure deploy manager on a per reconcile basis for
    # owner references unless a manager is provided on initialization
    deploy_manager = self.setup_deploy_manager(cr_manifest)

    # Setup Session
    session = self.setup_session(
        controller, cr_manifest, deploy_manager, reconcile_id
    )

    # Run the controller reconcile
    result = self.run_controller(controller, session, is_finalizer)

    return result
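
Unlike safe_reconcile, errors raised here propagate to the caller; a sketch of running the finalization path for a CR that is being deleted (placeholder controller path and CR):

from oper8.reconcile import ReconcileManager

manager = ReconcileManager()
cr = {
    "apiVersion": "example.org/v1",
    "kind": "MyApp",
    "metadata": {"name": "demo", "namespace": "default"},
}

# is_finalizer=True runs the Controller's finalizer logic instead of the
# regular reconcile; any exception is raised rather than captured
result = manager.reconcile(
    "my_operator.controller.MyController",  # placeholder <module>.<class> path
    cr,
    is_finalizer=True,
)
print("requeue requested:", result.requeue)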

run_controller(controller, session, is_finalizer)

Run the Controller's reconciliation or finalizer with the constructed Session. This function also updates the CR status and handles requeue logic.

Parameters:

    controller: CONTROLLER_TYPE (required)
        The Controller being reconciled
    session: Session (required)
        The current Session state
    is_finalizer: bool (required)
        Whether the resource is being deleted

Returns:

    reconciliation_result: ReconciliationResult
        The result of the reconcile

Source code in oper8/reconcile.py
def run_controller(
    self, controller: CONTROLLER_TYPE, session: Session, is_finalizer: bool
) -> ReconciliationResult:
    """Run the Controller's reconciliation or finalizer with the constructed Session.
    This function also updates the CR status and handles requeue logic.

    Args:
        controller: Controller
            The Controller being reconciled
        session: Session
            The current Session state
        is_finalizer:
            Whether the resource is being deleted

    Returns:
        reconciliation_result: ReconciliationResult
            The result of the reconcile
    """
    log.info(
        "%s resource %s/%s/%s",
        "Finalizing" if is_finalizer else "Reconciling",
        session.kind,
        session.namespace,
        session.name,
    )

    # Ensure the resource has the proper finalizers
    if controller.has_finalizer:
        add_finalizer(session, controller.finalizer)

    # Update the Resource status
    if config.manage_status:
        self._update_reconcile_start_status(session)

    # Reconcile the controller
    completion_state = controller.run_reconcile(
        session,
        is_finalizer=is_finalizer,
    )

    if config.manage_status:
        self._update_reconcile_completion_status(session, completion_state)

    # Check if the controller session should requeue
    requeue, requeue_params = controller.should_requeue(session)
    if not requeue_params:
        requeue_params = RequeueParams()

    # Remove managed finalizers if not requeuing
    if not requeue and is_finalizer and controller.has_finalizer:
        remove_finalizer(session, controller.finalizer)

    return ReconciliationResult(requeue=requeue, requeue_params=requeue_params)
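
The stage methods documented on this page can also be driven individually; a sketch of the same wiring that reconcile() performs internally (placeholder controller path and CR):

from oper8.reconcile import ReconcileManager

manager = ReconcileManager()
cr = {
    "apiVersion": "example.org/v1",
    "kind": "MyApp",
    "metadata": {"name": "demo", "namespace": "default"},
}

cr_manifest = manager.parse_manifest(cr)
reconcile_id = manager.generate_id()
manager.configure_logging(cr_manifest, reconcile_id)

controller = manager.setup_controller("my_operator.controller.MyController")  # placeholder
deploy_manager = manager.setup_deploy_manager(cr_manifest)
session = manager.setup_session(controller, cr_manifest, deploy_manager, reconcile_id)

result = manager.run_controller(controller, session, is_finalizer=False)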

safe_reconcile(controller_info, resource, is_finalizer=False)

This function calls out to reconcile but catches any errors thrown. This function guarantees a safe result which is needed by some Watch Managers

Parameters:

    controller_info (CONTROLLER_INFO, required): The description of a controller. See CONTROLLER_INFO for more information
    resource (Union[dict, aconfig.Config], required): A raw representation of the resource to reconcile
    is_finalizer (bool, default False): Whether the resource is being deleted

Returns:

    reconcile_result (ReconciliationResult): The result of the reconcile

Source code in oper8/reconcile.py, lines 247-311
def safe_reconcile(
    self,
    controller_info: CONTROLLER_INFO,
    resource: dict,
    is_finalizer: bool = False,
) -> ReconciliationResult:
    """
    This function calls out to reconcile but catches any errors thrown. This
    function guarantees a safe result which is needed by some Watch Managers

    Args:
        controller_info: CONTROLLER_INFO
            The description of a controller. See CONTROLLER_INFO for
            more information
        resource: Union[dict, aconfig.Config]
            A raw representation of the reconcile
        is_finalize: bool=False
            Whether the resource is being deleted

    Returns:
        reconcile_result:  ReconciliationResult
            The result of the reconcile

    """

    try:
        return self.reconcile(controller_info, resource, is_finalizer)

    # VCSMultiProcessError is an expected error caused by oper8 which should
    # not be handled by the exception handling code
    except VCSMultiProcessError as exc:
        # Requeue after ~7.5 seconds. Add randomness to avoid
        # repeated conflicts
        requeue_time = 5 + random.uniform(0, 5)
        params = RequeueParams(
            requeue_after=datetime.timedelta(seconds=requeue_time)
        )
        log.debug("VCS Multiprocessing Error Detected: {%s}", exc, exc_info=True)
        log.warning(
            "VCS Setup failed due to other process. Requeueing in %ss",
            requeue_time,
        )
        return ReconciliationResult(
            requeue=True, requeue_params=params, exception=exc
        )

    # Capture all generic exceptions
    except Exception as exc:  # pylint: disable=broad-except
        log.warning("Handling caught error in reconcile: %s", exc, exc_info=True)
        error = exc

    if config.manage_status:
        try:
            self._update_error_status(resource, error)
            log.debug("Update CR status with error message")
        except Exception as exc:  # pylint: disable=broad-except
            log.error("Failed to update status: %s", exc, exc_info=True)

    # If we got to this return it means there was an
    # exception during reconcile and we should requeue
    # with the default backoff period
    log.info("Requeuing CR due to error during reconcile")
    return ReconciliationResult(
        requeue=True, requeue_params=RequeueParams(), exception=error
    )
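
Because safe_reconcile never raises, a caller such as a watch manager only needs to inspect the returned ReconciliationResult. A short sketch, reusing the illustrative manager and cr objects from the earlier reconcile example:

# All failure information is folded into the result rather than raised
result = manager.safe_reconcile("my_module.MyAppController", cr)
if result.exception is not None:
    print(f"Reconcile raised: {result.exception}")
if result.requeue:
    delay = result.requeue_params.requeue_after  # a datetime.timedelta
    print(f"Requeue requested in {delay.total_seconds()}s")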

setup_controller(controller_info)

Import the requested Controller class and enable any compatibility layers

Parameters:

    controller_info (CONTROLLER_INFO, required): The description of a controller. See CONTROLLER_INFO for more information

Returns:

    controller: The requested Controller class

Source code in oper8/reconcile.py, lines 437-463
def setup_controller(
    self, controller_info: CONTROLLER_INFO
) -> CONTROLLER_CLASS_TYPE:
    """
    Import the requested Controller class and enable any compatibility layers

    Args:
        controller_info:CONTROLLER_INFO
            The description of a controller. See CONTROLLER_INFO for
            more information
    Returns:
        controller:
            The required Controller Class
    """

    # Local
    from .controller import (  # pylint: disable=import-outside-toplevel, cyclic-import
        Controller,
    )

    # If controller info is already a constructed controller then
    # skip importing
    if isinstance(controller_info, Controller):
        return controller_info

    controller_class = self._import_controller(controller_info)
    return self._configure_controller(controller_class)
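
As the isinstance check shows, an already-constructed Controller is returned untouched, while any other CONTROLLER_INFO form is resolved via _import_controller. A brief sketch, where the import-path string form is an assumption for illustration:

# Reusing `manager` and MyAppController from the earlier sketches
controller = manager.setup_controller(MyAppController())                  # instance returned as-is
controller_class = manager.setup_controller("my_module.MyAppController")  # assumed import-path form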

setup_deploy_manager(cr_manifest)

Configure a deploy_manager for a reconcile given a manifest

Parameters:

    cr_manifest (aconfig.Config, required): The resource to be used as an owner_ref

Returns:

    deploy_manager (DeployManagerBase): The deploy_manager to be used during reconcile

Source code in oper8/reconcile.py, lines 465-485
def setup_deploy_manager(self, cr_manifest: aconfig.Config) -> DeployManagerBase:
    """
    Configure a deploy_manager for a reconcile given a manifest

    Args:
        cr_manifest: aconfig.Config
            The resource to be used as an owner_ref

    Returns:
        deploy_manager: DeployManagerBase
            The deploy_manager to be used during reconcile
    """
    if self.deploy_manager:
        return self.deploy_manager

    if config.dry_run:
        log.debug("Using DryRunDeployManager")
        return DryRunDeployManager()

    log.debug("Using OpenshiftDeployManager")
    return OpenshiftDeployManager(owner_cr=cr_manifest)
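
The precedence is therefore: a deploy manager supplied at construction, then DryRunDeployManager when config.dry_run is set, then OpenshiftDeployManager with this CR as the owner reference. A small sketch of the first case, reusing the illustrative objects from the reconcile example:

# A manager supplied at construction is always returned, regardless of config
dry_manager = ReconcileManager(deploy_manager=DryRunDeployManager())
assert isinstance(dry_manager.setup_deploy_manager(cr), DryRunDeployManager)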

setup_session(controller, cr_manifest, deploy_manager, reconciliation_id)

Construct the session, including gathering the backend config and any temp patches

Parameters:

    controller (CONTROLLER_TYPE, required): The controller class being reconciled
    cr_manifest (aconfig.Config, required): The resource being reconciled
    deploy_manager (DeployManagerBase, required): The deploy manager used in the cluster
    reconciliation_id (str, required): The id for the reconcile

Returns:

    session (Session): The session for the reconcile

Source code in oper8/reconcile.py, lines 487-536
def setup_session(
    self,
    controller: CONTROLLER_TYPE,
    cr_manifest: aconfig.Config,
    deploy_manager: DeployManagerBase,
    reconciliation_id: str,
) -> Session:
    """Construct the session, including gathering the backend config and any temp patches

    Args:
        controller: Controller
            The controller class being reconciled
        cr_manifest: aconfig.Config
            The resource being reconciled
        deploy_manager: DeployManagerBase
            The deploy manager used in the cluster
        reconciliation_id: str
            The id for the reconcile

    Return:
        session: Session
            The session for reconcile
    """
    # Get backend config for reconciliation
    controller_defaults = controller.get_config_defaults()
    reconciliation_config = self._get_reconcile_config(
        cr_manifest=cr_manifest,
        deploy_manager=deploy_manager,
        controller_defaults=controller_defaults,
    )
    log.debug4("Gathered Config: %s", reconciliation_config)

    # Get Temporary patches
    patches = self._get_temp_patches(deploy_manager, cr_manifest)
    log.debug3("Found %d patches", len(patches))

    # Get the complete CR Manifest including defaults
    cr_manifest_defaults = controller.get_cr_manifest_defaults()
    full_cr_manifest = merge_configs(
        aconfig.Config(cr_manifest_defaults),
        cr_manifest,
    )

    return Session(
        reconciliation_id=reconciliation_id,
        cr_manifest=full_cr_manifest,
        config=reconciliation_config,
        deploy_manager=deploy_manager,
        temporary_patches=patches,
    )
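
setup_session ultimately builds the Session shown later in this reference. Below is a sketch of constructing one directly with a dry-run deploy manager; the import paths are assumptions, and the config value is illustrative rather than a real compiled backend config.

import aconfig
from oper8.deploy_manager import DryRunDeployManager
from oper8.session import Session

session = Session(
    reconciliation_id="abc123",
    cr_manifest=aconfig.Config(cr, override_env_vars=False),  # `cr` from the earlier sketch
    config=aconfig.Config({}, override_env_vars=False),
    deploy_manager=DryRunDeployManager(),
    temporary_patches=[],
)
print(session.name, session.namespace, session.version)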

setup_vcs(cr_manifest)

Sets up the VCS directory and sys.path for a reconcile. This function also ensures that the version is valid if config.strict_versioning is enabled.

Parameters:

    cr_manifest (aconfig.Config, required): The CR manifest to pull the requested version from.

Source code in oper8/reconcile.py, lines 399-435
def setup_vcs(self, cr_manifest: aconfig.Config):
    """Setups the VCS directory and sys.path for a reconcile.
    This function also ensures that the version is valid if
    config.strict_versioning is enabled.

    Args:
        cr_manifest: aconfig.Config
            The cr manifest to pull the requested version from.
    """
    version = get_manifest_version(cr_manifest)
    if not version:
        raise ValueError("CR Manifest has no version")

    log.debug(
        "Setting up working directory with src: %s and version: %s",
        self.home_dir,
        version,
    )
    working_dir = self._setup_directory(cr_manifest, version)

    # Construct working dir path from vcs and git directory
    if config.vcs.module_dir:
        module_path = pathlib.Path(config.vcs.module_dir)
        working_dir = working_dir / module_path

    if not working_dir.is_dir():
        log.error(
            "Working directory %s could not be found. Invalid module path",
            working_dir,
        )
        raise ConfigError(
            f"Module path: '{module_path}' could not be found in repository"
        )

    log.debug4("Changing working directory to %s", working_dir)
    os.chdir(working_dir)
    sys.path.insert(0, str(working_dir))
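
The version that drives the checkout comes from the CR manifest itself. An illustrative fragment follows; the exact field that get_manifest_version reads (typically spec.version) is an assumption of this sketch.

# CR fragment with the version field setup_vcs depends on; without it a
# ValueError is raised before any checkout is attempted
cr_with_version = {
    "apiVersion": "example.org/v1",
    "kind": "MyApp",
    "metadata": {"name": "my-app", "namespace": "default"},
    "spec": {"version": "1.2.3"},
}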

ReconciliationResult dataclass

ReconciliationResult is the result of a reconciliation session

Source code in oper8/reconcile.py, lines 69-78
@dataclass
class ReconciliationResult:
    """ReconciliationResult is the result of a reconciliation session"""

    # Flag to control requeue of current reconcile request
    requeue: bool
    # Parameters for requeue request
    requeue_params: RequeueParams = field(default_factory=RequeueParams)
    # Flag to identify if the reconciliation raised an exception
    exception: Exception = None

RequeueParams dataclass

RequeueParams holds parameters for requeue request

Source code in oper8/reconcile.py, lines 58-66
@dataclass
class RequeueParams:
    """RequeueParams holds parameters for requeue request"""

    requeue_after: datetime.timedelta = field(
        default_factory=lambda: datetime.timedelta(
            seconds=float(config.requeue_after_seconds)
        )
    )
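
Both dataclasses are plain containers, so they can be constructed directly when a custom requeue delay is needed. A short sketch (import path per the source location above):

import datetime

from oper8.reconcile import ReconciliationResult, RequeueParams

# Ask for a requeue in 30 seconds instead of config.requeue_after_seconds
result = ReconciliationResult(
    requeue=True,
    requeue_params=RequeueParams(requeue_after=datetime.timedelta(seconds=30)),
)
assert result.exception is None  # default when no error was raised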

rollout_manager

This module holds the implementation of the DAG constructs used to perform the dependency management for rollout

RolloutManager

This class manages the dependencies needed to roll out a set of nodes

Source code in oper8/rollout_manager.py, lines 100-511
class RolloutManager:
    """This class manages the dependencies needed to roll out a set of nodes"""

    @classmethod
    def run_node(
        cls,
        func: Callable[[Component, Session], bool],
        session: Session,
        component: Component,
        fail_halt_runner=True,
    ):
        """
        Generic function to execute a node during Rollout

        Args:
            func: Callable[[Component,Session], bool]
                The function to be called
            session: Session
                The session that's currently being rolled out
            component: Component
                The component being rolled out
        """
        success = False
        exception = None
        rollout_failed = False

        try:
            success = func(session, component)
            if fail_halt_runner and not success:
                rollout_failed = True

        # If a failure occurred by throwing, treat that the same as an
        # explicit failure.
        except Oper8Error as err:
            log.debug("Caught Oper8Error during rollout of [%s]", component)
            success = False
            rollout_failed = err.is_fatal_error
            exception = err
        except Exception as err:  # pylint: disable=broad-except
            log.warning(
                "Caught exception during rollout of [%s]",
                component,
                exc_info=True,
            )
            success = False
            rollout_failed = True
            exception = err

        # If the rollout failed for any reason, raise an exception. This
        # will halt the graph execution.
        if not success:
            log.debug("[deploy] Halting rollout")
            raise DagHaltError(rollout_failed, exception=exception)

        log.debug3("Done with executing node: %s", component)

    def __init__(
        self,
        session: Session,
        after_deploy: Optional[Callable[[Session], bool]] = None,
        after_deploy_unsuccessful: Optional[Callable[[Session], bool]] = None,
        after_verify: Optional[Callable[[Session], bool]] = None,
        after_verify_unsuccessful: Optional[Callable[[Session], bool]] = None,
    ):
        """Construct with the fully-populated session for the rollout

        Args:
            session:  Session
                The current session for the reconciliation
            after_deploy:  Optional[Callable[[Session] bool]]
                An optional callback hook that will be invoked after the deploy
                phase completes. The return indicates whether the validation has
                passed.
            after_deploy_unsuccessful:  Optional[Callable[[Session] bool]]
                An optional callback hook that will be invoked after the deploy
                phase ends with incomplete or failed status. The return indicates
                whether the validation has passed.
            after_verify:  Optional[Callable[[Session] None]]
                An optional callback hook that will be invoked after the verify
                phase completes. The return indicates whether the validation has
                passed.
            after_verify_unsuccessful:  Optional[Callable[[Session] None]]
                An optional callback hook that will be invoked after the verify
                phase ends with failure. The return indicates whether the validation
                has passed.
        """
        self._session = session
        self._after_deploy = after_deploy
        self._after_deploy_unsuccessful = after_deploy_unsuccessful
        self._after_verify = after_verify
        self._after_verify_unsuccessful = after_verify_unsuccessful

        # Read pool size from config
        deploy_threads = config.rollout_manager.deploy_threads
        verify_threads = config.rollout_manager.verify_threads

        # If session is in standalone mode, the recursive deploy -> render in
        # subsystems can cause jsii to fail in some spectacular ways. As such,
        # we force single-threaded execution in standalone mode.
        if config.standalone:
            log.info("Running rollout without threads in standalone mode")
            deploy_threads = 0
            verify_threads = 0

        deploy_node_fn = partial(
            RolloutManager.run_node,
            deploy_component,
            self._session,
        )

        verify_node_fn = partial(
            RolloutManager.run_node,
            verify_component,
            self._session,
            fail_halt_runner=False,
        )

        # Override disabled components with the disable function
        self.disabled_nodes = set()
        for component in session.graph:
            if component.disabled:
                component.set_data(
                    partial(
                        RolloutManager.run_node,
                        disable_component,
                        self._session,
                        component,
                    )
                )

                self.disabled_nodes.add(component)

        self._deploy_graph = Runner(
            "deploy",
            threads=deploy_threads,
            graph=session.graph,
            default_function=deploy_node_fn,
            verify_upstream=not config.dry_run,
        )
        self._verify_graph = Runner(
            "verify",
            threads=verify_threads,
            graph=session.graph,
            default_function=verify_node_fn,
            verify_upstream=not config.dry_run,
        )

    def rollout(  # pylint: disable=too-many-locals,too-many-statements
        self,
    ) -> CompletionState:
        """Perform the rollout

        The logic has four phases:
            1. Deploy Graph: This phase executes the Runner which runs the deploy()
                function for each Component in dependency order. For graph edges
                with configured verification functions, they are also executed
                during this phase.
            2. After Deploy: If configured with an after_deploy hook and (1)
                completed all nodes successfully, this function is called. Any
                raised exceptions indicate that the rollout should not proceed.
            3. Verify Graph: This phase executes the Runner which runs the verify()
                function for each Component in dependency order.
            4. After Verify: If configured with an after_verify hook and (3)
                completed all nodes successfully, this function is called. Any
                raised exceptions indicate that the rollout is not fully
                verified.

        The rollout can terminate in one of three states:
            1. incomplete AND failed: Something unexpected happened and the
                rollout terminated in a fatal state.
            2. incomplete AND NOT failed: One or more nodes did not pass their
                verify steps, but all deploy steps that were attempted
                succeeded.
            3. complete AND NOT failed: The rollout completed all nodes,
                including all verify steps. The managed components are ready to
                take traffic.

        Returns:
            completion_state:  CompletionState
                The final completion state of all nodes in the rollout Runner. This
                is a logical composition of the outputs of the above phases
                based on the termination logic above.
        """

        # NOTE: The Runner is guaranteed to not throw (unless there's a bug!)
        #   so we don't need to wrap run() in a try/except since the except
        #   clause will never catch "expected" errors

        ###########################
        ## Phase 1: Deploy Graph ##
        ###########################
        with alog.ContextTimer(log.trace, "Deploy Graph [%s]: ", self._session.id):
            self._deploy_graph.run()
        deploy_completion_state = self._deploy_graph.completion_state()

        # Log phase 1 results:
        #   * SUCCESS => All Components ran `render()` and `deploy()` without
        #       raising. This is fetched from the `verify_completed()` state
        #       since Components may raise precondition errors resulting in
        #       `deploy_completed()` returning True, indicating that all nodes
        #       ran and none reached a failed state, but not all nodes running
        #       to final completion
        #   * FAILED => Some nodes raised fatal errors
        #   * INCOMPLETE => No errors were raised, but some nodes did not fully
        #       complete without raising
        log.debug3("Deploy completion: %s", deploy_completion_state)
        phase1_complete = deploy_completion_state.verify_completed()
        phase1_failed = deploy_completion_state.failed()
        log.debug(
            "[Phase 1] Deploy result: %s",
            (
                "SUCCESS"
                if phase1_complete
                else ("FAILED" if phase1_failed else "INCOMPLETE")
            ),
        )

        ###########################
        ## Phase 2: After Deploy ##
        ###########################

        phase2_complete = phase1_complete
        phase2_exception = None

        # After unsuccessful deploy.
        if (not phase1_complete or phase1_failed) and self._after_deploy_unsuccessful:
            log.debug2("Running after-deploy-unsuccessful")
            try:
                is_after_deploy_unsuccessful_completed = (
                    self._after_deploy_unsuccessful(self._session, phase1_failed)
                )
                if not is_after_deploy_unsuccessful_completed:
                    phase2_exception = VerificationError(
                        "After-deploy-unsuccessful verification failed"
                    )
            except Exception as err:  # pylint: disable=broad-except
                log.debug2(
                    "Error caught during after-deploy-unsuccessful: %s",
                    err,
                    exc_info=True,
                )
                phase2_exception = err

        # After successful deploy.
        if phase1_complete and self._after_deploy:
            log.debug2("Running after-deploy")
            try:
                phase2_complete = self._after_deploy(self._session)
                if not phase2_complete:
                    phase2_exception = VerificationError(
                        "After-deploy verification failed"
                    )
            except Exception as err:  # pylint: disable=broad-except
                log.debug2("Error caught during after-deploy: %s", err, exc_info=True)
                phase2_complete = False
                phase2_exception = err

        # Log phase 2 results
        log.debug(
            "[Phase 2] After deploy result: %s",
            (
                "SUCCESS"
                if phase2_complete
                else ("FAILED" if phase2_exception else "NOT RUN")
            ),
        )

        ###########################
        ## Phase 3: Verify Graph ##
        ###########################

        # If phase 1 ran without erroring, then run the verify Runner
        phase3_complete = False
        phase3_failed = False
        if not phase1_failed:
            # Configured the verify Runner based off of deployed nodes
            # This way only components that have started will be verified
            deployed_nodes = (
                deploy_completion_state.verified_nodes.union(
                    deploy_completion_state.unverified_nodes
                )
            ) - deploy_completion_state.failed_nodes
            log.debug3("Verify phase running with Nodes: %s", deployed_nodes)

            # Enable/Disable all nodes in verify_graph based on whether they
            # were deployed or not
            for comp in set(self._session.get_components()):
                if comp in deployed_nodes:
                    self._verify_graph.enable_node(comp)
                else:
                    self._verify_graph.disable_node(comp)

                # Disabled components should immediately verify
                if comp in self.disabled_nodes:
                    comp.set_data(lambda *_: True)

            # Run the verify Runner
            with alog.ContextTimer(log.trace, "Verify Graph [%s]: ", self._session.id):
                self._verify_graph.run()
            verify_completion_state = self._verify_graph.completion_state()
            log.debug3("Verify completion: %s", verify_completion_state)
            # Only consider phase3 completed if phase1 and phase2 fully completed
            phase3_complete = (
                verify_completion_state.verify_completed()
                and phase1_complete
                and phase2_complete
            )
            phase3_failed = verify_completion_state.failed()
        else:
            verify_completion_state = CompletionState()

        # Log phase 3 results
        log.debug(
            "[Phase 3] Verify result: %s",
            (
                "SUCCESS"
                if phase3_complete
                else (
                    "FAILED"
                    if phase3_failed
                    else ("INCOMPLETE" if phase2_complete else "NOT RUN")
                )
            ),
        )

        ###########################
        ## Phase 4: After Verify ##
        ###########################

        phase4_complete = phase3_complete
        phase4_exception = None

        # If deployment is completed, but verification is not, run _after_verify_unsuccessful.
        if (
            phase1_complete and phase2_complete and not phase3_complete
        ) and self._after_verify_unsuccessful:
            log.debug("Running after-verify-unsuccessful")
            try:
                is_after_verify_unsuccessful_completed = (
                    self._after_verify_unsuccessful(self._session, phase3_failed)
                )
                if not is_after_verify_unsuccessful_completed:
                    phase4_exception = VerificationError(
                        "After-verify-unsuccessful failed"
                    )
            except Exception as err:  # pylint: disable=broad-except
                log.debug2("Error caught during after-verify: %s", err, exc_info=True)
                phase4_exception = err

        # If both deployment and verification is completed, run _after_verify.
        if phase3_complete and self._after_verify:
            log.debug("Running after-verify")
            try:
                phase4_complete = self._after_verify(self._session)
                if not phase4_complete:
                    phase4_exception = VerificationError("After-verify failed")
            except Exception as err:  # pylint: disable=broad-except
                log.debug2("Error caught during after-verify: %s", err, exc_info=True)
                phase4_complete = False
                phase4_exception = err

        # Log phase 4 results
        log.debug(
            "[Phase 4] After deploy result: %s",
            (
                "SUCCESS"
                if phase4_complete
                else ("FAILED" if phase4_exception else "NOT RUN")
            ),
        )

        # Create a final completion state with the "deployed nodes" pulled
        # from the deploy results and the "verified nodes" pulled from the
        # verify results.
        #
        # Verified Nodes: Nodes that made it all the way through the verify
        #   graph
        # Unverified Nodes: Nodes that are "verified" in the deploy graph, but
        #   are unverified in the verify graph or were not run in the verify
        #   graph and did not fail in the verify graph
        # Failed Nodes: Nodes that failed in either graph
        # Unstarted Nodes: Nodes that were unstarted in the deploy graph
        # Exception: Any exception from any of the phases above
        verified_nodes = verify_completion_state.verified_nodes
        failed_nodes = verify_completion_state.failed_nodes.union(
            deploy_completion_state.failed_nodes
        )
        unverified_nodes = (
            (
                deploy_completion_state.verified_nodes.union(
                    deploy_completion_state.unverified_nodes
                ).union(verify_completion_state.unverified_nodes)
            )
            - verified_nodes
            - failed_nodes
        )
        unstarted_nodes = deploy_completion_state.unstarted_nodes
        exception = (
            deploy_completion_state.exception
            or phase2_exception
            or verify_completion_state.exception
            or phase4_exception
        )
        completion_state = CompletionState(
            verified_nodes=verified_nodes,
            unverified_nodes=unverified_nodes,
            failed_nodes=failed_nodes,
            unstarted_nodes=unstarted_nodes,
            exception=exception,
        )
        log.debug2("Final rollout state: %s", completion_state)
        return completion_state
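
A sketch of driving a rollout directly; in normal operation the Controller constructs and runs the RolloutManager during run_reconcile, so everything here (the session, the hook, the checks on the returned CompletionState) is illustrative.

from oper8.rollout_manager import RolloutManager

def after_deploy(session):
    # Return True when post-deploy validation passes
    return True

# `session` is the fully-populated Session from the earlier sketch
rollout_manager = RolloutManager(session, after_deploy=after_deploy)
completion_state = rollout_manager.rollout()
if completion_state.failed():
    print("Rollout hit a fatal error:", completion_state.exception)
elif not completion_state.verify_completed():
    print("Deployed, but not fully verified yet; expect a requeue")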

__init__(session, after_deploy=None, after_deploy_unsuccessful=None, after_verify=None, after_verify_unsuccessful=None)

Construct with the fully-populated session for the rollout

Parameters:

    session (Session, required): The current session for the reconciliation
    after_deploy (Optional[Callable[[Session], bool]], default None): An optional callback hook that will be invoked after the deploy phase completes. The return indicates whether the validation has passed.
    after_deploy_unsuccessful (Optional[Callable[[Session], bool]], default None): An optional callback hook that will be invoked after the deploy phase ends with incomplete or failed status. The return indicates whether the validation has passed.
    after_verify (Optional[Callable[[Session], bool]], default None): An optional callback hook that will be invoked after the verify phase completes. The return indicates whether the validation has passed.
    after_verify_unsuccessful (Optional[Callable[[Session], bool]], default None): An optional callback hook that will be invoked after the verify phase ends with failure. The return indicates whether the validation has passed.

Source code in oper8/rollout_manager.py, lines 156-245
def __init__(
    self,
    session: Session,
    after_deploy: Optional[Callable[[Session], bool]] = None,
    after_deploy_unsuccessful: Optional[Callable[[Session], bool]] = None,
    after_verify: Optional[Callable[[Session], bool]] = None,
    after_verify_unsuccessful: Optional[Callable[[Session], bool]] = None,
):
    """Construct with the fully-populated session for the rollout

    Args:
        session:  Session
            The current session for the reconciliation
        after_deploy:  Optional[Callable[[Session] bool]]
            An optional callback hook that will be invoked after the deploy
            phase completes. The return indicates whether the validation has
            passed.
        after_deploy_unsuccessful:  Optional[Callable[[Session] bool]]
            An optional callback hook that will be invoked after the deploy
            phase ends with incomplete or failed status. The return indicates
            whether the validation has passed.
        after_verify:  Optional[Callable[[Session] None]]
            An optional callback hook that will be invoked after the verify
            phase completes. The return indicates whether the validation has
            passed.
        after_verify_unsuccessful:  Optional[Callable[[Session] None]]
            An optional callback hook that will be invoked after the verify
            phase ends with failure. The return indicates whether the validation
            has passed.
    """
    self._session = session
    self._after_deploy = after_deploy
    self._after_deploy_unsuccessful = after_deploy_unsuccessful
    self._after_verify = after_verify
    self._after_verify_unsuccessful = after_verify_unsuccessful

    # Read pool size from config
    deploy_threads = config.rollout_manager.deploy_threads
    verify_threads = config.rollout_manager.verify_threads

    # If session is in standalone mode, the recursive deploy -> render in
    # subsystems can cause jsii to fail in some spectacular ways. As such,
    # we force single-threaded execution in standalone mode.
    if config.standalone:
        log.info("Running rollout without threads in standalone mode")
        deploy_threads = 0
        verify_threads = 0

    deploy_node_fn = partial(
        RolloutManager.run_node,
        deploy_component,
        self._session,
    )

    verify_node_fn = partial(
        RolloutManager.run_node,
        verify_component,
        self._session,
        fail_halt_runner=False,
    )

    # Override disabled components with the disable function
    self.disabled_nodes = set()
    for component in session.graph:
        if component.disabled:
            component.set_data(
                partial(
                    RolloutManager.run_node,
                    disable_component,
                    self._session,
                    component,
                )
            )

            self.disabled_nodes.add(component)

    self._deploy_graph = Runner(
        "deploy",
        threads=deploy_threads,
        graph=session.graph,
        default_function=deploy_node_fn,
        verify_upstream=not config.dry_run,
    )
    self._verify_graph = Runner(
        "verify",
        threads=verify_threads,
        graph=session.graph,
        default_function=verify_node_fn,
        verify_upstream=not config.dry_run,
    )

rollout()

Perform the rollout

The logic has four phases:
  1. Deploy Graph: This phase executes the Runner which runs the deploy() function for each Component in dependency order. For graph edges with configured verification functions, they are also executed during this phase.
  2. After Deploy: If configured with an after_deploy hook and (1) completed all nodes successfully, this function is called. Any raised exceptions indicate that the rollout should not proceed.
  3. Verify Graph: This phase executes the Runner which runs the verify() function for each Component in dependency order.
  4. After Verify: If configured with an after_verify hook and (3) completed all nodes successfully, this function is called. Any raised exceptions indicate that the rollout is not fully verified.
The rollout can terminate in one of three states:
  1. incomplete AND failed: Something unexpected happened and the rollout terminated in a fatal state.
  2. incomplete AND NOT failed: One or more nodes did not pass their verify steps, but all deploy steps that were attempted succeeded.
  3. complete AND NOT failed: The rollout completed all nodes, including all verify steps. The managed components are ready to take traffic.

Returns:

    completion_state (CompletionState): The final completion state of all nodes in the rollout Runner. This is a logical composition of the outputs of the above phases based on the termination logic above.

Source code in oper8/rollout_manager.py, lines 247-511
def rollout(  # pylint: disable=too-many-locals,too-many-statements
    self,
) -> CompletionState:
    """Perform the rollout

    The logic has four phases:
        1. Deploy Graph: This phase executes the Runner which runs the deploy()
            function for each Component in dependency order. For graph edges
            with configured verification functions, they are also executed
            during this phase.
        2. After Deploy: If configured with an after_deploy hook and (1)
            completed all nodes successfully, this function is called. Any
            raised exceptions indicate that the rollout should not proceed.
        3. Verify Graph: This phase executes the Runner which runs the verify()
            function for each Component in dependency order.
        4. After Verify: If configured with an after_verify hook and (3)
            completed all nodes successfully, this function is called. Any
            raised exceptions indicate that the rollout is not fully
            verified.

    The rollout can terminate in one of three states:
        1. incomplete AND failed: Something unexpected happened and the
            rollout terminated in a fatal state.
        2. incomplete AND NOT failed: One or more nodes did not pass their
            verify steps, but all deploy steps that were attempted
            succeeded.
        3. complete AND NOT failed: The rollout completed all nodes,
            including all verify steps. The managed components are ready to
            take traffic.

    Returns:
        completion_state:  CompletionState
            The final completion state of all nodes in the rollout Runner. This
            is a logical composition of the outputs of the above phases
            based on the termination logic above.
    """

    # NOTE: The Runner is guaranteed to not throw (unless there's a bug!)
    #   so we don't need to wrap run() in a try/except since the except
    #   clause will never catch "expected" errors

    ###########################
    ## Phase 1: Deploy Graph ##
    ###########################
    with alog.ContextTimer(log.trace, "Deploy Graph [%s]: ", self._session.id):
        self._deploy_graph.run()
    deploy_completion_state = self._deploy_graph.completion_state()

    # Log phase 1 results:
    #   * SUCCESS => All Components ran `render()` and `deploy()` without
    #       raising. This is fetched from the `verify_completed()` state
    #       since Components may raise precondition errors resulting in
    #       `deploy_completed()` returning True, indicating that all nodes
    #       ran and none reached a failed state, but not all nodes running
    #       to final completion
    #   * FAILED => Some nodes raised fatal errors
    #   * INCOMPLETE => No errors were raised, but some nodes did not fully
    #       complete without raising
    log.debug3("Deploy completion: %s", deploy_completion_state)
    phase1_complete = deploy_completion_state.verify_completed()
    phase1_failed = deploy_completion_state.failed()
    log.debug(
        "[Phase 1] Deploy result: %s",
        (
            "SUCCESS"
            if phase1_complete
            else ("FAILED" if phase1_failed else "INCOMPLETE")
        ),
    )

    ###########################
    ## Phase 2: After Deploy ##
    ###########################

    phase2_complete = phase1_complete
    phase2_exception = None

    # After unsuccessful deploy.
    if (not phase1_complete or phase1_failed) and self._after_deploy_unsuccessful:
        log.debug2("Running after-deploy-unsuccessful")
        try:
            is_after_deploy_unsuccessful_completed = (
                self._after_deploy_unsuccessful(self._session, phase1_failed)
            )
            if not is_after_deploy_unsuccessful_completed:
                phase2_exception = VerificationError(
                    "After-deploy-unsuccessful verification failed"
                )
        except Exception as err:  # pylint: disable=broad-except
            log.debug2(
                "Error caught during after-deploy-unsuccessful: %s",
                err,
                exc_info=True,
            )
            phase2_exception = err

    # After successful deploy.
    if phase1_complete and self._after_deploy:
        log.debug2("Running after-deploy")
        try:
            phase2_complete = self._after_deploy(self._session)
            if not phase2_complete:
                phase2_exception = VerificationError(
                    "After-deploy verification failed"
                )
        except Exception as err:  # pylint: disable=broad-except
            log.debug2("Error caught during after-deploy: %s", err, exc_info=True)
            phase2_complete = False
            phase2_exception = err

    # Log phase 2 results
    log.debug(
        "[Phase 2] After deploy result: %s",
        (
            "SUCCESS"
            if phase2_complete
            else ("FAILED" if phase2_exception else "NOT RUN")
        ),
    )

    ###########################
    ## Phase 3: Verify Graph ##
    ###########################

    # If phase 1 ran without erroring, then run the verify Runner
    phase3_complete = False
    phase3_failed = False
    if not phase1_failed:
        # Configured the verify Runner based off of deployed nodes
        # This way only components that have started will be verified
        deployed_nodes = (
            deploy_completion_state.verified_nodes.union(
                deploy_completion_state.unverified_nodes
            )
        ) - deploy_completion_state.failed_nodes
        log.debug3("Verify phase running with Nodes: %s", deployed_nodes)

        # Enable/Disable all nodes in verify_graph based on whether they
        # were deployed or not
        for comp in set(self._session.get_components()):
            if comp in deployed_nodes:
                self._verify_graph.enable_node(comp)
            else:
                self._verify_graph.disable_node(comp)

            # Disabled components should immediately verify
            if comp in self.disabled_nodes:
                comp.set_data(lambda *_: True)

        # Run the verify Runner
        with alog.ContextTimer(log.trace, "Verify Graph [%s]: ", self._session.id):
            self._verify_graph.run()
        verify_completion_state = self._verify_graph.completion_state()
        log.debug3("Verify completion: %s", verify_completion_state)
        # Only consider phase3 completed if phase1 and phase2 fully completed
        phase3_complete = (
            verify_completion_state.verify_completed()
            and phase1_complete
            and phase2_complete
        )
        phase3_failed = verify_completion_state.failed()
    else:
        verify_completion_state = CompletionState()

    # Log phase 3 results
    log.debug(
        "[Phase 3] Verify result: %s",
        (
            "SUCCESS"
            if phase3_complete
            else (
                "FAILED"
                if phase3_failed
                else ("INCOMPLETE" if phase2_complete else "NOT RUN")
            )
        ),
    )

    ###########################
    ## Phase 4: After Verify ##
    ###########################

    phase4_complete = phase3_complete
    phase4_exception = None

    # If deployment is completed, but verification is not, run _after_verify_unsuccessful.
    if (
        phase1_complete and phase2_complete and not phase3_complete
    ) and self._after_verify_unsuccessful:
        log.debug("Running after-verify-unsuccessful")
        try:
            is_after_verify_unsuccessful_completed = (
                self._after_verify_unsuccessful(self._session, phase3_failed)
            )
            if not is_after_verify_unsuccessful_completed:
                phase4_exception = VerificationError(
                    "After-verify-unsuccessful failed"
                )
        except Exception as err:  # pylint: disable=broad-except
            log.debug2("Error caught during after-verify: %s", err, exc_info=True)
            phase4_exception = err

    # If both deployment and verification is completed, run _after_verify.
    if phase3_complete and self._after_verify:
        log.debug("Running after-verify")
        try:
            phase4_complete = self._after_verify(self._session)
            if not phase4_complete:
                phase4_exception = VerificationError("After-verify failed")
        except Exception as err:  # pylint: disable=broad-except
            log.debug2("Error caught during after-verify: %s", err, exc_info=True)
            phase4_complete = False
            phase4_exception = err

    # Log phase 4 results
    log.debug(
        "[Phase 4] After deploy result: %s",
        (
            "SUCCESS"
            if phase4_complete
            else ("FAILED" if phase4_exception else "NOT RUN")
        ),
    )

    # Create a final completion state with the "deployed nodes" pulled
    # from the deploy results and the "verified nodes" pulled from the
    # verify results.
    #
    # Verified Nodes: Nodes that made it all the way through the verify
    #   graph
    # Unverified Nodes: Nodes that are "verified" in the deploy graph, but
    #   are unverified in the verify graph or were not run in the verify
    #   graph and did not fail in the verify graph
    # Failed Nodes: Nodes that failed in either graph
    # Unstarted Nodes: Nodes that were unstarted in the deploy graph
    # Exception: Any exception from any of the phases above
    verified_nodes = verify_completion_state.verified_nodes
    failed_nodes = verify_completion_state.failed_nodes.union(
        deploy_completion_state.failed_nodes
    )
    unverified_nodes = (
        (
            deploy_completion_state.verified_nodes.union(
                deploy_completion_state.unverified_nodes
            ).union(verify_completion_state.unverified_nodes)
        )
        - verified_nodes
        - failed_nodes
    )
    unstarted_nodes = deploy_completion_state.unstarted_nodes
    exception = (
        deploy_completion_state.exception
        or phase2_exception
        or verify_completion_state.exception
        or phase4_exception
    )
    completion_state = CompletionState(
        verified_nodes=verified_nodes,
        unverified_nodes=unverified_nodes,
        failed_nodes=failed_nodes,
        unstarted_nodes=unstarted_nodes,
        exception=exception,
    )
    log.debug2("Final rollout state: %s", completion_state)
    return completion_state

run_node(func, session, component, fail_halt_runner=True) classmethod

Generic function to execute a node during Rollout

Parameters:

    func (Callable[[Component, Session], bool], required): The function to be called
    session (Session, required): The session that's currently being rolled out
    component (Component, required): The component being rolled out
    fail_halt_runner (bool, default True): If True, a falsy return from func marks the rollout as failed

Source code in oper8/rollout_manager.py, lines 103-154
@classmethod
def run_node(
    cls,
    func: Callable[[Component, Session], bool],
    session: Session,
    component: Component,
    fail_halt_runner=True,
):
    """
    Generic function to execute a node during Rollout

    Args:
        func: Callable[[Component,Session], bool]
            The function to be called
        session: Session
            The session that's currently being rolled out
        component: Component
            The component being rolled out
    """
    success = False
    exception = None
    rollout_failed = False

    try:
        success = func(session, component)
        if fail_halt_runner and not success:
            rollout_failed = True

    # If a failure occurred by throwing, treat that the same as an
    # explicit failure.
    except Oper8Error as err:
        log.debug("Caught Oper8Error during rollout of [%s]", component)
        success = False
        rollout_failed = err.is_fatal_error
        exception = err
    except Exception as err:  # pylint: disable=broad-except
        log.warning(
            "Caught exception during rollout of [%s]",
            component,
            exc_info=True,
        )
        success = False
        rollout_failed = True
        exception = err

    # If the rollout failed for any reason, raise an exception. This
    # will halt the graph execution.
    if not success:
        log.debug("[deploy] Halting rollout")
        raise DagHaltError(rollout_failed, exception=exception)

    log.debug3("Done with executing node: %s", component)
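
run_node is what converts a falsy return or a raised exception from a node function into the DagHaltError that stops the Runner's graph traversal. A sketch of calling it directly; the component and session here are placeholders carried over from the earlier sketches.

from oper8.rollout_manager import RolloutManager, deploy_component

try:
    RolloutManager.run_node(deploy_component, session, my_component)
except Exception as err:  # a DagHaltError in practice; caught generically here
    print("Node halted the rollout:", err)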

deploy_component(session, component)

Deploy a component given a particular session

Parameters:

    session (Session, required): The current rollout session
    component (Component, required): The component to deploy

Returns:

    result (bool): The result of the deploy

Source code in oper8/rollout_manager.py, lines 25-49
def deploy_component(session: Session, component: Component) -> bool:
    """Deploy a component given a particular session

    Args:
        session: Session
            The current rollout session
        component: Component
            The component to deploy

    Returns:
        result: bool
            The result of the deploy
    """
    # Do the render
    with alog.ContextTimer(log.debug2, "Render duration for %s", component):
        component.render_chart(session)
        log.debug3(
            "Rendered objects for [%s]: %s",
            component,
            [str(obj) for obj in component.managed_objects],
        )

    # Do the deploy
    with alog.ContextTimer(log.debug2, "Deploy duration for %s: ", component):
        return component.deploy(session)

disable_component(session, component)

Disable a component given a particular session

Parameters:

    session (Session, required): The current rollout session
    component (Component, required): The component to disable

Returns:

    result (bool): The result of the disable

Source code in oper8/rollout_manager.py, lines 52-76
def disable_component(session: Session, component: Component) -> bool:
    """Disable a component given a particular session

    Args:
        session: Session
            The current rollout session
        component: Component
            The component to disable

    Returns:
        result: bool
            The result of the disable
    """
    # Do the render
    with alog.ContextTimer(log.debug2, "Render duration for %s", component):
        component.render_chart(session)
        log.debug3(
            "Rendered objects for [%s]: %s",
            component,
            [str(obj) for obj in component.managed_objects],
        )

    # Do the deploy
    with alog.ContextTimer(log.debug2, "Disable duration for %s: ", component):
        return component.disable(session)

verify_component(session, component)

Verify a component given a particular session

Parameters:

    session (Session, required): The current rollout session
    component (Component, required): The component to verify

Returns:

    result (bool): The result of the verify

Source code in oper8/rollout_manager.py, lines 79-94
def verify_component(session: Session, component: Component) -> bool:
    """Verify a component given a particular session

    Args:
        session: Session
            The current rollout session
        component: Component
            The component to verify

    Returns:
        result: bool
            The result of the verify
    """
    # Do the verify
    with alog.ContextTimer(log.debug2, "Verify duration for %s: ", component):
        return component.verify(session)

session

This module holds the core session state for an individual reconciliation

Session

A session is the core context manager for the state of an in-progress reconciliation

Source code in oper8/session.py, lines 42-516
class Session:  # pylint: disable=too-many-instance-attributes,too-many-public-methods
    """A session is the core context manager for the state of an in-progress
    reconciliation
    """

    # We strictly define the set of attributes that a Session can have to
    # disallow arbitrary assignment
    __slots__ = [
        "__components",
        "__component_dependencies",
        "__enabled_components",
        "__disabled_components",
        "__id",
        "__cr_manifest",
        "__config",
        "__temporary_patches",
        "__deploy_manager",
        "__status",
        "__current_version",
        "__graph",
        # _app is retained for backwards compatibility
        "_app",
    ]

    def __init__(  # pylint: disable=too-many-arguments
        self,
        reconciliation_id: str,
        cr_manifest: aconfig.Config,
        config: aconfig.Config,
        deploy_manager: DeployManagerBase,
        temporary_patches: Optional[List[dict]] = None,
    ):
        """Construct a session object to hold the state for a reconciliation

        Args:
            reconciliation_id:  str
                The unique ID for this reconciliation
            cr_manifest:  aconfig.Config
                The full value of the CR manifest that triggered this
                reconciliation
            config:  aconfig.Config
                The compiled backend config for this reconciliation
            deploy_manager:  DeployManagerBase
                The preconfigured DeployManager in charge of running the actual
                deploy operations for this deployment
            temporary_patches:  list(dict)
                List of temporary patch object to apply to resources managed by
                this rollout
        """

        ##################################################################
        # Private Members: These members will be hidden from client code #
        ##################################################################

        # Mapping from component name to Component instance
        self.__graph = Graph()

        ###################################################
        # Properties: These properties will be exposed as #
        # @property members to be used by client code     #
        ###################################################

        self.__id = reconciliation_id
        if not isinstance(cr_manifest, aconfig.Config):
            cr_manifest = aconfig.Config(cr_manifest, override_env_vars=False)
        self._validate_cr(cr_manifest)
        self.__cr_manifest = cr_manifest
        if not isinstance(config, aconfig.Config):
            config = aconfig.Config(config, override_env_vars=False)
        self.__config = config
        self.__temporary_patches = temporary_patches or []

        # The deploy manager that will be used to manage interactions with the
        # cluster
        self.__deploy_manager = deploy_manager

        # Get the current status and version so that it can be referenced by the
        # Application and Components that use it
        self.__status = self.get_status()
        self.__current_version = get_version(self.status)

    ## Properties ##############################################################

    @property
    def id(self) -> str:  # pylint: disable=invalid-name
        """The unique reconciliation ID"""
        return self.__id

    @property
    def cr_manifest(self) -> aconfig.Config:
        """The full CR manifest that triggered this reconciliation"""
        return self.__cr_manifest

    @property
    def spec(self) -> aconfig.Config:
        """The spec section of the CR manifest"""
        return self.cr_manifest.get("spec", aconfig.Config({}))

    @property
    def version(self) -> str:
        """The spec.version for this CR"""
        return get_manifest_version(self.cr_manifest)

    @property
    def metadata(self) -> aconfig.Config:
        """The metadata for this CR"""
        return self.cr_manifest.metadata

    @property
    def kind(self) -> str:
        """The kind of the operand for this CR"""
        return self.cr_manifest.kind

    @property
    def api_version(self) -> str:
        """The api version of the operand for this CR"""
        return self.cr_manifest.apiVersion

    @property
    def name(self) -> str:
        """The metadata.name for this CR"""
        return self.metadata.name

    @property
    def namespace(self) -> str:
        """The metadata.namespace for this CR"""
        return self.metadata.namespace

    @property
    def finalizers(self) -> str:
        """The metadata.namespace for this CR"""

        # Manually create finalizer list if it doesn't exist so its
        # editable
        if "finalizers" not in self.metadata:
            self.metadata["finalizers"] = []

        return self.metadata.get("finalizers")

    @property
    def config(self) -> aconfig.Config:
        """The backend config for this reconciliation"""
        return self.__config

    @property
    def temporary_patches(self) -> List[aconfig.Config]:
        """Ordered list of temporary patches that apply to the operand being
        reconciled
        """
        return self.__temporary_patches

    @property
    def status(self) -> aconfig.Config:
        """The operand status"""
        return self.__status

    @property
    def current_version(self) -> aconfig.Config:
        """The most recently reconciled version of the operand"""
        return self.__current_version

    @property
    def deploy_manager(self) -> DeployManagerBase:
        """Allow read access to the deploy manager"""
        return self.__deploy_manager

    @property
    def graph(self) -> str:  # pylint: disable=invalid-name
        """The component graph"""
        return self.__graph

    ## State Management ########################################################
    #
    # These functions are used by derived controllers in their setup_components
    # implementations
    ##

    @alog.logged_function(log.debug2)
    def add_component(self, component: COMPONENT_INSTANCE_TYPE):
        """Add a component to this deploy associated with a specific application

        Args:
            component:  Component
                The component to add to this deploy
            disabled:  bool
                Whether or not the component is disabled in this deploy
        """
        self.graph.add_node(component)

    def add_component_dependency(
        self,
        component: Union[str, COMPONENT_INSTANCE_TYPE],
        upstream_component: Union[str, COMPONENT_INSTANCE_TYPE],
        verify_function: Optional[COMPONENT_VERIFY_FUNCTION] = None,
    ):
        """Add a dependency indicating that one component requires an upstream
        component to be deployed before it can be deployed.

        Args:
            component:  str or Component
                The component or name of component in the deploy that must wait for the upstream
            upstream_component:  str or Component
                The upstream component or name of upstream that must be deployed before component
            verify_function:  callable
                A callable function of the form `def verify(session) -> bool:`
                to use to verify that the dependency has been satisfied. This
                will be used to block deployment of the component beyond
                requiring that the upstream has been deployed successfully.
        """
        # Get component obj if name was provided
        component_node = component
        if isinstance(component, str):
            component_node = self.get_component(component)

        upstream_component_node = upstream_component
        if isinstance(upstream_component, str):
            upstream_component_node = self.get_component(upstream_component)

        if not component_node or not upstream_component_node:
            raise ValueError(
                f"Cannot add dependency [{component} -> {upstream_component}]",
                " for unknown component(s)",
            )

        if component_node.disabled or upstream_component_node.disabled:
            raise ValueError(
                f"Cannot add dependency [{component} -> {upstream_component}]",
                " for with disabled component(s)",
            )

        # Add session parameter to verify function if one was provided
        if verify_function:
            verify_function = partial(verify_function, self)
        self.graph.add_node_dependency(
            component_node, upstream_component_node, verify_function
        )

    ## Utilities ###############################################################
    #
    # These utilities may be used anywhere in client code to perform common
    # operations based on the state of the session.
    ##
    def get_component(
        self, name: str, disabled: Optional[bool] = None
    ) -> Optional[COMPONENT_INSTANCE_TYPE]:
        """Get an individual component by name

        Args:
            name: str
                Name of component to return
            disabled: Optional[bool]
                Option on whether to return disabled components. If this option is not supplied then
                the referenced component will be returned regardless of whether it is disabled
                or enabled

        Returns:
            component: Optional[Component]
                The component with the given name, or None if the component does not exist or does
                not match the disabled arg
        """
        comp = self.graph.get_node(name)

        # Only filter disabled/enabled components if the option was passed in.
        if isinstance(disabled, bool):
            if disabled:
                return comp if comp.disabled else None
            return comp if not comp.disabled else None

        return comp

    def get_components(self, disabled: bool = False) -> List[COMPONENT_INSTANCE_TYPE]:
        """Get all components associated with an application

        Args:
            disabled:  bool
                Whether to return disabled or enabled components

        Returns:
            components:  list(Component)
                The list of Component objects associated with the given
                application
        """
        assert isinstance(
            disabled, bool
        ), "Disabled flag must be a bool. You may be using the old function signature!"

        # Get list of all components.
        comp_list = self.graph.get_all_nodes()

        # Filter out disabled/enabled components using get_component
        filtered_list = [
            comp for comp in comp_list if self.get_component(comp.get_name(), disabled)
        ]

        return filtered_list

    def get_component_dependencies(
        self,
        component: Union[str, COMPONENT_INSTANCE_TYPE],
    ) -> List[Tuple[COMPONENT_INSTANCE_TYPE, Optional[COMPONENT_VERIFY_FUNCTION]]]:
        """Get the list of (upstream_name, verify_function) tuples for a given
        component.

        NOTE: This is primarily for use inside of the RolloutManager. Do not use
            this method in user code unless you know what you're doing!

        Args:
            component_name:  str
                The name of the component to lookup dependencies for

        Returns:
            upstreams:  List[Tuple[str, Optional[VERIFY_FUNCTION]]]
                The list of upstream (name, verify_fn) pairs
        """
        component_node = component
        if isinstance(component, str):
            component_node = self.get_component(component)

        return component_node.get_children()

    def get_scoped_name(self, name: str) -> str:
        """Get a name that is scoped to the application instance

        Args:
            name:  str
                The name of a resource that will be managed by this operator
                which should have instance name scoping applied

        Returns:
            scoped_name:  str
                The scoped and truncated version of the input name
        """
        scoped_name = self.get_truncated_name(f"{self.name}-{name}")
        log.debug3("Scoped name [%s] -> [%s]", name, scoped_name)
        return scoped_name

    @staticmethod
    def get_truncated_name(name: str) -> str:
        """Perform truncation on a cluster name to make it conform to kubernetes
        limits while remaining unique.

        Args:
            name:  str
                The name of the resource that should be truncated and made
                unique

        Returns:
            truncated_name:  str
                A version of name that has been truncated and made unique
        """
        if len(name) > MAX_NAME_LEN:
            sha = hashlib.sha256()
            sha.update(name.encode("utf-8"))
            trunc_name = name[: MAX_NAME_LEN - 4] + sha.hexdigest()[:4]
            log.debug2("Truncated name [%s] -> [%s]", name, trunc_name)
            name = trunc_name
        return name

    def get_object_current_state(
        self,
        kind: str,
        name: str,
        api_version: Optional[str] = None,
        namespace: Optional[str] = _SESSION_NAMESPACE,
    ) -> Tuple[bool, Optional[dict]]:
        """Get the current state of the given object in the namespace of this
        session

        Args:
            kind:  str
                The kind of the object to fetch
            name:  str
                The full name of the object to fetch
            api_version:  str
                The api_version of the resource kind to fetch

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  dict or None
                The dict representation of the current object's configuration,
                or None if not present
        """
        namespace = namespace if namespace != _SESSION_NAMESPACE else self.namespace
        return self.deploy_manager.get_object_current_state(
            kind=kind,
            name=name,
            namespace=namespace,
            api_version=api_version,
        )

    def filter_objects_current_state(  # pylint: disable=too-many-arguments
        self,
        kind: str,
        api_version: Optional[str] = None,
        label_selector: Optional[str] = None,
        field_selector: Optional[str] = None,
        namespace: Optional[str] = _SESSION_NAMESPACE,
    ) -> Tuple[bool, List[dict]]:
        """Get the current state of the given object in the namespace of this
        session

        Args:
            kind:  str
                The kind of the object to fetch
            label_selector:  str
                The label selector to filter the results by
            field_selector:  str
                The field selector to filter the results by
            api_version:  str
                The api_version of the resource kind to fetch

        Returns:
            success:  bool
                Whether or not the state fetch operation succeeded
            current_state:  List[Dict]
                The list of resources in dict representation,
                or [] if none match
        """
        namespace = namespace if namespace != _SESSION_NAMESPACE else self.namespace
        return self.deploy_manager.filter_objects_current_state(
            kind=kind,
            namespace=namespace,
            api_version=api_version,
            label_selector=label_selector,
            field_selector=field_selector,
        )

    @alog.logged_function(log.debug2)
    @alog.timed_function(log.debug2)
    def get_status(self) -> dict:
        """Get the status of the resource being managed by this session or an
        empty dict if not available

        Returns:
            current_status:  dict
                The dict representation of the status subresource for the CR
                being managed by this session
        """

        # Pull the kind, name, and namespace
        kind = self.cr_manifest.get("kind")
        name = self.name
        api_version = self.api_version
        log.debug3("Getting status for %s.%s/%s", api_version, kind, name)

        # Fetch the current status
        success, content = self.get_object_current_state(
            kind=kind,
            name=name,
            api_version=api_version,
        )
        assert_cluster(
            success, f"Failed to fetch status for [{api_version}/{kind}/{name}]"
        )
        if content:
            return content.get("status", {})
        return {}

    ## Implementation Details ##################################################

    @staticmethod
    def _validate_cr(cr_manifest: aconfig.Config):
        """Ensure that all expected elements of the CR are present. Expected
        elements are those that are guaranteed to be present by the kube API.
        """
        assert "kind" in cr_manifest, "CR missing required section ['kind']"
        assert "apiVersion" in cr_manifest, "CR missing required section ['apiVersion']"
        assert "metadata" in cr_manifest, "CR missing required section ['metadata']"
        assert (
            "name" in cr_manifest.metadata
        ), "CR missing required section ['metadata.name']"
        assert (
            "namespace" in cr_manifest.metadata
        ), "CR missing required section ['metadata.namespace']"

api_version property

The api version of the operand for this CR

config property

The backend config for this reconciliation

cr_manifest property

The full CR manifest that triggered this reconciliation

current_version property

The most recently reconciled version of the operand

deploy_manager property

Allow read access to the deploy manager

finalizers property

The metadata.finalizers for this CR

graph property

The component graph

id property

The unique reconciliation ID

kind property

The kind of the operand for this CR

metadata property

The metadata for this CR

name property

The metadata.name for this CR

namespace property

The metadata.namespace for this CR

spec property

The spec section of the CR manifest

status property

The operand status

temporary_patches property

Ordered list of temporary patches that apply to the operand being reconciled

version property

The spec.version for this CR
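
A minimal sketch of how these properties are typically read; the helper below is hypothetical and only touches the documented Session properties:

# Hypothetical helper that only touches documented Session properties
def describe_session(session) -> str:
    # kind, name, and namespace come straight from the CR manifest
    ident = f"{session.api_version}/{session.kind} {session.namespace}/{session.name}"
    # spec falls back to an empty Config when the CR has no spec section
    replicas = session.spec.get("replicas", 1)
    return f"{ident} replicas={replicas} version={session.version}"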

__init__(reconciliation_id, cr_manifest, config, deploy_manager, temporary_patches=None)

Construct a session object to hold the state for a reconciliation

Parameters:

Name Type Description Default
reconciliation_id str

str The unique ID for this reconciliation

required
cr_manifest Config

aconfig.Config The full value of the CR manifest that triggered this reconciliation

required
config Config

aconfig.Config The compiled backend config for this reconciliation

required
deploy_manager DeployManagerBase

DeployManagerBase The preconfigured DeployManager in charge of running the actual deploy operations for this deployment

required
temporary_patches Optional[List[dict]]

list(dict) List of temporary patch objects to apply to resources managed by this rollout

None
Source code in oper8/session.py, lines 66-121
def __init__(  # pylint: disable=too-many-arguments
    self,
    reconciliation_id: str,
    cr_manifest: aconfig.Config,
    config: aconfig.Config,
    deploy_manager: DeployManagerBase,
    temporary_patches: Optional[List[dict]] = None,
):
    """Construct a session object to hold the state for a reconciliation

    Args:
        reconciliation_id:  str
            The unique ID for this reconciliation
        cr_manifest:  aconfig.Config
            The full value of the CR manifest that triggered this
            reconciliation
        config:  aconfig.Config
            The compiled backend config for this reconciliation
        deploy_manager:  DeployManagerBase
            The preconfigured DeployManager in charge of running the actual
            deploy operations for this deployment
        temporary_patches:  list(dict)
            List of temporary patch object to apply to resources managed by
            this rollout
    """

    ##################################################################
    # Private Members: These members will be hidden from client code #
    ##################################################################

    # Mapping from component name to Component instance
    self.__graph = Graph()

    ###################################################
    # Properties: These properties will be exposed as #
    # @property members to be used by client code     #
    ###################################################

    self.__id = reconciliation_id
    if not isinstance(cr_manifest, aconfig.Config):
        cr_manifest = aconfig.Config(cr_manifest, override_env_vars=False)
    self._validate_cr(cr_manifest)
    self.__cr_manifest = cr_manifest
    if not isinstance(config, aconfig.Config):
        config = aconfig.Config(config, override_env_vars=False)
    self.__config = config
    self.__temporary_patches = temporary_patches or []

    # The deploy manager that will be used to manage interactions with the
    # cluster
    self.__deploy_manager = deploy_manager

    # Get the current status and version so that it can be referenced by the
    # Application and Components that use it
    self.__status = self.get_status()
    self.__current_version = get_version(self.status)
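
Sessions are normally constructed by the reconciliation machinery, but building one directly can be useful in tests. A sketch, assuming the DryRunDeployManager exported by oper8's deploy_manager package and a hypothetical MyApp CR:

import aconfig
from oper8.deploy_manager import DryRunDeployManager  # assumed import path
from oper8.session import Session

# Hypothetical CR manifest for a MyApp operand
cr = {
    "kind": "MyApp",
    "apiVersion": "example.org/v1",
    "metadata": {"name": "my-app", "namespace": "default"},
    "spec": {"replicas": 1},
}
session = Session(
    reconciliation_id="test-reconcile-001",
    cr_manifest=aconfig.Config(cr, override_env_vars=False),
    config=aconfig.Config({}, override_env_vars=False),
    deploy_manager=DryRunDeployManager(),
)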

add_component(component)

Add a component to this deploy associated with a specific application

Parameters:

Name Type Description Default
component COMPONENT_INSTANCE_TYPE

Component The component to add to this deploy

required
disabled

bool Whether or not the component is disabled in this deploy

required
Source code in oper8/session.py, lines 219-229
@alog.logged_function(log.debug2)
def add_component(self, component: COMPONENT_INSTANCE_TYPE):
    """Add a component to this deploy associated with a specific application

    Args:
        component:  Component
            The component to add to this deploy
        disabled:  bool
            Whether or not the component is disabled in this deploy
    """
    self.graph.add_node(component)
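
A sketch of registering components inside a controller's setup_components; DatabaseComponent and ServerComponent are hypothetical Component subclasses that accept the session in their constructors:

def setup_components(self, session):
    # Each call registers a node in the session's component graph
    session.add_component(DatabaseComponent(session=session))
    session.add_component(ServerComponent(session=session))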

add_component_dependency(component, upstream_component, verify_function=None)

Add a dependency indicating that one component requires an upstream component to be deployed before it can be deployed.

Parameters:

Name Type Description Default
component Union[str, COMPONENT_INSTANCE_TYPE]

str or Component The component or name of component in the deploy that must wait for the upstream

required
upstream_component Union[str, COMPONENT_INSTANCE_TYPE]

str or Component The upstream component or name of upstream that must be deployed before component

required
verify_function Optional[COMPONENT_VERIFY_FUNCTION]

callable A callable function of the form def verify(session) -> bool: to use to verify that the dependency has been satisfied. This will be used to block deployment of the component beyond requiring that the upstream has been deployed successfully.

None
Source code in oper8/session.py, lines 231-277
def add_component_dependency(
    self,
    component: Union[str, COMPONENT_INSTANCE_TYPE],
    upstream_component: Union[str, COMPONENT_INSTANCE_TYPE],
    verify_function: Optional[COMPONENT_VERIFY_FUNCTION] = None,
):
    """Add a dependency indicating that one component requires an upstream
    component to be deployed before it can be deployed.

    Args:
        component:  str or Component
            The component or name of component in the deploy that must wait for the upstream
        upstream_component:  str or Component
            The upstream component or name of upstream that must be deployed before component
        verify_function:  callable
            A callable function of the form `def verify(session) -> bool:`
            to use to verify that the dependency has been satisfied. This
            will be used to block deployment of the component beyond
            requiring that the upstream has been deployed successfully.
    """
    # Get component obj if name was provided
    component_node = component
    if isinstance(component, str):
        component_node = self.get_component(component)

    upstream_component_node = upstream_component
    if isinstance(upstream_component, str):
        upstream_component_node = self.get_component(upstream_component)

    if not component_node or not upstream_component_node:
        raise ValueError(
            f"Cannot add dependency [{component} -> {upstream_component}]",
            " for unknown component(s)",
        )

    if component_node.disabled or upstream_component_node.disabled:
        raise ValueError(
            f"Cannot add dependency [{component} -> {upstream_component}]",
            " for with disabled component(s)",
        )

    # Add session parameter to verify function if one was provided
    if verify_function:
        verify_function = partial(verify_function, self)
    self.graph.add_node_dependency(
        component_node, upstream_component_node, verify_function
    )
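
A sketch of wiring a dependency with an optional verification step; the component names and the Deployment lookup are illustrative:

def database_ready(session) -> bool:
    # Hypothetical check: require that the "database" Deployment exists
    success, state = session.get_object_current_state(
        kind="Deployment", name="database", api_version="apps/v1"
    )
    return success and state is not None

# "server" will not roll out until "database" has deployed and database_ready passes
session.add_component_dependency("server", "database", verify_function=database_ready)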

filter_objects_current_state(kind, api_version=None, label_selector=None, field_selector=None, namespace=_SESSION_NAMESPACE)

Get the current state of the given object in the namespace of this session

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
label_selector Optional[str]

str The label selector to filter the results by

None
field_selector Optional[str]

str The field selector to filter the results by

None
api_version Optional[str]

str The api_version of the resource kind to fetch

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state List[dict]

List[Dict] The list of resources in dict representation, or [] if none match

Source code in oper8/session.py, lines 433-468
def filter_objects_current_state(  # pylint: disable=too-many-arguments
    self,
    kind: str,
    api_version: Optional[str] = None,
    label_selector: Optional[str] = None,
    field_selector: Optional[str] = None,
    namespace: Optional[str] = _SESSION_NAMESPACE,
) -> Tuple[bool, List[dict]]:
    """Get the current state of the given object in the namespace of this
    session

    Args:
        kind:  str
            The kind of the object to fetch
        label_selector:  str
            The label selector to filter the results by
        field_selector:  str
            The field selector to filter the results by
        api_version:  str
            The api_version of the resource kind to fetch

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  List[Dict]
            The list of resources in dict representation,
            or [] if none match
    """
    namespace = namespace if namespace != _SESSION_NAMESPACE else self.namespace
    return self.deploy_manager.filter_objects_current_state(
        kind=kind,
        namespace=namespace,
        api_version=api_version,
        label_selector=label_selector,
        field_selector=field_selector,
    )
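
For example, a sketch of listing Pods that carry an instance label (the label key is illustrative):

success, pods = session.filter_objects_current_state(
    kind="Pod",
    api_version="v1",
    label_selector=f"app.kubernetes.io/instance={session.name}",
)
if success:
    pod_names = [pod["metadata"]["name"] for pod in pods]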

get_component(name, disabled=None)

Get an individual component by name

Parameters:

Name Type Description Default
name str

str Name of component to return

required
disabled Optional[bool]

Optional[bool] Option on whether to return disabled components. If this option is not supplied then the referenced component will be returned regardless of whether it is disabled or enabled

None

Returns:

Name Type Description
component Optional[COMPONENT_INSTANCE_TYPE]

Optional[Component] The component with the given name, or None if the component does not exist or does not match the disabled arg

Source code in oper8/session.py, lines 284-310
def get_component(
    self, name: str, disabled: Optional[bool] = None
) -> Optional[COMPONENT_INSTANCE_TYPE]:
    """Get an individual component by name

    Args:
        name: str
            Name of component to return
        disabled: Optional[bool]
            Option on whether to return disabled components. If this option is not supplied then
            the referenced component will be returned regardless of whether it is disabled
            or enabled

    Returns:
        component: Optional[Component]
            The component with the given name, or None if the component does not exist or does
            not match the disabled arg
    """
    comp = self.graph.get_node(name)

    # Only filter disabled/enabled components if the option was passed in.
    if isinstance(disabled, bool):
        if disabled:
            return comp if comp.disabled else None
        return comp if not comp.disabled else None

    return comp
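
A short sketch of the three lookup modes (the "database" component name is illustrative):

database = session.get_component("database")                 # enabled or disabled
enabled = session.get_component("database", disabled=False)  # only if enabled
disabled = session.get_component("database", disabled=True)  # only if disabled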

get_component_dependencies(component)

Get the list of (upstream_name, verify_function) tuples for a given component.

NOTE: This is primarily for use inside of the RolloutManager. Do not use this method in user code unless you know what you're doing!

Parameters:

Name Type Description Default
component_name

str The name of the component to lookup dependencies for

required

Returns:

Name Type Description
upstreams List[Tuple[COMPONENT_INSTANCE_TYPE, Optional[COMPONENT_VERIFY_FUNCTION]]]

List[Tuple[str, Optional[VERIFY_FUNCTION]]] The list of upstream (name, verify_fn) pairs

Source code in oper8/session.py, lines 338-360
def get_component_dependencies(
    self,
    component: Union[str, COMPONENT_INSTANCE_TYPE],
) -> List[Tuple[COMPONENT_INSTANCE_TYPE, Optional[COMPONENT_VERIFY_FUNCTION]]]:
    """Get the list of (upstream_name, verify_function) tuples for a given
    component.

    NOTE: This is primarily for use inside of the RolloutManager. Do not use
        this method in user code unless you know what you're doing!

    Args:
        component_name:  str
            The name of the component to lookup dependencies for

    Returns:
        upstreams:  List[Tuple[str, Optional[VERIFY_FUNCTION]]]
            The list of upstream (name, verify_fn) pairs
    """
    component_node = component
    if isinstance(component, str):
        component_node = self.get_component(component)

    return component_node.get_children()

get_components(disabled=False)

Get all components associated with an application

Parameters:

Name Type Description Default
disabled bool

bool Whether to return disabled or enabled components

False

Returns:

Name Type Description
components List[COMPONENT_INSTANCE_TYPE]

list(Component) The list of Component objects associated with the given application

Source code in oper8/session.py, lines 312-336
def get_components(self, disabled: bool = False) -> List[COMPONENT_INSTANCE_TYPE]:
    """Get all components associated with an application

    Args:
        disabled:  bool
            Whether to return disabled or enabled components

    Returns:
        components:  list(Component)
            The list of Component objects associated with the given
            application
    """
    assert isinstance(
        disabled, bool
    ), "Disabled flag must be a bool. You may be using the old function signature!"

    # Get list of all components.
    comp_list = self.graph.get_all_nodes()

    # Filter out disabled/enabled components using get_component
    filtered_list = [
        comp for comp in comp_list if self.get_component(comp.get_name(), disabled)
    ]

    return filtered_list
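
For example, a sketch that iterates the enabled components and separately collects the disabled ones:

for component in session.get_components():        # enabled components only
    print(component.get_name())

disabled_components = session.get_components(disabled=True)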

get_object_current_state(kind, name, api_version=None, namespace=_SESSION_NAMESPACE)

Get the current state of the given object in the namespace of this session

Parameters:

Name Type Description Default
kind str

str The kind of the object to fetch

required
name str

str The full name of the object to fetch

required
api_version Optional[str]

str The api_version of the resource kind to fetch

None

Returns:

Name Type Description
success bool

bool Whether or not the state fetch operation succeeded

current_state Optional[dict]

dict or None The dict representation of the current object's configuration, or None if not present

Source code in oper8/session.py, lines 400-431
def get_object_current_state(
    self,
    kind: str,
    name: str,
    api_version: Optional[str] = None,
    namespace: Optional[str] = _SESSION_NAMESPACE,
) -> Tuple[bool, Optional[dict]]:
    """Get the current state of the given object in the namespace of this
    session

    Args:
        kind:  str
            The kind of the object to fetch
        name:  str
            The full name of the object to fetch
        api_version:  str
            The api_version of the resource kind to fetch

    Returns:
        success:  bool
            Whether or not the state fetch operation succeeded
        current_state:  dict or None
            The dict representation of the current object's configuration,
            or None if not present
    """
    namespace = namespace if namespace != _SESSION_NAMESPACE else self.namespace
    return self.deploy_manager.get_object_current_state(
        kind=kind,
        name=name,
        namespace=namespace,
        api_version=api_version,
    )
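
A sketch of fetching a ConfigMap owned by this instance (the "settings" name suffix is illustrative):

success, config_map = session.get_object_current_state(
    kind="ConfigMap",
    name=session.get_scoped_name("settings"),
    api_version="v1",
)
if success and config_map is not None:
    data = config_map.get("data", {})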

get_scoped_name(name)

Get a name that is scoped to the application instance

Parameters:

Name Type Description Default
name str

str The name of a resource that will be managed by this operator which should have instance name scoping applied

required

Returns:

Name Type Description
scoped_name str

str The scoped and truncated version of the input name

Source code in oper8/session.py, lines 362-376
def get_scoped_name(self, name: str) -> str:
    """Get a name that is scoped to the application instance

    Args:
        name:  str
            The name of a resource that will be managed by this operator
            which should have instance name scoping applied

    Returns:
        scoped_name:  str
            The scoped and truncated version of the input name
    """
    scoped_name = self.get_truncated_name(f"{self.name}-{name}")
    log.debug3("Scoped name [%s] -> [%s]", name, scoped_name)
    return scoped_name
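
For example, with a CR named my-app, a sketch of scoping a child resource name:

# With a CR named "my-app" this yields "my-app-server",
# truncated via get_truncated_name if the result is too long
server_name = session.get_scoped_name("server")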

get_status()

Get the status of the resource being managed by this session or an empty dict if not available

Returns:

Name Type Description
current_status dict

dict The dict representation of the status subresource for the CR being managed by this session

Source code in oper8/session.py, lines 470-499
@alog.logged_function(log.debug2)
@alog.timed_function(log.debug2)
def get_status(self) -> dict:
    """Get the status of the resource being managed by this session or an
    empty dict if not available

    Returns:
        current_status:  dict
            The dict representation of the status subresource for the CR
            being managed by this session
    """

    # Pull the kind, name, and namespace
    kind = self.cr_manifest.get("kind")
    name = self.name
    api_version = self.api_version
    log.debug3("Getting status for %s.%s/%s", api_version, kind, name)

    # Fetch the current status
    success, content = self.get_object_current_state(
        kind=kind,
        name=name,
        api_version=api_version,
    )
    assert_cluster(
        success, f"Failed to fetch status for [{api_version}/{kind}/{name}]"
    )
    if content:
        return content.get("status", {})
    return {}

get_truncated_name(name) staticmethod

Perform truncation on a cluster name to make it conform to kubernetes limits while remaining unique.

Parameters:

Name Type Description Default
name str

str The name of the resource that should be truncated and made unique

required

Returns:

Name Type Description
truncated_name str

str A version of name that has been truncated and made unique

Source code in oper8/session.py, lines 378-398
@staticmethod
def get_truncated_name(name: str) -> str:
    """Perform truncation on a cluster name to make it conform to kubernetes
    limits while remaining unique.

    Args:
        name:  str
            The name of the resource that should be truncated and made
            unique

    Returns:
        truncated_name:  str
            A version of name that has been truncated and made unique
    """
    if len(name) > MAX_NAME_LEN:
        sha = hashlib.sha256()
        sha.update(name.encode("utf-8"))
        trunc_name = name[: MAX_NAME_LEN - 4] + sha.hexdigest()[:4]
        log.debug2("Truncated name [%s] -> [%s]", name, trunc_name)
        name = trunc_name
    return name
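
A sketch showing the truncation behavior for an over-long name:

long_name = "a" * 100
short_name = Session.get_truncated_name(long_name)
# Names longer than MAX_NAME_LEN keep the first MAX_NAME_LEN - 4 characters
# and gain a 4-character sha256 suffix so the truncated result stays unique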

setup_vcs

This module uses VCS to create a trimmed down repo with a selection of local branches and tags and a fully flattened history.

VCSRepoInitializer

This class encapsulates the key attributes of the VCS repo initialization process

Source code in oper8/setup_vcs.py, lines 65-272
class VCSRepoInitializer:
    """This class encapsulates the key attributes of the VCS repo initialization
    process
    """

    # The git repo that is being compressed for VCS versioning
    source_repo: VCS
    # The git repo where the VCS versioning repo is going to be created
    dest_repo: VCS
    # The remote within the destination repo that refers to the source repo
    source_remote: str
    # The reference to the root empty commit in the destination repo
    root_ref: str

    # Default branch name used when creating the repo
    DEFAULT_BRANCH_NAME = "__root__"

    # The name of the source remote
    SOURCE_REMOTE = "__source__"

    def __init__(self, source: str, destination: str, force: bool):
        """Initialize and set up the repos and common attributes"""

        # Make sure the source is a git repo
        try:
            self.source_repo = VCS(source)
        except VCSConfigError as err:
            msg = f"Invalid source git repo: {source}"
            log.error(msg)
            raise ValueError(msg) from err
        log.debug("Source Repo: %s", source)

        # Set up the dest and make sure it's empty
        if os.path.isfile(destination):
            msg = f"Invalid destination: {destination} is a file"
            log.error(msg)
            raise ValueError(msg)
        os.makedirs(destination, exist_ok=True)
        contents = os.listdir(destination)
        if contents:
            if not force:
                msg = f"Invalid destination: {destination} is not empty"
                log.error(msg)
                raise ValueError(msg)
            log.debug("Force cleaning dest %s", destination)
            for entry in contents:
                full_path = os.path.join(destination, entry)
                if os.path.isdir(full_path):
                    log.debug3("Removing dir: %s", full_path)
                    shutil.rmtree(full_path)
                else:
                    log.debug3("Removing file: %s", full_path)
                    os.remove(full_path)

        # Initialize the dest as an empty repo
        log.info("Initializing dest repo: %s", destination)
        self.dest_repo = VCS(
            destination, create_if_needed=True, initial_head=self.DEFAULT_BRANCH_NAME
        )
        self.dest_repo.create_commit("root")
        self.dest_repo.add_remote(self.SOURCE_REMOTE, source)
        self.root_ref = self.dest_repo.head

    def initialize_branches(
        self,
        branch_expr: Optional[List[str]],
        tag_expr: Optional[List[str]],
    ):
        """Perform the initialize of all branches in the destination repo from
        the branches and tags that match the given expressions.
        """
        # Get all tags and branches
        tags = self._list_tags(self.source_repo)
        branches = self._list_branches(self.source_repo)
        log.debug2("All Tags: %s", tags)
        log.debug2("All Branches: %s", branches)

        # Filter the tags and branches by the filter arguments
        keep_tags = self._filter_refs(tags, tag_expr)
        keep_branches = self._filter_refs(branches, branch_expr)
        log.debug2("Keep Tags: %s", keep_tags)
        log.debug2("Keep Branches: %s", keep_branches)

        # For each retained reference, fetch the ref from the source, check out the
        # files to the dest, and make a fresh commit
        for keep_ref in keep_tags:
            log.debug("Making destination branch [%s] from tag", keep_ref)
            self._make_dest_branch(keep_ref, False)
        for keep_ref in keep_branches:
            log.debug("Making destination branch [%s] from branch", keep_ref)
            self._make_dest_branch(keep_ref, True)

    def clean_up(self):
        """Clean out all unnecessary content from the destination repo"""

        # Check the root back out
        log.debug3("Checking out root")
        self.dest_repo.checkout_ref(self.root_ref)

        # Delete the source remote
        self.dest_repo.delete_remote(self.SOURCE_REMOTE)

        # Remove all tags
        for tag_name in self._list_tags(self.dest_repo):
            self.dest_repo.delete_tag(tag_name)

        # Remove the root branch and leave HEAD detached
        self.dest_repo.checkout_detached_head()
        self.dest_repo.delete_branch(self.DEFAULT_BRANCH_NAME)

        # Compress the references to remove orphaned refs and objects
        self.dest_repo.compress_references()

    ## Impl ##

    def _get_all_checkout_files(self, keep_ref: str) -> List[str]:
        """Get all of the file paths in the given ref relative to the dest repo

        # NOTE: This relies on pygit2 syntax!
        """
        commit, _ = self.dest_repo.get_ref(keep_ref)
        diff = commit.tree.diff_to_workdir()
        return [delta.new_file.path for delta in diff.deltas]

    def _make_dest_branch(self, keep_ref: str, is_branch: bool):
        """This is the function that does the main work of copying code from the
        source to the destination and creating a clean commit.
        """
        # Make sure the root is checked out in the destination repo
        log.debug3("Checking out root")
        self.dest_repo.checkout_ref(self.root_ref)

        # Fetch the ref to keep from the source in the dest
        log.debug3("Fetching %s", keep_ref)
        self.dest_repo.fetch_remote(self.SOURCE_REMOTE, {keep_ref})

        # Check out the files
        remote_ref_name = keep_ref
        if is_branch:
            remote_ref_name = f"refs/remotes/{self.SOURCE_REMOTE}/{keep_ref}"
        log.debug3("Checking out files for %s", remote_ref_name)
        self.dest_repo.checkout_ref(
            remote_ref_name, paths=self._get_all_checkout_files(remote_ref_name)
        )

        # Make a new branch named with this ref's shorthand name with any remote
        # information removed
        branch_name = keep_ref
        log.debug2("Dest branch name: %s", branch_name)
        root_commit, _ = self.dest_repo.get_ref(self.root_ref)
        branch = self.dest_repo.create_branch(branch_name, root_commit)
        self.dest_repo.checkout_ref(branch.name)

        # Make a commit with these files
        self.dest_repo.create_commit(keep_ref, parents=[root_commit.id])

        # Check the root branch back out
        self.dest_repo.checkout_ref(self.DEFAULT_BRANCH_NAME)

    ## Static Helpers ##

    @staticmethod
    def _list_branches(repo: VCS) -> List[str]:
        """List all of the local branches

        Args:
            repo (VCS): The repo to list

        Returns
            refs (List[str]): A set of all branch references
        """
        refs = set()
        for ref in repo.list_refs():
            _, repo_ref = repo.get_ref(ref)
            name_parts = repo_ref.name.split("/")
            if (
                "tags" not in name_parts
                and "HEAD" not in name_parts
                and "remotes" not in name_parts
                and repo_ref.name != "refs/stash"
            ):
                refs.add(ref)
        return sorted(sorted(refs))

    @staticmethod
    def _list_tags(repo: VCS) -> List[str]:
        """List all of the tags and references in the repo

        Args:
            repo (VCS): The repo to list

        Returns
            refs (List[str]): A set of all tag references
        """
        return list(
            sorted(
                {
                    ref
                    for ref in repo.list_refs()
                    if "refs/tags" in repo.get_ref(ref)[1].name
                }
            )
        )

    @staticmethod
    def _filter_refs(refs: List[str], exprs: Optional[List[str]]) -> List[str]:
        """Keep all refs that match at least one of the expressions"""
        return [ref for ref in refs if any(re.match(expr, ref) for expr in exprs or [])]
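
A sketch of driving the initializer directly; the destination path and the branch/tag expressions are illustrative:

initializer = VCSRepoInitializer(source=".", destination="oper8_vcs", force=True)
initializer.initialize_branches(
    branch_expr=[r"main", r"release-.*"],
    tag_expr=[r"[0-9]+\.[0-9]+\.[0-9]+"],
)
initializer.clean_up()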

__init__(source, destination, force)

Initialize and set up the repos and common attributes

Source code in oper8/setup_vcs.py, lines 85-126
def __init__(self, source: str, destination: str, force: bool):
    """Initialize and set up the repos and common attributes"""

    # Make sure the source is a git repo
    try:
        self.source_repo = VCS(source)
    except VCSConfigError as err:
        msg = f"Invalid source git repo: {source}"
        log.error(msg)
        raise ValueError(msg) from err
    log.debug("Source Repo: %s", source)

    # Set up the dest and make sure it's empty
    if os.path.isfile(destination):
        msg = f"Invalid destination: {destination} is a file"
        log.error(msg)
        raise ValueError(msg)
    os.makedirs(destination, exist_ok=True)
    contents = os.listdir(destination)
    if contents:
        if not force:
            msg = f"Invalid destination: {destination} is not empty"
            log.error(msg)
            raise ValueError(msg)
        log.debug("Force cleaning dest %s", destination)
        for entry in contents:
            full_path = os.path.join(destination, entry)
            if os.path.isdir(full_path):
                log.debug3("Removing dir: %s", full_path)
                shutil.rmtree(full_path)
            else:
                log.debug3("Removing file: %s", full_path)
                os.remove(full_path)

    # Initialize the dest as an empty repo
    log.info("Initializing dest repo: %s", destination)
    self.dest_repo = VCS(
        destination, create_if_needed=True, initial_head=self.DEFAULT_BRANCH_NAME
    )
    self.dest_repo.create_commit("root")
    self.dest_repo.add_remote(self.SOURCE_REMOTE, source)
    self.root_ref = self.dest_repo.head

clean_up()

Clean out all unnecessary content from the destination repo

Source code in oper8/setup_vcs.py, lines 157-176
def clean_up(self):
    """Clean out all unnecessary content from the destination repo"""

    # Check the root back out
    log.debug3("Checking out root")
    self.dest_repo.checkout_ref(self.root_ref)

    # Delete the source remote
    self.dest_repo.delete_remote(self.SOURCE_REMOTE)

    # Remove all tags
    for tag_name in self._list_tags(self.dest_repo):
        self.dest_repo.delete_tag(tag_name)

    # Remove the root branch and leave HEAD detached
    self.dest_repo.checkout_detached_head()
    self.dest_repo.delete_branch(self.DEFAULT_BRANCH_NAME)

    # Compress the references to remove orphaned refs and objects
    self.dest_repo.compress_references()

initialize_branches(branch_expr, tag_expr)

Perform the initialization of all branches in the destination repo from the branches and tags that match the given expressions.

Source code in oper8/setup_vcs.py, lines 128-155
def initialize_branches(
    self,
    branch_expr: Optional[List[str]],
    tag_expr: Optional[List[str]],
):
    """Perform the initialize of all branches in the destination repo from
    the branches and tags that match the given expressions.
    """
    # Get all tags and branches
    tags = self._list_tags(self.source_repo)
    branches = self._list_branches(self.source_repo)
    log.debug2("All Tags: %s", tags)
    log.debug2("All Branches: %s", branches)

    # Filter the tags and branches by the filter arguments
    keep_tags = self._filter_refs(tags, tag_expr)
    keep_branches = self._filter_refs(branches, branch_expr)
    log.debug2("Keep Tags: %s", keep_tags)
    log.debug2("Keep Branches: %s", keep_branches)

    # For each retained reference, fetch the ref from the source, check out the
    # files to the dest, and make a fresh commit
    for keep_ref in keep_tags:
        log.debug("Making destination branch [%s] from tag", keep_ref)
        self._make_dest_branch(keep_ref, False)
    for keep_ref in keep_branches:
        log.debug("Making destination branch [%s] from branch", keep_ref)
        self._make_dest_branch(keep_ref, True)

setup_vcs(source, destination=None, branch_expr=None, tag_expr=__UNSET__, force=False)

This utility will initialize an operator's VCS directory for use with oper8's VCS versioning.

Parameters:

Name Type Description Default
source str

The path to the source repository on disk

required
destination Optional[str]

The path where the VCS repo should be created

None
branch_expr Optional[List[str]]

Regular expression(s) to use to identify branches to retain in the VCS repo

None
tag_expr Optional[List[str]]

Regular expression(s) to use to identify tags to retain in the VCS repo

__UNSET__
force bool

Force overwrite existing destination

False
Source code in oper8/setup_vcs.py, lines 32-59
def setup_vcs(
    source: str,
    destination: Optional[str] = None,
    branch_expr: Optional[List[str]] = None,
    tag_expr: Optional[List[str]] = __UNSET__,
    force: bool = False,
):
    """This utility will initialize an operator's VCS directory for use with
    oper8's VCS versioning.

    Args:
        source (str): The path to the source repository on disk
        destination (Optional[str]): The path where the VCS repo should be
            created
        branch_expr (Optional[List[str]]): Regular expression(s) to use to
            identify branches to retain in the VCS repo
        tag_expr (Optional[List[str]]): Regular expression(s) to use to
            identify tags to retain in the VCS repo
        force (bool): Force overwrite existing destination
    """
    initializer = VCSRepoInitializer(
        source=source, destination=destination or DEFAULT_DEST, force=force
    )
    initializer.initialize_branches(
        branch_expr=branch_expr,
        tag_expr=tag_expr if tag_expr is not __UNSET__ else [DEFAULT_TAG_EXPR],
    )
    initializer.clean_up()
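
The convenience function wraps the three initializer steps above. A sketch, with an illustrative destination and filter expressions:

from oper8.setup_vcs import setup_vcs

setup_vcs(
    source=".",
    destination="oper8_vcs",
    branch_expr=[r"main", r"release-.*"],
    tag_expr=[r"[0-9]+\.[0-9]+\.[0-9]+"],
)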

status

This module holds the common functionality used to represent the status of resources managed by oper8

Oper8 supports the following orthogonal status conditions:

  • Ready: True if the service is able to take traffic
  • Updating: True if a modification is being actively applied to the application

Additionally, oper8 supports a top-level status element to report the detailed status of the managed components. The schema is:

{
    "componentStatus": {
        "allComponents": [list of all node names],
        "deployedComponents": [list of nodes that have successfully deployed],
        "verifiedComponents": [list of nodes that have successfully verified],
        "failedComponents": [list of nodes that failed],
        "deployed": "N/M",
        "verified": "N/M",
    }
}

ReadyReason

Bases: Enum

Nested class to hold reason constants for the Ready condition

Source code in oper8/status.py, lines 69-86
class ReadyReason(Enum):
    """Nested class to hold reason constants for the Ready condition"""

    # The application is stable and ready for traffic
    STABLE = "Stable"

    # The application is rolling out for the first time
    INITIALIZING = "Initializing"

    # The application rollout is in progress and will continue
    # the next reconcile
    IN_PROGRESS = "InProgress"

    # The application has hit an unrecoverable config error during rollout
    CONFIG_ERROR = "ConfigError"

    # The application has hit an unrecoverable error during rollout
    ERRORED = "Errored"

ServiceStatus

Bases: Enum

Nested class to hold status constants for the service status

Source code in oper8/status.py, lines 112-122
class ServiceStatus(Enum):
    """Nested class to hold status constants for the service status"""

    # Installation or Update reconciliation is in-progress
    IN_PROGRESS = "InProgress"

    # Installation or Update failed with error
    FAILED = "Failed"

    # Service is in stable state
    COMPLETED = "Completed"

UpdatingReason

Bases: Enum

Nested class to hold reason constants for the Updating condition

Source code in oper8/status.py, lines 89-109
class UpdatingReason(Enum):
    """Nested class to hold reason constants for the Updating condition"""

    # There are no updates to apply to the application
    STABLE = "Stable"

    # A required precondition was not met
    PRECONDITION_WAIT = "PreconditionWait"

    # A required deployment verification condition was not met
    VERIFY_WAIT = "VerifyWait"

    # The application attempted to perform an operation against the cluster that
    # failed unexpectedly
    CLUSTER_ERROR = "ClusterError"

    # An error occurred, so the application is not attempting to update
    ERRORED = "Errored"

    # Version upgrade is initiated
    VERSION_CHANGE = "VersionChange"

get_condition(type_name, current_status)

Extract the given condition type from a status object

Parameters:

Name Type Description Default
type_name str

str The condition type to fetch

required
current_status dict

dict The dict representation of the status for a given application

required

Returns:

Name Type Description
condition dict

dict The condition object if found, empty dict otherwise

Source code in oper8/status.py, lines 391-412
def get_condition(type_name: str, current_status: dict) -> dict:
    """Extract the given condition type from a status object

    Args:
        type:  str
            The condition type to fetch
        current_status:  dict
            The dict representation of the status for a given application

    Returns:
        condition:  dict
            The condition object if found, empty dict otherwise
    """
    cond = [
        cond
        for cond in current_status.get("conditions", [])
        if cond.get("type") == type_name
    ]
    if cond:
        assert len(cond) == 1, f"Found multiple condition entries for {type_name}"
        return cond[0]
    return {}
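
A sketch of checking the Ready condition from a fetched status; the literal "Ready" type string follows the condition names described at the top of this module:

status = session.get_status()
ready_condition = get_condition("Ready", status)
is_stable = ready_condition.get("reason") == ReadyReason.STABLE.value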

get_version(current_status)

Extract the current version (not desired version) from a status object

Parameters:

Name Type Description Default
current_status dict

dict The dict representation of the status for a given application

required

Returns:

Name Type Description
version Optional[str]

Optional[str] The current version if found in a status object, None otherwise.

Source code in oper8/status.py, lines 415-427
def get_version(current_status: dict) -> Optional[str]:
    """Extract the current version (not desired version) from a status object

    Args:
        current_status: dict
            The dict representation of the status for a given application

    Returns:
        version: Optional[dict]
            The current version if found in a status object, None otherwise.

    """
    return nested_get(current_status, VERSIONS_FIELD_CURRENT_VERSION)

make_application_status(ready_reason=None, ready_message='', updating_reason=None, updating_message='', component_state=None, external_conditions=None, external_status=None, version=None, supported_versions=None, operator_version=None, kind=None)

Create a full status object for an application

Parameters:

    ready_reason (Optional[Union[ReadyReason, str]]): The reason enum for the Ready condition. Default: None.
    ready_message (str): Plain-text message explaining the Ready condition value. Default: ''.
    updating_reason (Optional[Union[UpdatingReason, str]]): The reason enum for the Updating condition. Default: None.
    updating_message (str): Plain-text message explaining the Updating condition value. Default: ''.
    component_state (Optional[CompletionState]): The terminal state of components in the latest rollout. Default: None.
    external_conditions (Optional[List[dict]]): Additional conditions to include in the update. Default: None.
    external_status (Optional[dict]): Additional key/value status elements besides "conditions" that should be preserved through the update. Default: None.
    version (Optional[str]): The verified version of the application. Default: None.
    supported_versions (Optional[List[str]]): The list of supported versions for this application. Default: None.
    operator_version (Optional[str]): The operator version for this application. Default: None.
    kind (Optional[str]): The kind of the reconciling CR. If specified, this function adds a service status field that is compliant with IBM Cloud Pak requirements. Default: None.

Returns:

    current_status (dict): Dict representation of the status for the application.

Source code in oper8/status.py
def make_application_status(  # pylint: disable=too-many-arguments,too-many-locals
    ready_reason: Optional[Union[ReadyReason, str]] = None,
    ready_message: str = "",
    updating_reason: Optional[Union[UpdatingReason, str]] = None,
    updating_message: str = "",
    component_state: Optional[CompletionState] = None,
    external_conditions: Optional[List[dict]] = None,
    external_status: Optional[dict] = None,
    version: Optional[str] = None,
    supported_versions: Optional[List[str]] = None,
    operator_version: Optional[str] = None,
    kind: Optional[str] = None,
) -> dict:
    """Create a full status object for an application

    Args:
        ready_reason:  Optional[ReadyReason or str]
            The reason enum for the Ready condition
        ready_message:  str
            Plain-text message explaining the Ready condition value
        updating_reason:  Optional[UpdatingReason or str]
            The reason enum for the Updating condition
        updating_message:  str
            Plain-text message explaining the Updating condition value
        component_state:  Optional[CompletionState]
            The terminal state of components in the latest rollout
        external_conditions:  Optional[List[dict]]
            Additional conditions to include in the update
        external_status:  Optional[dict]
            Additional key/value status elements besides "conditions" that
            should be preserved through the update
        version:  Optional[str]
            The verified version of the application
        supported_versions:  Optional[List[str]]
            The list of supported versions for this application
        operator_version:  Optional[str]
            The operator version for this application
        kind: Optional[str]
            The kind of reconciling CR. If specified, this function adds
            service status field which is compliant with IBM Cloud Pak
            requirements.

    Returns:
        current_status:  dict
            Dict representation of the status for the application
    """
    now = datetime.now()
    conditions = []
    if ready_reason is not None:
        conditions.append(_make_ready_condition(ready_reason, ready_message, now))
    if updating_reason is not None:
        conditions.append(
            _make_updating_condition(updating_reason, updating_message, now)
        )
    conditions.extend(external_conditions or [])
    status = external_status or {}
    status["conditions"] = conditions

    # If a component_state is given, create the top-level status elements to
    # track which components have deployed and verified
    if component_state is not None:
        log.debug2("Adding component state to status")
        status[COMPONENT_STATUS] = _make_component_state(component_state)
        log.debug3(status[COMPONENT_STATUS])

    # Create the versions section
    if version is not None:
        nested_set(status, VERSIONS_FIELD_CURRENT_VERSION, version)
    if supported_versions is not None:
        nested_set(
            status,
            VERSIONS_FIELD_AVAILABLE_VERSIONS,
            [_make_available_version(version) for version in supported_versions],
        )
    if operator_version is not None:
        nested_set(status, OPERATOR_VERSION, operator_version)

    # Create service status section
    if kind:
        # make field name follow k8s naming convention
        service_status_field = kind[0].lower()
        if len(kind) > 1:
            service_status_field += kind[1:]
        service_status_field += "Status"

        # Only update service status if the current value is set by oper8. This
        # allows services to override the service status section
        current_service_status = status.get(service_status_field)
        managed_service_values = [status.value for status in ServiceStatus]
        if (
            not current_service_status
            or current_service_status in managed_service_values
        ):
            current_service_status = _make_service_status(
                ready_reason, updating_reason
            ).value

        status[service_status_field] = current_service_status

    return status

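For example, to build a status dict for an application that has finished updating (UpdatingReason is the enum documented above; the field layout noted in the comments is what the function produces):

from oper8.status import UpdatingReason, get_version, make_application_status

status = make_application_status(
    updating_reason=UpdatingReason.STABLE,
    updating_message="No updates in progress",
    version="1.2.3",
    supported_versions=["1.2.3", "1.3.0"],
)
print([cond["type"] for cond in status["conditions"]])  # ["Updating"]
print(get_version(status))                              # "1.2.3"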
status_changed(current_status, new_status)

Compare two status objects to determine if there is a meaningful change between the current status and the proposed new status. A meaningful change is defined as any change besides a timestamp.

Parameters:

    current_status (dict): The raw status dict from the current CR. Required.
    new_status (dict): The proposed new status. Required.

Returns:

    status_changed (bool): True if there is a meaningful change between the current status and the new status.

Source code in oper8/status.py
def status_changed(current_status: dict, new_status: dict) -> bool:
    """Compare two status objects to determine if there is a meaningful change
    between the current status and the proposed new status. A meaningful change
    is defined as any change besides a timestamp.

    Args:
        current_status:  dict
            The raw status dict from the current CR
        new_status:  dict
            The proposed new status

    Returns:
        status_changed:  bool
            True if there is a meaningful change between the current status and
            the new status
    """
    # Status objects must be dicts
    if not isinstance(current_status, dict) or not isinstance(new_status, dict):
        return True

    # Perform a deep diff, excluding timestamps
    return bool(
        DeepDiff(
            current_status,
            new_status,
            exclude_obj_callback=lambda _, path: path.endswith(f"{TIMESTAMP_KEY}']"),
        )
    )

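A minimal sketch; it assumes the only per-call difference between two otherwise identical statuses is the condition timestamp, which is exactly what the DeepDiff exclusion filters out:

from oper8.status import UpdatingReason, make_application_status, status_changed

current = make_application_status(updating_reason=UpdatingReason.STABLE)

# Rebuilding the same status later changes only the timestamps, so no
# meaningful change is reported
later = make_application_status(updating_reason=UpdatingReason.STABLE)
assert not status_changed(current, later)

# Changing a reason is a meaningful change
changed = make_application_status(updating_reason=UpdatingReason.VERSION_CHANGE)
assert status_changed(current, changed)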
update_application_status(current_status, **kwargs)

Create an updated status based on the values in the current status

Parameters:

    current_status (dict): The dict representation of the status for a given application. Required.
    **kwargs: Additional keyword args to pass to make_application_status.

Returns:

    updated_status (dict): Updated dict representation of the status for the application.

Source code in oper8/status.py
def update_application_status(current_status: dict, **kwargs) -> dict:
    """Create an updated status based on the values in the current status

    Args:
        current_status:  dict
            The dict representation of the status for a given application
        **kwargs:
            Additional keyword args to pass to make_application_status

    Returns:
        updated_status:  dict
            Updated dict representation of the status for the application
    """
    # Make a deep copy of current_status so that we aren't accidentally
    # modifying the current status object. This prevents a bug where status
    # changes are not detected
    current_status = copy.deepcopy(current_status)

    # Make a dict of type -> condition. This is necessary because other
    # conditions may be applied by ansible
    current_conditions = current_status.get("conditions", [])
    current_condition_map = {cond["type"]: cond for cond in current_conditions}
    ready_cond = current_condition_map.get(READY_CONDITION, {})
    updating_cond = current_condition_map.get(UPDATING_CONDITION, {})

    # Setup the kwargs for the status call
    ready_reason = ready_cond.get("reason")
    updating_reason = updating_cond.get("reason")
    if ready_reason:
        kwargs.setdefault("ready_reason", ReadyReason(ready_reason))
    if updating_reason:
        kwargs.setdefault("updating_reason", UpdatingReason(updating_reason))
    kwargs.setdefault("ready_message", ready_cond.get("message", ""))
    kwargs.setdefault("updating_message", updating_cond.get("message", ""))

    # Extract external conditions managed by other portions of the operator
    external_conditions = [
        cond
        for cond in current_conditions
        if cond.get("type") not in [READY_CONDITION, UPDATING_CONDITION]
    ]
    log.debug3("External conditions: %s", external_conditions)
    kwargs["external_conditions"] = external_conditions
    log.debug3("Merged status kwargs: %s", kwargs)

    # Extract external status elements (besides conditions) managed by other
    # portions of the operator
    external_status = {
        key: val for key, val in current_status.items() if key != "conditions"
    }
    kwargs["external_status"] = external_status
    kwargs["operator_version"] = config.operator_version

    return make_application_status(**kwargs)

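For example, overriding only the Updating condition while preserving conditions and status fields owned by other parts of the operator (the current status content is illustrative):

from oper8.status import UpdatingReason, update_application_status

current_status = {
    "conditions": [
        {"type": "Updating", "reason": "VersionChange", "message": "Upgrading"},
        {"type": "SomeExternalCondition", "status": "True"},
    ],
    "observedGeneration": 3,
}

updated = update_application_status(
    current_status,
    updating_reason=UpdatingReason.STABLE,
    updating_message="Upgrade complete",
)
# "SomeExternalCondition" and "observedGeneration" are carried forward; only
# the Updating condition is replaced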
update_resource_status(deploy_manager, kind, api_version, name, namespace, **kwargs)

Create an updated status based on the values in the current status

Parameters:

    deploy_manager (DeployManagerBase): The deploy manager used to get and set status. Required.
    kind (str): The kind of the resource. Required.
    api_version (str): The api_version of the resource. Required.
    name (str): The name of the resource. Required.
    namespace (str): The namespace the resource is located in. Required.
    **kwargs (dict): Any additional keyword arguments to be passed to update_application_status.

Returns:

    status_object (dict): The applied status if successful.

Source code in oper8/status.py
def update_resource_status(
    deploy_manager: "DeployManagerBase",  # noqa: F821
    kind: str,
    api_version: str,
    name: str,
    namespace: str,
    **kwargs: dict,
) -> dict:
    """Create an updated status based on the values in the current status

    Args:
        deploy_manager: DeployManagerBase
            The deploymanager used to get and set status
        kind: str
            The kind of the resource
        api_version: str
            The api_version of the resource
        name: str
            The name of the resource
        namespace: str
            The namespace the resource is located in
        **kwargs: Dict
            Any additional keyword arguments to be passed to update_application_status

    Returns:
        status_object: Dict
            The applied status if successful

    """
    log.debug3(
        "Updating status for %s/%s.%s/%s",
        namespace,
        api_version,
        kind,
        name,
    )

    # Fetch the current status from the cluster
    success, current_state = deploy_manager.get_object_current_state(
        api_version=api_version,
        kind=kind,
        name=name,
        namespace=namespace,
    )
    if not success:
        log.warning("Failed to fetch current state for %s/%s/%s", namespace, kind, name)
        return {}
    current_status = (current_state or {}).get("status", {})
    log.debug3("Pre-update status: %s", current_status)

    # Merge in the given status
    status_object = update_application_status(current_status, kind=kind, **kwargs)
    log.debug3("Updated status: %s", status_object)

    # Check to see if the status values of any conditions have changed and
    # only update the status if it has changed
    if status_changed(current_status, status_object):
        log.debug("Found meaningful change. Updating status")
        log.debug2("(current) %s != (updated) %s", current_status, status_object)

        # Do the update
        success, _ = deploy_manager.set_status(
            kind=kind,
            name=name,
            namespace=namespace,
            api_version=api_version,
            status=status_object,
        )

        # Since this is just a status update, we don't fail if the update fails,
        # but we do throw a warning
        if not success:
            log.warning("Failed to update status for [%s/%s/%s]", namespace, kind, name)
            return {}

    return status_object

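A sketch using the MockDeployManager from oper8.test_helpers (documented below) so the example is self-contained; in a real reconcile the deploy manager would be the one provided by the session:

from oper8.status import UpdatingReason, update_resource_status
from oper8.test_helpers.helpers import MockDeployManager

# Pre-populate the mocked cluster with the CR whose status will be written
dm = MockDeployManager(resources=[{
    "kind": "Foo",
    "apiVersion": "foo.bar.com/v42",
    "metadata": {"name": "my-foo", "namespace": "default"},
}])

applied = update_resource_status(
    dm,
    kind="Foo",
    api_version="foo.bar.com/v42",
    name="my-foo",
    namespace="default",
    updating_reason=UpdatingReason.STABLE,
    updating_message="Reconcile complete",
)
# `applied` is the status dict that was written, or {} if the fetch or update failed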
test_helpers

helpers

This module holds common helper functions for making testing easy

DummyComponentBase

Bases: Component

This base class provides all of the common functionality for DummyComponent and DummyLegacyComponent

Source code in oper8/test_helpers/helpers.py
class DummyComponentBase(Component):
    """This base class provides all of the common functionality for
    DummyComponent and DummyLegacyComponent
    """

    def __init__(
        self,
        name=None,
        session=None,
        api_objects=None,
        api_object_deps=None,
        render_chart_fail=False,
        deploy_fail=False,
        disable_fail=False,
        verify_fail=False,
        build_chart_fail=False,
        disabled=False,
        **kwargs,
    ):
        # Hang onto config inputs
        self.api_objects = api_objects or []
        self.api_object_deps = api_object_deps or {}

        # Mock passthroughs to the base class
        self.render_chart_fail = render_chart_fail
        self.render_chart = mock.Mock(
            side_effect=get_failable_method(self.render_chart_fail, self.render_chart)
        )
        self.deploy_fail = deploy_fail
        self.deploy = mock.Mock(
            side_effect=get_failable_method(self.deploy_fail, super().deploy)
        )
        self.disable_fail = disable_fail
        self.disable = mock.Mock(
            side_effect=get_failable_method(self.disable_fail, super().disable)
        )
        self.verify_fail = verify_fail
        self.verify = mock.Mock(
            side_effect=get_failable_method(self.verify_fail, super().verify)
        )
        self.build_chart_fail = build_chart_fail
        self.build_chart = mock.Mock(
            side_effect=get_failable_method(self.build_chart_fail, self.build_chart)
        )

        # Initialize Component
        if name is None:
            super().__init__(session=session, disabled=disabled)
        else:
            super().__init__(name=name, session=session, disabled=disabled)

    @alog.logged_function(log.debug2)
    def _add_resources(self, scope, session):
        """This will be called in both implementations in their respective
        places
        """
        api_objs = self._gather_dummy_resources(scope, session)

        # Add dependencies between objects
        for downstream_name, upstreams in self.api_object_deps.items():
            assert downstream_name in api_objs, "Bad test config"
            downstream = api_objs[downstream_name]
            for upstream_name in upstreams:
                assert upstream_name in api_objs, "Bad test config"
                upstream = api_objs[upstream_name]
                downstream.add_dependency(upstream)

    def _gather_dummy_resources(self, scope, session):
        api_objs = {}
        for api_obj in self.api_objects:
            log.debug3("Creating api object: %s", api_obj)

            # Create api object from tuple or generate one if callable
            if isinstance(api_obj, tuple):
                object_name, object_def = api_obj

                object_def = merge_configs(
                    {
                        "apiVersion": "v1",
                        "metadata": {"name": object_name, "namespace": TEST_NAMESPACE},
                    },
                    object_def,
                )
            elif isinstance(api_obj, dict):
                object_def = api_obj
                object_name = api_obj.get("metadata", {}).get("name")
            else:
                object_def = api_obj(self, session)
                object_name = api_obj.name

            resource_node = self.add_resource(object_name, object_def)
            if resource_node is not None:
                api_objs[resource_node.get_name()] = resource_node
        return api_objs

    def get_rendered_configs(self):
        configs = []
        for obj in self.managed_objects:
            configs.append(aconfig.Config(obj.definition))
        return configs

DummyController

Bases: Controller

Configurable implementation of a controller that can be used in unit tests to simulate Controller behavior

Source code in oper8/test_helpers/helpers.py
class DummyController(Controller):
    """Configurable implementation of a controller that can be used in unit
    tests to simulate Controller behavior
    """

    ##################
    ## Construction ##
    ##################

    group = "foo.bar.com"
    version = "v42"
    kind = "Foo"

    def __init__(
        self,
        components=None,
        after_deploy_fail=False,
        after_deploy_unsuccessful_fail=False,
        after_verify_fail=False,
        after_verify_unsuccessful_fail=False,
        setup_components_fail=False,
        finalize_components_fail=False,
        should_requeue_fail=False,
        component_type=DummyNodeComponent,
        **kwargs,
    ):
        # Initialize parent
        super().__init__(**kwargs)

        # Set up objects that this controller will manage directly
        self.component_specs = components or []
        self.component_type = component_type

        # Set up mocks
        self.after_deploy_fail = after_deploy_fail
        self.after_deploy_unsuccessful_fail = after_deploy_unsuccessful_fail
        self.after_verify_fail = after_verify_fail
        self.after_verify_unsuccessful_fail = after_verify_unsuccessful_fail
        self.setup_components_fail = setup_components_fail
        self.finalize_components_fail = finalize_components_fail
        self.should_requeue_fail = should_requeue_fail
        self.after_deploy = mock.Mock(
            side_effect=get_failable_method(
                self.after_deploy_fail, super().after_deploy
            )
        )
        self.after_deploy_unsuccessful = mock.Mock(
            side_effect=get_failable_method(
                self.after_deploy_unsuccessful_fail, super().after_deploy_unsuccessful
            )
        )
        self.after_verify = mock.Mock(
            side_effect=get_failable_method(
                self.after_verify_fail, super().after_verify
            )
        )
        self.after_verify_unsuccessful = mock.Mock(
            side_effect=get_failable_method(
                self.after_verify_unsuccessful_fail, super().after_verify_unsuccessful
            )
        )
        self.setup_components = mock.Mock(
            side_effect=get_failable_method(
                self.setup_components_fail, self.setup_components
            )
        )
        self.finalize_components = mock.Mock(
            side_effect=get_failable_method(
                self.finalize_components_fail, self.finalize_components
            )
        )
        self.should_requeue = mock.Mock(
            side_effect=get_failable_method(
                self.should_requeue_fail, super().should_requeue
            )
        )

    ##############################
    ## Interface Implementation ##
    ##############################

    def setup_components(self, session: Session):
        """Set up the components based on the component specs passed in"""

        # Add the components
        for component in self.component_specs:
            name = component["name"]
            log.debug2("Adding component %s (kwargs: %s)", name, component)
            comp = self._make_dummy_component(
                session=session,
                **component,
            )
            log.debug2("Component name: %s", comp.name)
            log.debug2(
                "Components in session [%s]: %s",
                session.id,
                [
                    comp.name
                    for comp in session.get_components()
                    + session.get_components(disabled=True)
                ],
            )

        # Add the dependencies after the nodes (so that we can look up by name)
        component_map = {
            comp.name: comp
            for comp in session.get_components() + session.get_components(disabled=True)
        }
        for component in self.component_specs:
            comp = component_map[component["name"]]
            upstreams = component.get("upstreams", [])
            for upstream in upstreams:
                session.add_component_dependency(comp, upstream)

        # Hang onto the components so that they can be checked
        self.components = component_map

    ############################
    ## Implementation Details ##
    ############################

    def _make_dummy_component(self, name="dummy", session=None, **kwargs):
        """This helper wraps any DummyComponent class so that the name class
        attribute is not overwritten by the next instance.
        """

        class WrappedDummyComponent(self.component_type):
            pass

        WrappedDummyComponent.name = name
        return WrappedDummyComponent(session=session, **kwargs)
setup_components(session)

Set up the components based on the component specs passed in

Source code in oper8/test_helpers/helpers.py
def setup_components(self, session: Session):
    """Set up the components based on the component specs passed in"""

    # Add the components
    for component in self.component_specs:
        name = component["name"]
        log.debug2("Adding component %s (kwargs: %s)", name, component)
        comp = self._make_dummy_component(
            session=session,
            **component,
        )
        log.debug2("Component name: %s", comp.name)
        log.debug2(
            "Components in session [%s]: %s",
            session.id,
            [
                comp.name
                for comp in session.get_components()
                + session.get_components(disabled=True)
            ],
        )

    # Add the dependencies after the nodes (so that we can look up by name)
    component_map = {
        comp.name: comp
        for comp in session.get_components() + session.get_components(disabled=True)
    }
    for component in self.component_specs:
        comp = component_map[component["name"]]
        upstreams = component.get("upstreams", [])
        for upstream in upstreams:
            session.add_component_dependency(comp, upstream)

    # Hang onto the components so that they can be checked
    self.components = component_map

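For example, a test might configure a DummyController with two components and a dependency between them (the spec shape shown is what setup_components consumes above):

from oper8.test_helpers.helpers import DummyController

# Each entry is a spec dict: "name" is required, "upstreams" declares
# dependencies by component name, and the remaining keys are forwarded to the
# dummy component constructor (api_objects, deploy_fail, ...)
ctrl = DummyController(
    components=[
        {"name": "foo", "api_objects": [("foo-cm", {"kind": "ConfigMap"})]},
        {"name": "bar", "upstreams": ["foo"]},
    ]
)
# When a reconcile session calls ctrl.setup_components(session), both
# components are instantiated and recorded in ctrl.components by name.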
DummyNodeComponent

Bases: DummyComponentBase

Configurable dummy component which will create an arbitrary set of resource node instances.

Source code in oper8/test_helpers/helpers.py
class DummyNodeComponent(DummyComponentBase):
    """
    Configurable dummy component which will create an arbitrary set of
    resource node instances.
    """

    def __init__(self, session, *args, **kwargs):
        """Construct with the additional option to fail build_chart"""
        super().__init__(*args, session=session, **kwargs)
        self._add_resources(self, session)
__init__(session, *args, **kwargs)

Construct with the additional option to fail build_chart

Source code in oper8/test_helpers/helpers.py
def __init__(self, session, *args, **kwargs):
    """Construct with the additional option to fail build_chart"""
    super().__init__(*args, session=session, **kwargs)
    self._add_resources(self, session)

FailOnce

Helper callable that will fail once on the N'th call

Source code in oper8/test_helpers/helpers.py
class FailOnce:
    """Helper callable that will fail once on the N'th call"""

    def __init__(self, fail_val, fail_number=1):
        self.call_count = 0
        self.fail_number = fail_number
        self.fail_val = fail_val

    def __call__(self, *_, **__):
        self.call_count += 1
        if self.call_count == self.fail_number:
            log.debug("Failing on call %d with %s", self.call_count, self.fail_val)
            if isinstance(self.fail_val, type) and issubclass(self.fail_val, Exception):
                raise self.fail_val("Raising!")
            return self.fail_val
        log.debug("Not failing on call %d", self.call_count)
        return

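For example, FailOnce can back a mock.Mock side effect to simulate a call that raises exactly once (here on the second invocation):

from unittest import mock

from oper8.test_helpers.helpers import FailOnce

flaky = mock.Mock(side_effect=FailOnce(RuntimeError, fail_number=2))
flaky()      # call 1 succeeds (FailOnce returns None)
try:
    flaky()  # call 2 raises RuntimeError("Raising!")
except RuntimeError:
    pass
flaky()      # call 3 and onward succeed again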
MockComponent

Bases: DummyNodeComponent

Dummy component with a valid mock name

Source code in oper8/test_helpers/helpers.py
class MockComponent(DummyNodeComponent):
    """Dummy component with a valid mock name"""

    name = "mock"

MockDeployManager

Bases: DryRunDeployManager

The MockDeployManager wraps a standard DryRunDeployManager and adds configuration options to simulate failures in each of its operations.

Source code in oper8/test_helpers/helpers.py
class MockDeployManager(DryRunDeployManager):
    """The MockDeployManager wraps a standard DryRunDeployManager and adds
    configuration options to simulate failures in each of its operations.
    """

    def __init__(
        self,
        deploy_fail=False,
        deploy_raise=False,
        disable_fail=False,
        disable_raise=False,
        get_state_fail=False,
        get_state_raise=False,
        watch_fail=False,
        watch_raise=False,
        generate_resource_version=True,
        set_status_fail=False,
        set_status_raise=False,
        auto_enable=True,
        resources=None,
        resource_dir=None,
        **kwargs,
    ):
        """This DeployManager can be configured to have various failure cases
        and will mock the state of the cluster so that get_object_current_state
        will pull its information from the local dict.
        """

        # Add apiVersion to resources that are missing it, then initialize the
        # dry run manager

        resources = resources or []
        # Parse pre-populated resources if needed
        resources = resources + (RunOperatorCmd._parse_resource_dir(resource_dir))

        for resource in resources:
            resource.setdefault("apiVersion", "v1")
        super().__init__(
            resources, generate_resource_version=generate_resource_version, **kwargs
        )

        self.watch_fail = "assert" if watch_raise else watch_fail
        self.deploy_fail = "assert" if deploy_raise else deploy_fail
        self.disable_fail = "assert" if disable_raise else disable_fail
        self.get_state_fail = "assert" if get_state_raise else get_state_fail
        self.set_status_fail = "assert" if set_status_raise else set_status_fail

        # If auto-enabling, turn the mocks on now
        if auto_enable:
            self.enable_mocks()

    #######################
    ## Helpers for Tests ##
    #######################

    def enable_mocks(self):
        """Turn the mocks on"""
        self.deploy = mock.Mock(
            side_effect=get_failable_method(
                self.deploy_fail, super().deploy, (False, False)
            )
        )
        self.disable = mock.Mock(
            side_effect=get_failable_method(
                self.disable_fail, super().disable, (False, False)
            )
        )
        self.get_object_current_state = mock.Mock(
            side_effect=get_failable_method(
                self.get_state_fail, super().get_object_current_state, (False, None)
            )
        )
        self.set_status = mock.Mock(
            side_effect=get_failable_method(
                self.set_status_fail, super().set_status, (False, False)
            )
        )
        self.watch_objects = mock.Mock(
            side_effect=get_failable_method(self.watch_fail, super().watch_objects, [])
        )

    def get_obj(self, kind, name, namespace=None, api_version=None):
        return self.get_object_current_state(kind, name, namespace, api_version)[1]

    def has_obj(self, *args, **kwargs):
        return self.get_obj(*args, **kwargs) is not None
__init__(deploy_fail=False, deploy_raise=False, disable_fail=False, disable_raise=False, get_state_fail=False, get_state_raise=False, watch_fail=False, watch_raise=False, generate_resource_version=True, set_status_fail=False, set_status_raise=False, auto_enable=True, resources=None, resource_dir=None, **kwargs)

This DeployManager can be configured to have various failure cases and will mock the state of the cluster so that get_object_current_state will pull its information from the local dict.

Source code in oper8/test_helpers/helpers.py
def __init__(
    self,
    deploy_fail=False,
    deploy_raise=False,
    disable_fail=False,
    disable_raise=False,
    get_state_fail=False,
    get_state_raise=False,
    watch_fail=False,
    watch_raise=False,
    generate_resource_version=True,
    set_status_fail=False,
    set_status_raise=False,
    auto_enable=True,
    resources=None,
    resource_dir=None,
    **kwargs,
):
    """This DeployManager can be configured to have various failure cases
    and will mock the state of the cluster so that get_object_current_state
    will pull its information from the local dict.
    """

    # Add apiVersion to resources that are missing it, then initialize the
    # dry run manager

    resources = resources or []
    # Parse pre-populated resources if needed
    resources = resources + (RunOperatorCmd._parse_resource_dir(resource_dir))

    for resource in resources:
        resource.setdefault("apiVersion", "v1")
    super().__init__(
        resources, generate_resource_version=generate_resource_version, **kwargs
    )

    self.watch_fail = "assert" if watch_raise else watch_fail
    self.deploy_fail = "assert" if deploy_raise else deploy_fail
    self.disable_fail = "assert" if disable_raise else disable_fail
    self.get_state_fail = "assert" if get_state_raise else get_state_fail
    self.set_status_fail = "assert" if set_status_raise else set_status_fail

    # If auto-enabling, turn the mocks on now
    if auto_enable:
        self.enable_mocks()
enable_mocks()

Turn the mocks on

Source code in oper8/test_helpers/helpers.py
def enable_mocks(self):
    """Turn the mocks on"""
    self.deploy = mock.Mock(
        side_effect=get_failable_method(
            self.deploy_fail, super().deploy, (False, False)
        )
    )
    self.disable = mock.Mock(
        side_effect=get_failable_method(
            self.disable_fail, super().disable, (False, False)
        )
    )
    self.get_object_current_state = mock.Mock(
        side_effect=get_failable_method(
            self.get_state_fail, super().get_object_current_state, (False, None)
        )
    )
    self.set_status = mock.Mock(
        side_effect=get_failable_method(
            self.set_status_fail, super().set_status, (False, False)
        )
    )
    self.watch_objects = mock.Mock(
        side_effect=get_failable_method(self.watch_fail, super().watch_objects, [])
    )

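For example, a test can pre-populate the mocked cluster state and force set_status to fail (resource content and namespace are illustrative):

from oper8.test_helpers.helpers import MockDeployManager

dm = MockDeployManager(
    resources=[{
        "kind": "ConfigMap",
        "apiVersion": "v1",
        "metadata": {"name": "my-cm", "namespace": "test"},
        "data": {"key": "value"},
    }],
    set_status_fail=True,
)
assert dm.has_obj("ConfigMap", "my-cm", namespace="test", api_version="v1")

success, _ = dm.set_status(
    kind="ConfigMap",
    name="my-cm",
    namespace="test",
    api_version="v1",
    status={"phase": "Ready"},
)
assert not success  # set_status was configured to fail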
MockTopApp

Bases: Controller

Mock implementation of a top-level Controller to allow subsystems to be tested as "children"

Source code in oper8/test_helpers/helpers.py
@controller(
    group="unit.test.com",
    version="v42",
    kind="MockTopApp",
)
class MockTopApp(Controller):
    """Mock implementation of a top-level Controller to allow subsystems to be
    tested as "children"
    """

    def __init__(self, config_defaults=None, component_types=None):
        super().__init__(config_defaults=config_defaults)
        self.component_types = component_types or []

    def setup_components(self, session: Session):
        for component_type in self.component_types:
            component_type(session=session)

    def do_rollout(self, session):
        try:
            return self.run_reconcile(session)
        except Exception as err:
            log.debug("Caught error in rollout: %s", err, exc_info=True)

MockedOpenshiftDeployManager

Bases: OpenshiftDeployManager

Override class that uses the mocked client

Source code in oper8/test_helpers/helpers.py
class MockedOpenshiftDeployManager(OpenshiftDeployManager):
    """Override class that uses the mocked client"""

    def __init__(self, manage_ansible_status=False, owner_cr=None, *args, **kwargs):
        self._mock_args = args
        self._mock_kwargs = kwargs
        super().__init__(manage_ansible_status, owner_cr)

    def _setup_client(self):
        mock_client = MockKubClient(*self._mock_args, **self._mock_kwargs)
        return DynamicClient(mock_client)

ModuleExit

Bases: Exception

Exception we'll use to break out when sys.exit was called

Source code in oper8/test_helpers/helpers.py
class ModuleExit(Exception):
    """Exception we'll use to break out when sys.exit was called"""

deep_merge(a, b)

NOTE: This should really be eliminated in favor of just using merge_configs

Source code in oper8/test_helpers/helpers.py
def deep_merge(a, b):
    """NOTE: This should really be eliminated in favor of just using
    merge_configs
    """
    return merge_configs(a, b)

default_branch_name() cached

Helper to get the current git context's default branch name

Source code in oper8/test_helpers/helpers.py
@lru_cache(maxsize=1)
def default_branch_name() -> str:
    """Helper to get the current git context's default branch name"""
    try:
        return (
            subprocess.run(
                "git config --get init.defaultBranch".split(),
                check=True,
                stdout=subprocess.PIPE,
            )
            .stdout.decode("utf-8")
            .strip()
        )
    except subprocess.CalledProcessError:
        return "master"

library_config(**config_overrides)

This context manager sets library config values temporarily and reverts them on completion

Source code in oper8/test_helpers/helpers.py
@contextmanager
def library_config(**config_overrides):
    """This context manager sets library config values temporarily and reverts
    them on completion
    """
    # Override the configs and hang onto the old values
    old_vals = {}
    for key, val in config_overrides.items():
        if key in config_detail_dict:
            old_vals[key] = config_detail_dict[key]
        config_detail_dict[key] = val

    # Yield to the context
    yield

    # Revert to the old values
    for key in config_overrides:
        if key in old_vals:
            config_detail_dict[key] = old_vals[key]
        else:
            del config_detail_dict[key]

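For example (dry_run and standalone are used here purely as illustrative keys; any value present in the library config can be overridden):

from oper8.test_helpers.helpers import library_config

with library_config(dry_run=True, standalone=True):
    ...  # test body that reads oper8.config sees the overridden values
# on exit, previous values are restored and newly added keys are removed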
make_patch(patch_type, body, name='test', target=None, namespace=TEST_NAMESPACE, api_version='org.oper8/v1', kind='TemporaryPatch')

Make a sample TemporaryPatch resource body

Source code in oper8/test_helpers/helpers.py
def make_patch(
    patch_type,
    body,
    name="test",
    target=None,
    namespace=TEST_NAMESPACE,
    api_version="org.oper8/v1",
    kind="TemporaryPatch",
):
    """Make a sample TemporaryPatch resource body"""
    target = target or {}
    patch_obj = {
        "apiVersion": api_version,
        "kind": kind,
        "metadata": {"name": name},
        "spec": {
            "apiVersion": target.get("apiVersion", "fake"),
            "kind": target.get("kind", "fake"),
            "name": target.get("metadata", {}).get("name", "fake"),
            "patchType": patch_type,
            "patch": body,
        },
    }
    if namespace is not None:
        patch_obj["metadata"]["namespace"] = namespace
    return aconfig.Config(
        patch_obj,
        override_env_vars=False,
    )

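For example, building a TemporaryPatch body aimed at a hypothetical Foo resource (the patchType string is illustrative):

from oper8.test_helpers.helpers import make_patch

target = {
    "apiVersion": "foo.bar.com/v42",
    "kind": "Foo",
    "metadata": {"name": "my-foo"},
}
patch = make_patch(
    patch_type="patchStrategicMerge",
    body={"spec": {"size": 3}},
    name="resize-foo",
    target=target,
)
print(patch.spec.patchType, patch.spec.name)  # patchStrategicMerge my-foo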
mock_config_file(config_object)

Yuck! Ansible makes it tough to actually inject parameters in since it expects that modules will only be run by its parent runner.

Source code in oper8/test_helpers/helpers.py
@contextmanager
def mock_config_file(config_object):
    """Yuck! Ansible makes it tough to actually inject parameters in since it
    expects that modules will only be run by its parent runner.
    """
    # Third Party
    import ansible.module_utils.basic

    ansible.module_utils.basic._ANSIBLE_ARGS = json.dumps(config_object).encode("utf-8")
    yield
    ansible.module_utils.basic._ANSIBLE_ARGS = None

setup_session_ctx(*args, **kwargs)

Context manager wrapper around setup_session. This simplifies the porting process from WA and really provides no functional benefit.

Source code in oper8/test_helpers/helpers.py
@contextmanager
def setup_session_ctx(*args, **kwargs):
    """Context manager wrapper around setup_session. This simplifies the porting
    process from WA and really provides no functional benefit.
    """
    yield setup_session(*args, **kwargs)

kub_mock

This module implements a mock of the kubernetes client library which can be used to patch the api client in an ansible module.

We attempt to emulate the internals of the kubernetes api_client, but this is based on code inspection of the current implementation and is certainly subject to change!

MockKubClient

Bases: ApiClient

Mocked version of kubernetes.client.ApiClient which swaps out the implementation of call_api() to use preconfigured responses

Source code in oper8/test_helpers/kub_mock.py
class MockKubClient(kubernetes.client.ApiClient):
    """Mocked version of kubernetes.client.ApiClient which swaps out the
    implementation of call_api() to use preconfigured responses
    """

    def __init__(self, cluster_state=None, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Save the current cluster state
        self.__cluster_state = cluster_state or {}
        log.debug3("Cluster State: %s", self.__cluster_state)

        # Setup listener variables
        self.__queue_lock = RLock()
        self.__watch_queues = set()

        # Save a list of kinds for each api group
        self._api_group_kinds = {}

        # Canned handlers
        self._handlers = {
            "/apis": {"GET": self.apis},
            "/version": {"GET": lambda *_, **__: self._make_response({})},
            "/api/v1": {"GET": self.api_v1},
        }
        for namespace, ns_entries in self.__cluster_state.items():
            for kind, kind_entries in ns_entries.items():
                for api_version, version_entries in kind_entries.items():
                    # Add handlers common to this kind
                    self._add_handlers_for_kind(namespace, kind, api_version)

                    # Add handlers for the individual pre-existing instances
                    for name, obj_state in version_entries.items():
                        # If this is a static object state, make sure the
                        # metadata aligns correctly
                        if isinstance(obj_state, dict):
                            self._add_resource_defaults(
                                obj_state=obj_state,
                                namespace=namespace,
                                kind=kind,
                                api_version=api_version,
                                name=name,
                            )

                        # Add the endpoints for this resource
                        self._add_handlers_for_resource(
                            namespace=namespace,
                            kind=kind,
                            api_version=api_version,
                            name=name,
                        )

        log.debug("Configured Handlers: %s", list(self._handlers.keys()))

    def call_api(
        self,
        resource_path,
        method,
        path_params=None,
        query_params=None,
        header_params=None,
        body=None,
        **kwargs,
    ):
        """Mocked out call function to return preconfigured responses

        NOTE: this is set up to work with how openshift.dynamic.DynamicClient
            calls self.client.call_api. It (currently) passes some as positional
            args and some as kwargs.
        """
        for key, value in query_params:
            if key == "watch" and value:
                method = "WATCH"
                break

        log.debug2("Mock [%s] request to [%s]", method, resource_path)
        log.debug4("Path Params: %s", path_params)
        log.debug4("Query Params: %s", query_params)
        log.debug4("Header Params: %s", header_params)
        log.debug4("Body: %s", body)

        # Find the right handler and execute it
        return self._get_handler(resource_path, method)(
            resource_path=resource_path,
            path_params=path_params,
            query_params=query_params,
            header_params=header_params,
            body=body,
            **kwargs,
        )

    ## Implementation Helpers ##################################################

    @staticmethod
    def _make_response(body, status_code=200):
        log.debug2("Making response with code: %d", status_code)
        log.debug4(body)
        resp = MockKubRestResponse(
            aconfig.Config(
                {
                    "status": status_code,
                    "reason": "MOCK",
                    "data": json.dumps(body).encode("utf8"),
                }
            )
        )
        if not 200 <= status_code <= 299:
            raise kubernetes.client.rest.ApiException(http_resp=resp)
        return resp

    def _get_handler(self, resource_path, method):
        # Look for a configured handler that matches the path exactly
        handler = self._handlers.get(resource_path, {}).get(method)

        # If no handler found, start looking for '*' handlers
        path_parts = resource_path.split("/")
        while handler is None and path_parts:
            star_path = "/".join(path_parts + ["*"])
            log.debug4("Looking for [%s]", star_path)
            handler = self._handlers.get(star_path, {}).get(method)
            path_parts.pop()

        # Return whatever we've found or not_found
        return handler or self.not_found

    @staticmethod
    def _get_kind_variants(kind):
        return [kind.lower(), f"{kind.lower()}s", kind]

    @staticmethod
    def _get_version_parts(api_version):
        parts = api_version.split("/", 1)
        if len(parts) == 2:
            group_name, version = parts
            api_endpoint = f"/apis/{group_name}/{version}"
        else:
            group_name = None
            version = api_version
            api_endpoint = f"/api/{api_version}"
        return group_name, version, api_endpoint

    def _add_handlers_for_kind(self, namespace, kind, api_version):
        # Set up the information needed for the apis and crds calls
        group_name, version, api_endpoint = self._get_version_parts(api_version)
        self._api_group_kinds.setdefault(group_name, {}).setdefault(version, []).append(
            kind
        )

        # Add a configured handler for this group type
        log.debug("Adding resource handler for: %s", api_endpoint)
        self._handlers[api_endpoint] = {
            "GET": lambda *_, **__: (self.current_state_crds(group_name, version))
        }

        # Add POST handlers for this type
        for kind_variant in self._get_kind_variants(kind):
            # Add different endpoints based on namespace
            if namespace:
                endpoint = f"{api_endpoint}/namespaces/{namespace}/{kind_variant}/*"
            else:
                endpoint = f"{api_endpoint}/{kind_variant}/*"

            log.debug2(
                "Adding POST & PUT & GET & WATCH & PATCH handler for (%s: %s)",
                kind,
                endpoint,
            )
            self._handlers[endpoint] = {
                "WATCH": lambda resource_path, body, *_, **__: (
                    self.current_state_watch(
                        resource_path, api_version, kind, resourced=False
                    )
                ),
                "PATCH": lambda resource_path, body, *_, **__: (
                    self.current_state_patch(resource_path, api_version, kind, body)
                ),
                "PUT": lambda resource_path, body, *_, **__: (
                    self.current_state_patch(resource_path, api_version, kind, body)
                ),
                "POST": lambda resource_path, body, *_, **__: (
                    self.current_state_post(resource_path, api_version, kind, body)
                ),
                "GET": lambda resource_path, *_, **__: (
                    self.current_state_list(resource_path, api_version, kind)
                ),
            }

    def _remove_handlers_for_resource(self, namespace, kind, api_version, name):
        # Get crucial API information out of the object
        _, __, api_endpoint = self._get_version_parts(api_version)
        for kind_variant in self._get_kind_variants(kind):
            if namespace:
                resource_api_endpoint = (
                    f"{api_endpoint}/namespaces/{namespace}/{kind_variant}/{name}"
                )
            else:
                resource_api_endpoint = f"{api_endpoint}/{kind_variant}/{name}"

            status_resource_api_endpoint = "/".join([resource_api_endpoint, "status"])
            del self._handlers[resource_api_endpoint]
            del self._handlers[status_resource_api_endpoint]

    def _add_handlers_for_resource(self, namespace, kind, api_version, name):
        # Get crucial API information out of the object
        _, __, api_endpoint = self._get_version_parts(api_version)

        # Add configured handlers for GET/PUT on this resource
        for kind_variant in self._get_kind_variants(kind):
            # The endpoint that will be used to hit this specific
            # resource
            if namespace:
                resource_api_endpoint = (
                    f"{api_endpoint}/namespaces/{namespace}/{kind_variant}/{name}"
                )
            else:
                resource_api_endpoint = f"{api_endpoint}/{kind_variant}/{name}"

            # Add the handlers
            log.debug("Adding GET handler for: %s", resource_api_endpoint)
            self._handlers[resource_api_endpoint] = {
                "GET": lambda *_, x=resource_api_endpoint, **__: (
                    self.current_state_get(x, api_version, kind)
                ),
                "PUT": lambda body, *_, x=resource_api_endpoint, **__: (
                    self.current_state_put(x, api_version, kind, body)
                ),
                "PATCH": lambda body, *_, x=resource_api_endpoint, **__: (
                    self.current_state_patch(x, api_version, kind, body)
                ),
                "DELETE": lambda *_, x=resource_api_endpoint, **__: (
                    self.current_state_delete(x, api_version, kind)
                ),
                "WATCH": lambda resource_path, body, *_, **__: (
                    self.current_state_watch(
                        resource_path, api_version, kind, resourced=True
                    )
                ),
            }

            # Add status PUT
            status_resource_api_endpoint = "/".join([resource_api_endpoint, "status"])
            self._handlers[status_resource_api_endpoint] = {
                "PUT": lambda body, *_, x=status_resource_api_endpoint, **__: (
                    self.current_state_put(
                        x,
                        api_version,
                        kind,
                        body,
                        is_status=True,
                    )
                )
            }

    def _get_object_state(self, method, namespace, kind, api_version, name):
        create = method in ["PUT", "PATCH", "POST"]
        if create:
            content = (
                self.__cluster_state.setdefault(namespace, {})
                .setdefault(kind, {})
                .setdefault(api_version, {})
                .get(name, {})
            )
        else:
            content = (
                self.__cluster_state.get(namespace, {})
                .get(kind, {})
                .get(api_version, {})
                .get(name)
            )

        # If it's a callable, call it!
        if callable(content):
            log.debug2("Making callable resource content")
            content = content(
                method=method,
                namespace=namespace,
                kind=kind,
                api_version=api_version,
                name=name,
            )

            # Add the defaults and handle the case where it's a tuple with a
            # status code
            if isinstance(content, tuple):
                body, status = content
                log.debug3("Handling tuple content with status [%s]", status)
                self._add_resource_defaults(body, namespace, kind, api_version, name)
                content = (body, status)
            else:
                self._add_resource_defaults(content, namespace, kind, api_version, name)
            log.debug3("Content: %s", content)
        return content

    def _list_object_state(self, method, namespace, kind, api_version):
        if method != "GET":
            return ([], 405)

        content = (
            self.__cluster_state.setdefault(namespace, {})
            .setdefault(kind, {})
            .setdefault(api_version, {})
        )

        resource_list = []
        return_status = 200
        for resource_name in content:
            resource = content[resource_name]

            # If it's a callable, call it!
            if callable(resource):
                log.debug2("Making callable resource content")
                resource = resource(
                    method=method,
                    namespace=namespace,
                    kind=kind,
                    api_version=api_version,
                    name=resource_name,
                )

            # Add the defaults and handle the case where it's a tuple with a
            # status code
            if isinstance(resource, tuple):
                resource, status = resource
                if status == 403:
                    return_status = 403
                    break
                self._add_resource_defaults(
                    resource, namespace, kind, api_version, resource_name
                )
            else:
                self._add_resource_defaults(
                    resource, namespace, kind, api_version, resource_name
                )

            log.debug3("Resource: %s", content)
            resource_list.append(resource)

        content = {"apiVersion": "v1", "kind": "List", "items": resource_list}
        return (content, return_status)

    def _update_object_current_state(self, namespace, kind, api_version, name, state):
        """Helper function to update a resource in the cluster and update all watch queues"""
        self.__cluster_state.setdefault(namespace, {}).setdefault(kind, {}).setdefault(
            api_version, {}
        )

        # Get the event type based on if name already exists
        event_type = KubeEventType.ADDED
        if name in self.__cluster_state[namespace][kind][api_version]:
            event_type = KubeEventType.MODIFIED

        # Update the cluster state and add resource_path if it doesn't already exist
        self.__cluster_state[namespace][kind][api_version][name] = state
        self._add_handlers_for_resource(
            namespace=namespace, kind=kind, api_version=api_version, name=name
        )
        self._update_watch_queues(event_type, state)

    def _delete_object_state(self, namespace, kind, api_version, name):
        """Helper function to delete a resource in the cluster and update all watch queues"""

        original_object = self.__cluster_state[namespace][kind][api_version][name]

        del self.__cluster_state[namespace][kind][api_version][name]
        if not self.__cluster_state[namespace][kind][api_version]:
            del self.__cluster_state[namespace][kind][api_version]
        if not self.__cluster_state[namespace][kind]:
            del self.__cluster_state[namespace][kind]
        if not self.__cluster_state[namespace]:
            del self.__cluster_state[namespace]

        self._update_watch_queues(KubeEventType.DELETED, original_object)

        # Remove any endpoint handlers for this resource
        self._remove_handlers_for_resource(namespace, kind, api_version, name)

        return True

    def _add_watch_queue(self, queue):
        with self.__queue_lock:
            log.debug3("Adding watch queue %s", queue)
            self.__watch_queues.add(queue)

    def _remove_watch_queue(self, queue):
        with self.__queue_lock:
            log.debug3("Removing watch queue %s", queue)
            self.__watch_queues.remove(queue)

    def _update_watch_queues(self, event, object):
        with self.__queue_lock:
            log.debug2("Updating watch queues with %s event", event)
            for queue in self.__watch_queues:
                queue.put((event, object))

    @staticmethod
    def _add_resource_defaults(obj_state, namespace, kind, api_version, name):
        obj_state["apiVersion"] = api_version
        obj_state["kind"] = kind
        md = obj_state.setdefault("metadata", {})
        if namespace:
            md["namespace"] = namespace
        md["name"] = name
        last_applied_annotation = annotate_last_applied(obj_state)
        md.setdefault("annotations", {}).update(
            last_applied_annotation["metadata"]["annotations"]
        )

    @classmethod
    def _patch_resource(cls, base, overrides):
        """Merge helper that supports removing elements when the override is set
        to None
        """
        for key, value in overrides.items():
            if value is None and key in base:
                del base[key]

            elif (
                key not in base
                or not isinstance(base[key], dict)
                or not isinstance(value, dict)
            ):
                base[key] = value
            else:
                base[key] = cls._patch_resource(base[key], value)

        return base

    ## Handlers ################################################################

    @classmethod
    def not_found(cls, *_, **__):
        log.debug3("Not Found")
        return cls._make_response(
            {
                "kind": "Status",
                "apiVersion": "v1",
                "metadata": {},
                "status": "Failure",
                "message": "the server could not find the requested resource",
                "reason": "NotFound",
                "details": {},
                "code": 404,
            },
            404,
        )

    def apis(self, *_, **__):
        api_group_list = {"kind": "APIGroupList", "apiVersion": "v1", "groups": []}
        for group_name, api_versions in self._api_group_kinds.items():
            if group_name is None:
                continue
            group = {"name": group_name, "versions": []}
            for api_version in api_versions:
                group["versions"].append(
                    {
                        "groupVersion": f"{group_name}/{api_version}",
                        "version": api_version,
                    }
                )
            group["preferredVersion"] = group["versions"][0]
            api_group_list["groups"].append(group)
        return self._make_response(api_group_list)

    def api_v1(self, *_, **__):
        return self.current_state_crds(None, "v1")

    def current_state_crds(self, group_name, api_version):
        resource_list = {
            "kind": "APIResourceList",
            "apiVersion": "v1",
            "groupVersion": f"{group_name}/{api_version}",
            "resources": [],
        }
        for kind in self._api_group_kinds.get(group_name, {}).get(api_version, []):
            resource_list["resources"].append(
                {
                    "name": f"{kind.lower()}s",
                    "singularName": kind.lower(),
                    "namespaced": True,
                    "kind": kind,
                    "verbs": [
                        "delete",
                        "deletecollection",
                        "get",
                        "list",
                        "patch",
                        "create",
                        "update",
                        "watch",
                    ],
                    "storageVersionHash": base64.b64encode(kind.encode("utf-8")).decode(
                        "utf-8"
                    ),
                }
            )
            resource_list["resources"].append(
                {
                    "name": f"{kind.lower()}s/status",
                    "singularName": "",
                    "namespaced": True,
                    "kind": kind,
                    "verbs": ["get", "patch", "update"],
                }
            )
        return self._make_response(resource_list)

    def current_state_watch(
        self, api_endpoint, api_version, kind, resourced=False, query_params=None
    ):
        # Parse the endpoint for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = None
        name = None
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]

        if resourced:
            name = endpoint_parts[-1]

        # Return Watch Stream Response
        return MockWatchStreamResponse(
            api_client=self,
            api_version=api_version,
            kind=kind,
            namespace=namespace,
            name=name,
            timeout=(query_params or {}).get("timeoutSeconds"),
        )

    def current_state_get(self, api_endpoint, api_version, kind):
        # Parse the endpoint for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]
        name = endpoint_parts[-1]

        # Look up the resources in the cluster state
        log.debug2(
            "Looking for current state of [%s/%s/%s/%s]",
            namespace,
            kind,
            api_version,
            name,
        )
        content = self._get_object_state(
            method="GET",
            namespace=namespace,
            kind=kind,
            api_version=api_version,
            name=name,
        )
        log.debug4("Content: %s", content)
        if content is not None:
            # If the content includes a status code, make the response with it
            if isinstance(content, tuple):
                return self._make_response(*content)
            return self._make_response(content)
        return self.not_found()

    def current_state_list(self, api_endpoint, api_version, kind):
        # Parse the endpoint for the namespace and name and where the kind is located
        # in endpoint_parts
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        kind_loc = 1
        if "namespaces" in endpoint_parts:
            kind_loc = endpoint_parts.index("namespaces") + 2
            namespace = endpoint_parts[kind_loc - 1]
        else:
            version_split = api_version.split("/")
            # 2 for ["","api"] and then add length of api_version split which would be
            # 2 for resources with a group and 1 without e.g. v1 = 1 and foo.bar.com/v1 would be 2
            kind_loc = 2 + len(version_split)

        # If Api was trying to get a specific resource and not list then return 404
        # as object must not have been found. This is checked by seeing if the kind
        # is at the end of the endpoint_parts
        if kind_loc != len(endpoint_parts) - 1:
            return self.not_found()

        # Look up the resources in the cluster state
        log.debug2(
            "Listing current state of [%s/%s/%s]",
            namespace,
            kind,
            api_version,
        )
        content = self._list_object_state(
            method="GET",
            namespace=namespace,
            kind=kind,
            api_version=api_version,
        )
        log.debug4("Content: %s", content)
        if content is not None:
            # If the content includes a status code, make the response with it
            if isinstance(content, tuple):
                return self._make_response(*content)
            return self._make_response(content)
        return self.not_found()

    def current_state_put(self, api_endpoint, api_version, kind, body, is_status=False):
        # Parse the endpoint for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]
        name = endpoint_parts[-1] if not is_status else endpoint_parts[-2]

        # Look up the resources in the cluster state
        log.debug2(
            "Looking for current state of [%s/%s/%s/%s]",
            namespace,
            kind,
            api_version,
            name,
        )
        content = self._get_object_state(
            method="PUT",
            namespace=namespace,
            kind=kind,
            api_version=api_version,
            name=name,
        )
        log.debug3("Current Content: %s", content)

        # If the content has a status code, unpack it
        status_code = 200
        if isinstance(content, tuple):
            content, status_code = content

        # If it's a non-200 status code, don't make the update
        if status_code != 200:
            return self._make_response(content, status_code)

        # If this is a status, we are only updating the status and keeping the
        # existing content
        if is_status:
            content.update({"status": body.get("status", {})})
            updated_content = content
        else:
            if "status" in body:
                del body["status"]
            updated_content = body
        log.debug3(
            "Updating [%s/%s/%s/%s] with body: %s",
            namespace,
            kind,
            api_version,
            name,
            updated_content,
        )
        self._update_object_current_state(
            namespace, kind, api_version, name, updated_content
        )

        return self._make_response(updated_content, status_code)

    def current_state_patch(self, api_endpoint, api_version, kind, body):
        # Parse the endpoint for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]
        name = endpoint_parts[-1]

        # Look up the resources in the cluster state
        log.debug2(
            "Looking for current state of [%s/%s/%s/%s]",
            namespace,
            kind,
            api_version,
            name,
        )
        content = self._get_object_state(
            method="PATCH",
            namespace=namespace,
            kind=kind,
            api_version=api_version,
            name=name,
        )
        log.debug3("Current Content: %s", content)
        log.debug3("Update body: %s", body)

        # If the content has a status code, unpack it
        status_code = 200
        if isinstance(content, tuple):
            content, status_code = content

        # If it's a non-200 status code, don't make the update
        if status_code != 200:
            return self._make_response(content, status_code)

        # Merge in the new body
        if "status" in body:
            del body["status"]
        log.debug3(
            "Updating [%s/%s/%s/%s] with body: %s",
            namespace,
            kind,
            api_version,
            name,
            body,
        )
        updated_content = self._patch_resource(content, body)
        log.debug3("Updated content: %s", updated_content)
        self._update_object_current_state(
            namespace, kind, api_version, name, updated_content
        )

        return self._make_response(updated_content, status_code)

    def current_state_post(self, api_endpoint, api_version, kind, body):
        log.debug2("Creating current state for [%s]", api_endpoint)

        # Parse the endpoint and body for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]
        name = body.get("metadata", {}).get("name")

        # Look up the resources in the cluster state
        log.debug2(
            "Looking for current state of [%s/%s/%s/%s]",
            namespace,
            kind,
            api_version,
            name,
        )
        content = self._get_object_state(
            method="POST",
            namespace=namespace,
            kind=kind,
            api_version=api_version,
            name=name,
        )
        log.debug3("Current Content: %s", content)

        # If the content has a status code, unpack it
        status_code = 200
        if isinstance(content, tuple):
            content, status_code = content

        # If it's a non-200 status code, don't make the update
        if status_code != 200:
            return self._make_response(content, status_code)

        # Overwrite the body
        log.debug3("Overwrite content: %s", body)
        self._update_object_current_state(namespace, kind, api_version, name, body)

        return self._make_response(body, status_code)

    def current_state_delete(self, api_endpoint, api_version, kind):
        # Parse the endpoint for the namespace and name
        endpoint_parts = api_endpoint.split("/")
        namespace = ""
        if "namespaces" in endpoint_parts:
            namespace = endpoint_parts[endpoint_parts.index("namespaces") + 1]
        name = endpoint_parts[-1]

        # Look up the resources in the cluster state
        log.debug2(
            "Looking for current state of [%s/%s/%s/%s]",
            namespace,
            kind,
            api_version,
            name,
        )

        content = self._get_object_state("DELETE", namespace, kind, api_version, name)
        response_code = 200
        if isinstance(content, tuple):
            content, response_code = content
        deleted = content is not None

        if content is not None:
            self._delete_object_state(
                namespace=namespace,
                kind=kind,
                api_version=api_version,
                name=name,
            )
        response_content = {
            "kind": "status",
            "apiVersion": "v1",
            "metadata": {},
            "details": {
                "name": name,
                "kind": kind,
            },
        }

        if deleted:
            response_content["status"] = "Success"
            response_content["details"]["uid"] = "Hope nothing uses this"
        else:
            response_content["status"] = "Failure"
            response_content["message"] = f'{kind} "{name}" not found'
            response_code = 404
        return self._make_response(response_content, response_code)
call_api(resource_path, method, path_params=None, query_params=None, header_params=None, body=None, **kwargs)

Mocked out call function to return preconfigured responses

NOTE: this is set up to work with how openshift.dynamic.DynamicClient calls self.client.call_api. It (currently) passes some as positional args and some as kwargs.

Source code in oper8/test_helpers/kub_mock.py
def call_api(
    self,
    resource_path,
    method,
    path_params=None,
    query_params=None,
    header_params=None,
    body=None,
    **kwargs,
):
    """Mocked out call function to return preconfigured responses

    NOTE: this is set up to work with how openshift.dynamic.DynamicClient
        calls self.client.call_api. It (currently) passes some as positional
        args and some as kwargs.
    """
    for key, value in query_params:
        if key == "watch" and value:
            method = "WATCH"
            break

    log.debug2("Mock [%s] request to [%s]", method, resource_path)
    log.debug4("Path Params: %s", path_params)
    log.debug4("Query Params: %s", query_params)
    log.debug4("Header Params: %s", header_params)
    log.debug4("Body: %s", body)

    # Find the right handler and execute it
    return self._get_handler(resource_path, method)(
        resource_path=resource_path,
        path_params=path_params,
        query_params=query_params,
        header_params=header_params,
        body=body,
        **kwargs,
    )
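
For illustration only, here is a minimal sketch of driving the mock directly, the way a test might. It assumes MockKubClient can be constructed with no arguments (as mock_kub_client_constructor below suggests) and uses made-up resource names; the shape of the returned response depends on the mock's internal _make_response helper.

from oper8.test_helpers.kub_mock import MockKubClient

client = MockKubClient()  # assumption: no-arg construction seeds an empty cluster

# query_params is iterated as (key, value) pairs, so pass a list of tuples.
# Including ("watch", True) would switch the request to the WATCH handler.
response = client.call_api(
    resource_path="/api/v1/namespaces/my-ns/configmaps/my-cm",  # made-up path
    method="GET",
    path_params={},
    query_params=[],
    header_params={},
    body=None,
)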

MockWatchStreamResponse

Helper class used to stream resources from the MockKubClient using a queue. When the streaming method is called, the MockWatchStreamResponse registers a threading queue with the MockKubClient and yields all events.

Source code in oper8/test_helpers/kub_mock.py
class MockWatchStreamResponse:
    """Helper class used to stream resources from the MockKubeClient using a queue.
    When the streaming method is called the MockWatchStreamResponse registers a
    threading queue with the MockKubClient and yields all events"""

    def __init__(
        self,
        api_client: "MockKubClient",
        api_version: str,
        kind: str,
        namespace: Optional[str] = None,
        name: Optional[str] = None,
        timeout: Optional[int] = None,
    ):
        self.api_client = api_client
        self.watch_queue = Queue()
        self.timeout = timeout or 250

        self.kind = kind
        self.api_version = api_version
        self.namespace = namespace
        self.name = name

        # Shutdown flag
        self.shutdown = Event()

    def __del__(self):
        self.api_client._remove_watch_queue(self.watch_queue)

    def stream(self, *args, **kwargs):
        """Continuously yield events from the cluster until the shutdown or timeout"""

        # Get the current resource state
        current_resources = []
        if self.name:
            current_obj = self.api_client._get_object_state(
                method="GET",
                namespace=self.namespace,
                kind=self.kind,
                api_version=self.api_version,
                name=self.name,
            )
            if current_obj:
                current_resources.append(current_obj)
        else:
            response, code = self.api_client._list_object_state(
                method="GET",
                namespace=self.namespace,
                kind=self.kind,
                api_version=self.api_version,
            )
            current_resources = response.get("items")

        # yield back the resources
        for resource in current_resources:
            log.debug2("Yielding initial state event")
            yield self._make_watch_response(KubeEventType.ADDED, resource)

        log.debug2("Yielded initial state. Starting watch")
        # Create a watch queue and add it to the api_client
        self.api_client._add_watch_queue(self.watch_queue)

        # Configure the timeout and end times
        timeout_delta = timedelta(seconds=self.timeout)
        end_time = datetime.now() + timeout_delta
        while True:
            timeout = (end_time - datetime.now()).total_seconds() or 1
            try:
                event_type, resource = self.watch_queue.get(timeout=timeout)
            except Empty:
                return

            if self._check_end_conditions(end_time):
                return

            resource_metadata = resource.get("metadata", {})

            # Ensure the kind/apiversion/namespace match the requested
            if (
                resource.get("kind") == self.kind
                and resource.get("apiVersion") == self.api_version
                and resource_metadata.get("namespace") == self.namespace
            ):
                # If resourced then ensure the name matches
                if self.name and resource_metadata.get("name") != self.name:
                    continue

                log.debug2("Yielding watch event")
                yield self._make_watch_response(event_type, resource)

    def close(self):
        pass

    def release_conn(self):
        self.shutdown.set()

    def _check_end_conditions(self, end_time):
        log.debug3("Checking shutdown and endtime conditions")
        if self.shutdown.is_set():
            return True

        return end_time < datetime.now()

    def _make_watch_response(self, event, object):
        # Add new line to watch response
        response = json.dumps({"type": event.value, "object": object}) + "\n"
        return response
stream(*args, **kwargs)

Continuously yield events from the cluster until the shutdown or timeout

Source code in oper8/test_helpers/kub_mock.py
def stream(self, *args, **kwargs):
    """Continuously yield events from the cluster until the shutdown or timeout"""

    # Get the current resource state
    current_resources = []
    if self.name:
        current_obj = self.api_client._get_object_state(
            method="GET",
            namespace=self.namespace,
            kind=self.kind,
            api_version=self.api_version,
            name=self.name,
        )
        if current_obj:
            current_resources.append(current_obj)
    else:
        response, code = self.api_client._list_object_state(
            method="GET",
            namespace=self.namespace,
            kind=self.kind,
            api_version=self.api_version,
        )
        current_resources = response.get("items")

    # yield back the resources
    for resource in current_resources:
        log.debug2("Yielding initial state event")
        yield self._make_watch_response(KubeEventType.ADDED, resource)

    log.debug2("Yielded initial state. Starting watch")
    # Create a watch queue and add it to the api_client
    self.api_client._add_watch_queue(self.watch_queue)

    # Configure the timeout and end times
    timeout_delta = timedelta(seconds=self.timeout)
    end_time = datetime.now() + timeout_delta
    while True:
        timeout = (end_time - datetime.now()).total_seconds() or 1
        try:
            event_type, resource = self.watch_queue.get(timeout=timeout)
        except Empty:
            return

        if self._check_end_conditions(end_time):
            return

        resource_metadata = resource.get("metadata", {})

        # Ensure the kind/apiversion/namespace match the requested
        if (
            resource.get("kind") == self.kind
            and resource.get("apiVersion") == self.api_version
            and resource_metadata.get("namespace") == self.namespace
        ):
            # If resourced then ensure the name matches
            if self.name and resource_metadata.get("name") != self.name:
                continue

            log.debug2("Yielding watch event")
            yield self._make_watch_response(event_type, resource)

mock_kub_client_constructor(*args, **kwargs)

Context manager to patch the api client

Source code in oper8/test_helpers/kub_mock.py
@contextmanager
def mock_kub_client_constructor(*args, **kwargs):
    """Context manager to patch the api client"""
    log.debug("Getting mocked client")
    client = MockKubClient(*args, **kwargs)
    log.debug("Mock client complete")
    with mock.patch(
        "kubernetes.config.new_client_from_config",
        return_value=client,
    ):
        yield client
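
A minimal usage sketch: within the context, any code that builds a client through kubernetes.config.new_client_from_config receives the mock instead of a real cluster connection.

import kubernetes

from oper8.test_helpers.kub_mock import mock_kub_client_constructor

with mock_kub_client_constructor() as mock_client:
    # The patched constructor now returns the MockKubClient instance
    patched = kubernetes.config.new_client_from_config()
    assert patched is mock_client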

oper8x_helpers

This module holds helpers that rely on oper8.x

set_tls_ca_secret(session)

Set the key/cert content for the shared CA secret. This function returns the pem-encoded values for convenience in other tests

Source code in oper8/test_helpers/oper8x_helpers.py
def set_tls_ca_secret(session):
    """Set the key/cert content for the shared CA secret. This function returns
    the pem-encoded values for convenience in other tests
    """
    with open(os.path.join(TEST_DATA_DIR, "test_ca.key")) as f:
        key_pem = f.read()
    with open(os.path.join(TEST_DATA_DIR, "test_ca.crt")) as f:
        crt_pem = f.read()
    set_secret_data(
        session,
        InternalCaComponent.CA_SECRET_NAME,
        data={
            InternalCaComponent.CA_KEY_FILENAME: common.b64_secret(key_pem),
            InternalCaComponent.CA_CRT_FILENAME: common.b64_secret(crt_pem),
        },
    )

    return key_pem, crt_pem

pwm_helpers

Utils and common classes for the python watch manager tests

DisabledLeadershipManager

Bases: LeadershipManagerBase

Leadership Manager that is always disabled

Source code in oper8/test_helpers/pwm_helpers.py
class DisabledLeadershipManager(LeadershipManagerBase):
    """Leadership Manager that is always disabled"""

    def __init__(self):
        self.shutdown_event = Event()

    def acquire_resource(self, resource):
        return False

    def acquire(self, force: bool = False) -> bool:
        if force:
            self.shutdown_event.set()
        return self.shutdown_event.wait()

    def release(self):
        raise NotImplementedError()

    def release_resource(self, resource=None):
        raise NotImplementedError()

    def is_leader(self):
        return False
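
A small sketch of the behavior: the manager never reports leadership, and acquire only returns once the shutdown event is set (which force=True does immediately).

from oper8.test_helpers.pwm_helpers import DisabledLeadershipManager

manager = DisabledLeadershipManager()
assert manager.is_leader() is False
assert manager.acquire_resource(resource=None) is False
# force=True sets the shutdown event, so the internal wait() returns True right away
assert manager.acquire(force=True) is True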

MockedReconcileThread

Bases: ReconcileThread

Subclass of ReconcileThread that mocks the subprocess. This was more reliable than using unittest.mock

Source code in oper8/test_helpers/pwm_helpers.py
class MockedReconcileThread(ReconcileThread):
    """Subclass of ReconcileThread that mocks the subprocess. This was more
    reliable than using unittest.mock"""

    _disable_singleton = True

    def __init__(
        self,
        deploy_manager=None,
        leadership_manager=None,
        subprocess_wait_time=0.1,
        returned_messages=None,
    ):
        self.requests = Queue()
        self.timer_events = Queue()
        self.processes_started = 0
        self.processes_finished = 0
        self.watch_threads_created = 0
        self.subprocess_wait_time = subprocess_wait_time
        self.returned_messages = returned_messages or [[]]
        super().__init__(deploy_manager, leadership_manager)

    def push_request(self, request: ReconcileRequest):
        self.requests.put(request)
        super().push_request(request)

    def get_request(self) -> ReconcileRequest:
        return self.requests.get()

    def _handle_watch_request(self, request: WatchRequest):
        self.watch_threads_created += 1
        return super()._handle_watch_request(request)

    def _handle_process_end(self, reconcile_process: ReconcileProcess):
        self.processes_finished += 1
        return super()._handle_process_end(reconcile_process)

    def _start_reconcile_process(
        self, request: ReconcileRequest, pipe: Connection
    ) -> multiprocessing.Process:
        self.processes_started += 1

        returned_messages = []
        if len(self.returned_messages) > 0:
            returned_messages = self.returned_messages.pop(0)

        # Create and start a mocked reconcile process
        process = self.spawn_ctx.Process(
            target=mocked_create_and_start_entrypoint,
            args=[
                self.logging_queue,
                request,
                pipe,
                self.subprocess_wait_time,
                returned_messages,
            ],
        )
        process.start()
        log.debug3(f"Started child process with pid: {process.pid}")

        return process

    def _create_timer_event_for_request(
        self, request: ReconcileRequest, result: ReconciliationResult = None
    ):
        timer_event = super()._create_timer_event_for_request(request, result)
        self.timer_events.put(timer_event)
        return timer_event

mocked_create_and_start_entrypoint(logging_queue, request, result_pipe, wait_time=0.5, returned_messages=None)

Source code in oper8/test_helpers/pwm_helpers.py
def mocked_create_and_start_entrypoint(
    logging_queue: multiprocessing.Queue,
    request: ReconcileRequest,
    result_pipe: Connection,
    wait_time=0.5,
    returned_messages=None,
):
    """"""
    time.sleep(wait_time)
    for message in returned_messages or []:
        result_pipe.send(message)

read_heartbeat_file(hb_file)

Parse a heartbeat file into a datetime

Source code in oper8/test_helpers/pwm_helpers.py
def read_heartbeat_file(hb_file: str) -> datetime:
    """Parse a heartbeat file into a datetime"""
    with open(hb_file) as handle:
        hb_str = handle.read()

    return datetime.strptime(hb_str, HeartbeatThread._DATE_FORMAT)

utils

Common utilities shared across components in the library

abstractclassproperty

This decorator implements a classproperty that will raise when accessed

Source code in oper8/utils.py
class abstractclassproperty:  # pylint: disable=invalid-name,too-few-public-methods
    """This decorator implements a classproperty that will raise when accessed"""

    def __init__(self, func):
        self.prop_name = func.__name__

    def __get__(self, *args):
        # If this is being called by __setattr__, we're ok because it's
        # attempting to set the attribute on the class
        curframe = inspect.currentframe()
        callframe = inspect.getouterframes(curframe, 2)[1]
        caller_name = callframe[3]
        if caller_name == "__setattr__":
            return None

        # If this is a help() call or a pdoc documentation request, return an
        # object with a docstring indicating that the property is abstract
        if (
            "help" in callframe.frame.f_code.co_names
            or callframe.frame.f_globals["__name__"] == "pdoc"
        ):

            class AbstractClassProperty:  # pylint: disable=missing-class-docstring
                __slots__ = []
                __doc__ = f"""The <{self.prop_name}> property is an abstract class property
                that must be overwritten in derived children
                """

            return AbstractClassProperty

        raise NotImplementedError(
            f"Cannot access abstractclassproperty {self.prop_name}"
        )
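
A short, illustrative example of the behavior (the class names are made up): a derived class that assigns the attribute shadows the property, while accessing it on the base class raises.

from oper8.utils import abstractclassproperty

class Base:
    @abstractclassproperty
    def kind(cls):
        """Derived classes must override this"""

class Child(Base):
    kind = "Child"

print(Child.kind)  # "Child" -- the override shadows the abstract property
try:
    Base.kind
except NotImplementedError as err:
    print(err)  # Cannot access abstractclassproperty kind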

classproperty

@classmethod+@property CITE: https://stackoverflow.com/a/22729414

Source code in oper8/utils.py
class classproperty:  # pylint: disable=invalid-name,too-few-public-methods
    """@classmethod+@property
    CITE: https://stackoverflow.com/a/22729414
    """

    def __init__(self, func):
        self.func = classmethod(func)

    def __get__(self, *args):
        return self.func.__get__(*args)()
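
A brief example (names are illustrative): the decorated function can be read directly off the class without creating an instance.

from oper8.utils import classproperty

class Defaults:
    _namespace = "default"

    @classproperty
    def namespace(cls):
        return cls._namespace

print(Defaults.namespace)  # "default" -- computed from the class, no instance needed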

add_finalizer(session, finalizer)

This helper adds a finalizer to the current session CR

Parameters:

session (SESSION_TYPE): The session for the current deploy. Required.
finalizer (str): The finalizer to be added. Required.
Source code in oper8/utils.py
def add_finalizer(session: SESSION_TYPE, finalizer: str):
    """This helper adds a finalizer to current session CR

    Args:
        session:  Session
            The session for the current deploy
        finalizer: str
            The finalizer to be added
    """
    if finalizer in session.finalizers:
        return

    log.debug("Adding finalizer: %s", finalizer)

    manifest = {
        "kind": session.kind,
        "apiVersion": session.api_version,
        "metadata": copy.deepcopy(session.metadata),
    }
    manifest["metadata"].setdefault("finalizers", []).append(finalizer)
    success, _ = session.deploy_manager.deploy([manifest])

    # Once successfully added to cluster than add it to session
    assert_cluster(success, f"Failed add finalizer {finalizer}")
    session.finalizers.append(finalizer)
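
A hedged sketch of how this might be used from reconcile logic that already has a session in scope; the finalizer string is purely illustrative.

from oper8.utils import add_finalizer

def ensure_cleanup_finalizer(session):
    """Sketch: register an illustrative finalizer on the CR being reconciled"""
    add_finalizer(session, "finalizers.example.com/cleanup")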

get_manifest_version(cr_manifest)

Get the version for a given custom resource, or from the config if a version override is provided

Parameters:

cr_manifest (aconfig.Config): The manifest to pull the version from. Required.

Returns:

version (str): The current version

Source code in oper8/utils.py
def get_manifest_version(cr_manifest: aconfig.Config) -> str:
    """Get the version for a given custom resource or from the config
    if version override provided

    Args:
        cr_manifest: aconfig.Config
            The manifest to pull the version from

    Returns:
        version: str
            The current version
    """
    if config.vcs.version_override:
        return config.vcs.version_override
    return nested_get(cr_manifest, config.vcs.field)
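
For illustration, assuming config.vcs.field is configured as "spec.version" and config.vcs.version_override is unset; a plain dict is used here for brevity even though the signature annotates aconfig.Config.

from oper8.utils import get_manifest_version

cr_manifest = {"spec": {"version": "1.2.3"}}
print(get_manifest_version(cr_manifest))  # "1.2.3" under the assumptions above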

get_passthrough_annotations(session)

This helper gets the set of annotations that should be passed from a parent CR to a child subsystem CR.

Parameters:

session (DeploySession): The session for the current deploy. Required.

Returns:

annotations (Dict[str, str]): The dict mapping of annotations that should be passed through

Source code in oper8/utils.py
def get_passthrough_annotations(session):
    """This helper gets the set of annotations that should be passed from a
    parent CR to a child subsystem CR.

    Args:
        session:  DeploySession
            The session for the current deploy

    Returns:
        annotations:  Dict[str, str]
            The dict mapping of annotations that should be passed through
    """
    annotations = session.metadata.get("annotations", {})
    passthrough_annotations = {
        k: v for k, v in annotations.items() if k in constants.ALL_ANNOTATIONS
    }

    log.debug2("Oper8 passthrough annotations: %s", passthrough_annotations)
    return passthrough_annotations

merge_configs(base, overrides)

Helper to perform a deep merge of the overrides into the base. The merge is done in place, but the resulting dict is also returned for convenience.

The merge logic is quite simple: If both the base and overrides have a key and the type of the key for both is a dict, recursively merge, otherwise set the base value to the override value.

Parameters:

base (dict): The base config that will be updated with the overrides. Required.
overrides (dict): The override config. Required.

Returns:

merged (dict): The merged results of overrides merged onto base

Source code in oper8/utils.py
def merge_configs(base, overrides) -> dict:
    """Helper to perform a deep merge of the overrides into the base. The merge
    is done in place, but the resulting dict is also returned for convenience.

    The merge logic is quite simple: If both the base and overrides have a key
    and the type of the key for both is a dict, recursively merge, otherwise
    set the base value to the override value.

    Args:
        base:  dict
            The base config that will be updated with the overrides
        overrides:  dict
            The override config

    Returns:
        merged:  dict
            The merged results of overrides merged onto base
    """
    for key, value in overrides.items():
        if (
            key not in base
            or not isinstance(base[key], dict)
            or not isinstance(value, dict)
        ):
            base[key] = value
        else:
            base[key] = merge_configs(base[key], value)

    return base
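
A quick example of the merge behavior: nested dicts are merged key by key, while non-dict values are overwritten.

from oper8.utils import merge_configs

base = {"resources": {"limits": {"cpu": "1"}}, "replicas": 1}
overrides = {"resources": {"limits": {"memory": "2Gi"}}, "replicas": 3}

merged = merge_configs(base, overrides)
# base is modified in place and also returned:
# {"resources": {"limits": {"cpu": "1", "memory": "2Gi"}}, "replicas": 3}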

nested_get(dct, key, dflt=None)

Helper to get values from a dict using 'foo.bar' key notation

Parameters:

dct (dict): The dict to look the key up in. Required.
key (str): Key that may contain '.' notation indicating dict nesting. Required.

Returns:

val (Any): Whatever is found at the given key, or dflt if the key is not found. This includes missing intermediate dicts.

Source code in oper8/utils.py
def nested_get(dct: dict, key: str, dflt=None) -> Any:
    """Helper to get values from a dict using 'foo.bar' key notation

    Args:
        dct:  dict
            The dict into which the key will be set
        key:  str
            Key that may contain '.' notation indicating dict nesting

    Returns:
        val:  Any
            Whatever is found at the given key or None if the key is not found.
            This includes missing intermediate dicts.
    """
    parts = key.split(constants.NESTED_DICT_DELIM)
    for i, part in enumerate(parts[:-1]):
        dct = dct.get(part, __MISSING__)
        if dct is __MISSING__:
            return dflt
        if not isinstance(dct, dict):
            raise TypeError(
                "Intermediate key {} is not a dict".format(  # pylint: disable=consider-using-f-string
                    constants.NESTED_DICT_DELIM.join(parts[:i])
                )
            )
    return dct.get(parts[-1], dflt)
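
A short example, assuming the nested-key delimiter (constants.NESTED_DICT_DELIM) is ".".

from oper8.utils import nested_get

cfg = {"spec": {"size": {"replicas": 3}}}

nested_get(cfg, "spec.size.replicas")         # 3
nested_get(cfg, "spec.size.missing", dflt=0)  # 0
nested_get(cfg, "spec.absent.replicas")       # None (missing intermediate dict)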

nested_set(dct, key, val)

Helper to set values in a dict using 'foo.bar' key notation

Parameters:

dct (dict): The dict into which the key will be set. Required.
key (str): Key that may contain '.' notation indicating dict nesting. Required.
val (Any): The value to place at the nested key. Required.
Source code in oper8/utils.py
def nested_set(dct: dict, key: str, val: Any):
    """Helper to set values in a dict using 'foo.bar' key notation

    Args:
        dct:  dict
            The dict into which the key will be set
        key:  str
            Key that may contain '.' notation indicating dict nesting
        val:  Any
            The value to place at the nested key
    """
    parts = key.split(constants.NESTED_DICT_DELIM)
    for i, part in enumerate(parts[:-1]):
        dct = dct.setdefault(part, {})
        if not isinstance(dct, dict):
            raise TypeError(
                "Intermediate key {} is not a dict".format(  # pylint: disable=consider-using-f-string
                    constants.NESTED_DICT_DELIM.join(parts[:i])
                )
            )
    dct[parts[-1]] = val
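
A short example, again assuming the delimiter is "."; intermediate dicts are created as needed.

from oper8.utils import nested_set

cfg = {}
nested_set(cfg, "spec.size.replicas", 3)
# cfg is now {"spec": {"size": {"replicas": 3}}}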

remove_finalizer(session, finalizer)

This helper removes a finalizer from the current session controller

Parameters:

session (SESSION_TYPE): The session for the current deploy. Required.
finalizer (str): The finalizer to remove. Required.

Returns:

annotations (Dict[str, str]): The dict mapping of annotations that should be passed through

Source code in oper8/utils.py
def remove_finalizer(session: SESSION_TYPE, finalizer: str):
    """This helper gets removes a finalizer from the current session controller

    Args:
        session:  Session
            The session for the current deploy
        finalizer: str
            The finalizer to remove

    Returns:
        annotations:  Dict[str, str]
            The dict mapping of annotations that should be passed through
    """
    if finalizer not in session.finalizers:
        return

    log.debug("Removing finalizer: %s", finalizer)

    # Create manifest with only required fields
    manifest = {
        "kind": session.kind,
        "apiVersion": session.api_version,
        "metadata": copy.deepcopy(session.metadata),
    }

    # Check to see if the object exists in the cluster
    success, found = session.get_object_current_state(
        kind=session.kind,
        api_version=session.api_version,
        name=session.name,
    )
    assert_cluster(success, "Failed to look up CR for self")

    # If still present in the cluster, update it without the finalizer
    if found:
        manifest["metadata"]["finalizers"].remove(finalizer)
        success, _ = session.deploy_manager.deploy([manifest])

        # Once successfully removed from cluster than remove from session
        assert_cluster(success, f"Failed remove finalizer {finalizer}")

    # If the finalizer has been confirmed to not be there, remove it from the
    # in-memory finalizers
    session.finalizers.remove(finalizer)

sanitize_for_serialization(obj)

Builds a JSON POST object.

If obj is None, return None.
If obj is str, int, long, float, or bool, return directly.
If obj is datetime.datetime or datetime.date, convert to a string in iso8601 format.
If obj is list, sanitize each element in the list.
If obj is dict, return the dict.
If obj is an OpenAPI model, return the properties dict.

:param obj: The data to serialize.
:return: The serialized form of data.

Source code in oper8/utils.py
def sanitize_for_serialization(obj):  # pylint: disable=too-many-return-statements
    """Builds a JSON POST object.
    If obj is None, return None.
    If obj is str, int, long, float, bool, return directly.
    If obj is datetime.datetime, datetime.date
        convert to string in iso8601 format.
    If obj is list, sanitize each element in the list.
    If obj is dict, return the dict.
    If obj is OpenAPI model, return the properties dict.
    :param obj: The data to serialize.
    :return: The serialized form of data.
    """
    if obj is None:  # pylint: disable=no-else-return
        return None
    elif isinstance(obj, (float, bool, bytes, six.text_type) + six.integer_types):
        return obj
    elif isinstance(obj, list):
        return [sanitize_for_serialization(sub_obj) for sub_obj in obj]
    elif isinstance(obj, tuple):
        return tuple(sanitize_for_serialization(sub_obj) for sub_obj in obj)
    elif isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    elif isinstance(obj, ResourceNode):
        return sanitize_for_serialization(obj.manifest)
    elif isinstance(obj, property):
        return sanitize_for_serialization(obj.fget())

    if isinstance(obj, dict):
        obj_dict = obj
    elif hasattr(obj, "attribute_map"):
        # Convert model obj to dict except
        # `openapi_types` and `attribute_map`.
        # Convert attribute name to json key in
        # model definition for request.
        obj_dict = {}
        for attr, name in six.iteritems(obj.attribute_map):
            if hasattr(obj, attr):
                obj_dict[name] = getattr(obj, attr)

    # Prune fields which are None but keep
    # empty arrays or dictionaries
    return_dict = {}
    for key, val in six.iteritems(obj_dict):
        updated_obj = sanitize_for_serialization(val)
        if updated_obj is not None:
            return_dict[key] = updated_obj
    return return_dict
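
A small example: dates become iso8601 strings, None values are pruned, and lists are sanitized element by element.

import datetime

from oper8.utils import sanitize_for_serialization

payload = {
    "when": datetime.date(2024, 1, 1),
    "labels": {"app": "oper8"},
    "ignored": None,
    "ports": [80, 443],
}
print(sanitize_for_serialization(payload))
# {"when": "2024-01-01", "labels": {"app": "oper8"}, "ports": [80, 443]}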

vcs

Version Control System class manages a specific git directory.

VCS

Generic class for handling a git repository. This class contains helper functions to get, list, and checkout references. Each instance of this class corresponds to a different git directory
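
A minimal usage sketch before the full source below, assuming an existing git checkout at the given path; the path and ref name are placeholders.

import pathlib

from oper8.vcs import VCS

vcs = VCS("/path/to/repo")       # placeholder path to an existing repo
print(vcs.head)                  # current HEAD commit hash
print(vcs.list_refs())           # set of branch/tag shorthands

# Check out a ref into a separate directory (worktree by default)
vcs.checkout_ref("v1.2.3", dest_path=pathlib.Path("/path/to/checkouts/v1.2.3"))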

Source code in oper8/vcs.py
class VCS:
    """Generic class for handling a git repository. This class contains helper functions
    to get, list, and checkout references. Each instance of this class corresponds to a
    different git directory
    """

    def __init__(self, directory: str, create_if_needed: bool = False, **kwargs):
        """Initialize the pygit2 Repository reference

        Args:
            directory: str
                The git directory
            create_if_needed: bool
                If True, the repo will be initialized if it doesn't already
                exist
            **kwargs:
                Passthrough args to the repository setup
        """
        # Get repo reference
        try:
            # Check for global file and create one if needed. This
            # is needed due to this issue: https://github.com/libgit2/pygit2/issues/915
            config_file = (
                pathlib.Path(option(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_GLOBAL))
                / ".gitconfig"
            )
            if not config_file.exists():
                config_file.touch(exist_ok=True)

            # Disable safe git directories. This solves a common problem
            # when running in openshift where the running user is different
            # from the owner of the filesystem
            global_config = Config.get_global_config()
            global_config["safe.directory"] = "*"

            self.repo = Repository(directory)
            log.debug2("Found repo: %s", self.repo)
        except GitError as err:
            if create_if_needed:
                self.repo = init_repository(directory, **kwargs)
            else:
                log.error("Invalid Repo: %s", err, exc_info=True)
                raise VCSConfigError(f"Invalid Repo at {directory}") from err

    ### Accessors

    @property
    def head(self) -> str:
        """Get a reference to the current HEAD"""
        return str(self.repo.head.target)

    def get_ref(self, refish: str) -> Tuple[Commit, Reference]:
        """Get a git commit and reference from a shorthand string

        Args:
            refish: str
                The human readable form of a git reference like branch name
                or commit hash

        Returns
            commit_and_reference: Tuple[Commit,Reference]
                Both a commit and reference for a given refish
        """
        try:
            return self.repo.resolve_refish(refish)
        except KeyError as err:
            log.error("Unable to find version %s in repo", refish)
            raise VCSConfigError(  # pylint: disable=raise-missing-from
                f"Version: '{refish}' not found in repo"
            ) from err

    def list_refs(self) -> Set[str]:
        """List all of the tags and references in the repo

        Returns
            ref_list: Set[str]
                A set of all references' shorthand as strings
        """
        # Loop through repo tags to get each tag's short name
        refs_set = set()
        for ref in self.repo.references.objects:
            refs_set.add(ref.shorthand)

        return refs_set

    ### Mutators

    def checkout_ref(
        self,
        refish: str,
        dest_path: Optional[pathlib.Path] = None,
        method: VCSCheckoutMethod = VCSCheckoutMethod.WORKTREE,
        **kwargs,
    ):
        """Checkout a refish to a given destination directory. This function
        first attempts to create a worktree but on failure will do a traditional
        clone

        Args:
            refish: str
                The refish to be checked out in the dest_dir
            dest_path: Optional[pathlib.Path]
                The destination directory if not in-place
            method: VCSCheckoutMethod=VCSCheckoutMethod.WORKTREE
                The checkout method to use, either a git clone or worktree add
            **kwargs
                Kwargs to pass through to checkout
        """

        # Get the commit and ref for a given refish
        commit, ref = self.get_ref(refish)

        # If in-place, check out directly
        if not dest_path:
            log.debug2("Checking out %s in place", refish)
            self.repo.checkout(ref, **kwargs)
            return

        # Check if dest directory already exists and if it has the correct
        # commit
        if dest_path.is_dir():
            dest_vcs = VCS(dest_path)

            # Check if the dest index file has been created. It is the last
            # part of a checkout. If index has not been created than another
            # process must be working on it
            dest_index_file = pathlib.Path(dest_vcs.repo.path) / "index"
            if not dest_index_file.is_file():
                raise VCSMultiProcessError(
                    "Index file not found. Checkout already in progress "
                )

            if dest_vcs.repo.head.peel(Commit) != commit:
                raise VCSConfigError(
                    f"Destination directory {dest_path} already exists with incorrect branch"
                )
            return

        # Create the directory if it doesn't exist
        dest_path.parents[0].mkdir(parents=True, exist_ok=True)

        if method == VCSCheckoutMethod.WORKTREE:
            # Create a unique branch for each worktree
            cleaned_dest_dir = "_".join(dest_path.parts[1:])
            branch_name = f"{refish}_{cleaned_dest_dir}"

            branch = self.create_branch(branch_name, commit)
            self._create_worktree(branch_name, dest_path, branch)
        elif method == VCSCheckoutMethod.CLONE:
            self._clone_ref(dest_path, ref, **kwargs)
        else:
            raise VCSConfigError(f"Invalid checkout method: {method}")

    def create_commit(
        self,
        message: str,
        parents: Optional[List[str]] = None,
        committer_name: str = "Oper8",
        committer_email: str = "noreply@oper8.org",
    ):
        """Create a commit in the repo with the files currently in the index

        Args:
            message: str
                The commit message
            parents: Optional[List[str]]
                Parent commit hashes
            committer_name: str
                The name of the committer
            committer_email: str
                Email address for this committer
        """
        parents = parents or []
        parent_commits = []
        for parent in parents:
            try:
                parent_commits.append(self.repo.get(parent))
            except ValueError as err:
                raise ValueError(f"Invalid parent commit: {parent}") from err
        signature = Signature(committer_name, committer_email)
        self.repo.create_commit(
            "HEAD", signature, signature, message, self.repo.index.write_tree(), parents
        )

    def add_remote(self, remote_name: str, remote_path: str):
        """Add a named remote to the repo

        Args:
            remote_name: str
                The name of the remote
            remote_path: str
                The path on disk to the remote repo
        """
        self.repo.remotes.create(remote_name, remote_path)

    def delete_remote(self, remote_name: str):
        """Remove a remote from the repo

        Args:
            remote_name:  str
                The name of the remote
        """
        self.repo.remotes.delete(remote_name)

    def fetch_remote(
        self,
        remote_name: str,
        refs: Optional[Set[str]] = None,
        wait: bool = True,
    ):
        """Fetch content from the named remote. If no refs given, all refs are
        fetched.

        Args:
            remote_name: str
                The name of the remote to fetch
            refs: Optional[Set[str]]
                The refs to fetch (fetch all if not given)
            wait: bool
                If true, wait for fetch to complete
        """
        remote = self.repo.remotes[remote_name]
        progress = remote.fetch(list(refs or []))
        while wait and progress.received_objects < progress.total_objects:
            time.sleep(0.1)  # pragma: no cover

    def create_branch(self, branch_name: str, commit: Commit) -> Branch:
        """Create branch given a name and commit

        Args:
            branch_name: str
                The name to be created
            commit: Commit
                The commit for the branch to be created from

        Returns:
            branch: Branch
                The created branch"""
        if branch_name in self.repo.branches:
            branch = self.repo.branches.get(branch_name)
            if branch.peel(Commit) != commit:
                raise VCSRuntimeError("Branch already exists with incorrect commit")
            return branch

        try:
            log.debug("Creating branch for %s", branch_name)
            return self.repo.branches.create(branch_name, commit)
        except AlreadyExistsError as err:
            # Branch must have been created by different processes
            log.warning("Branch %s already exists", branch_name)
            raise VCSMultiProcessError(f"Branch {branch_name} already exists") from err

        except OSError as err:
            raise VCSRuntimeError("Unable to create branch") from err

    def delete_branch(self, branch_name: str):
        """Delete a branch from the repo

        Args:
            branch_name:  str
                The name of the branch
        """
        self.repo.branches.delete(branch_name)

    def delete_tag(self, tag_name: str):
        """Delete a tag from the repo

        Args:
            tag_name:  str
                The name of the tag
        """
        self.repo.references.delete(f"refs/tags/{tag_name}")

    def checkout_detached_head(self, refish: Optional[str] = None):
        """Check out the current HEAD commit as a detached head

        Args:
            refish:  Optional[str]
                The ref to check out. If not given, the current HEAD is used
        """
        refish = refish or self.head

        # Create a placeholder reference to a non-existent remote
        dummy_ref = self.repo.references.create(
            "refs/remotes/doesnotexist/foobar", refish
        )
        self.repo.checkout(dummy_ref)
        self.repo.references.delete(dummy_ref.name)

    def compress_references(self):
        """Compress unreachable references in the repo"""
        self.repo.compress_references()

    ### Implementation Details

    def _clone_ref(self, dest_path: pathlib.Path, ref: Reference, **kwargs):
        """Clone a refish to a given destination directory

        Args:
            dest_path: pathlib.Path
                The destination directory
            ref: Reference
                The branch or ref to be checked out
            **kwargs
                Kwargs to pass through to checkout
        """
        try:
            dest_repo = clone_repository(self.repo.path, dest_path)
            dest_repo.checkout(refname=ref, **kwargs)
        except (OSError, GitError, KeyError) as err:
            log.error("Unable to clone refish: %s", ref.shorthand, exc_info=True)
            raise VCSRuntimeError("Unable to clone ref from repo") from err

    def _create_worktree(
        self, worktree_name: str, dest_path: pathlib.Path, branch: Branch
    ):
        """Create worktree for branch. This is better than a direct checkout
        as it saves space on checkout and is faster. This is especially
        beneficial on repositories with large git directories

        Args:
           worktree_name: str
               The name of the worktree
           dest_path: pathlib.Path
               The destination directory
           branch: Branch
               The branch to be checked out in the worktree
        """
        log.debug("Creating new worktree for %s", worktree_name)
        try:
            self.repo.add_worktree(worktree_name, dest_path, branch)
        except AlreadyExistsError as err:
            # Worktree must have been created by different processes
            log.warning("Worktree %s already exists", worktree_name)
            raise VCSMultiProcessError(
                f"Worktree {worktree_name} already exists"
            ) from err
        except GitError as err:
            # If reference is already checked out it must have been done by a different process
            if "is already checked out" in str(err):
                log.warning(
                    "Branch %s already checked out by other process",
                    worktree_name,
                    exc_info=True,
                )
                raise VCSMultiProcessError(
                    f"Branch {worktree_name} already checked out by other process"
                ) from err

            log.error(
                "Unexpected Git Error when adding worktree: %s", err, exc_info=True
            )
            raise VCSRuntimeError(
                "Adding worktree failed with unexpected git error"
            ) from err
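Taken together, the methods above form a small Git lifecycle: open (or initialize) a repository, commit whatever is staged in the index, and materialize a ref into a working directory. The following is a minimal usage sketch, not taken from the oper8 sources; it assumes VCS and VCSCheckoutMethod are importable from oper8.vcs and the paths shown are placeholders.

import pathlib

from oper8.vcs import VCS, VCSCheckoutMethod  # assumed import path

# Open the application repo, initializing it on first use
vcs = VCS("/opt/oper8/app-repo", create_if_needed=True)

# Commit whatever is currently staged in the repo's index
vcs.create_commit("Snapshot operator content")

# Materialize a version tag into its own working directory via a worktree
vcs.checkout_ref(
    "v1.2.3",
    dest_path=pathlib.Path("/opt/oper8/checkouts/v1.2.3"),
    method=VCSCheckoutMethod.WORKTREE,
)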

head property

Get a reference to the current HEAD

__init__(directory, create_if_needed=False, **kwargs)

Initialize the pygit2 Repository reference

Parameters:

    directory (str): The git directory. Required.
    create_if_needed (bool): If True, the repo will be initialized if it doesn't already exist. Default: False.
    **kwargs: Passthrough args to the repository setup. Default: {}.
Source code in oper8/vcs.py
def __init__(self, directory: str, create_if_needed: bool = False, **kwargs):
    """Initialize the pygit2 Repository reference

    Args:
        directory: str
            The git directory
        create_if_needed: bool
            If True, the repo will be initialized if it doesn't already
            exist
        **kwargs:
            Passthrough args to the repository setup
    """
    # Get repo reference
    try:
        # Check for global file and create one if needed. This
        # is needed due to this issue: https://github.com/libgit2/pygit2/issues/915
        config_file = (
            pathlib.Path(option(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_GLOBAL))
            / ".gitconfig"
        )
        if not config_file.exists():
            config_file.touch(exist_ok=True)

        # Disable safe git directories. This solves a common problem
        # when running in openshift where the running user is different
        # from the owner of the filesystem
        global_config = Config.get_global_config()
        global_config["safe.directory"] = "*"

        self.repo = Repository(directory)
        log.debug2("Found repo: %s", self.repo)
    except GitError as err:
        if create_if_needed:
            self.repo = init_repository(directory, **kwargs)
        else:
            log.error("Invalid Repo: %s", err, exc_info=True)
            raise VCSConfigError(f"Invalid Repo at {directory}") from err

add_remote(remote_name, remote_path)

Add a named remote to the repo

Parameters:

    remote_name (str): The name of the remote. Required.
    remote_path (str): The path on disk to the remote repo. Required.
Source code in oper8/vcs.py
def add_remote(self, remote_name: str, remote_path: str):
    """Add a named remote to the repo

    Args:
        remote_name: str
            The name of the remote
        remote_path: str
            The path on disk to the remote repo
    """
    self.repo.remotes.create(remote_name, remote_path)

checkout_detached_head(refish=None)

Check out the current HEAD commit as a detached head

Parameters:

    refish (Optional[str]): The ref to check out. If not given, the current HEAD is used. Default: None.
Source code in oper8/vcs.py
def checkout_detached_head(self, refish: Optional[str] = None):
    """Check out the current HEAD commit as a detached head

    Args:
        refish:  Optional[str]
            The ref to check out. If not given, the current HEAD is used
    """
    refish = refish or self.head

    # Create a placeholder reference to a non-existent remote
    dummy_ref = self.repo.references.create(
        "refs/remotes/doesnotexist/foobar", refish
    )
    self.repo.checkout(dummy_ref)
    self.repo.references.delete(dummy_ref.name)

checkout_ref(refish, dest_path=None, method=VCSCheckoutMethod.WORKTREE, **kwargs)

Checkout a refish to a given destination directory. This function first attempts to create a worktree but on failure will do a traditional clone

Parameters:

    refish (str): The refish to be checked out in the dest_dir. Required.
    dest_path (Optional[pathlib.Path]): The destination directory if not in-place. Default: None.
    method (VCSCheckoutMethod): The checkout method to use, either a git clone or worktree add. Default: VCSCheckoutMethod.WORKTREE.
Source code in oper8/vcs.py
def checkout_ref(
    self,
    refish: str,
    dest_path: Optional[pathlib.Path] = None,
    method: VCSCheckoutMethod = VCSCheckoutMethod.WORKTREE,
    **kwargs,
):
    """Checkout a refish to a given destination directory. This function
    first attempts to create a worktree but on failure will do a traditional
    clone

    Args:
        refish: str
            The refish to be checked out in the dest_dir
        dest_path: Optional[pathlib.Path]
            The destination directory if not in-place
        method: VCSCheckoutMethod=VCSCheckoutMethod.WORKTREE
            The checkout method to use, either a git clone or worktree add
        **kwargs
            Kwargs to pass through to checkout
    """

    # Get the commit and ref for a given refish
    commit, ref = self.get_ref(refish)

    # If in-place, check out directly
    if not dest_path:
        log.debug2("Checking out %s in place", refish)
        self.repo.checkout(ref, **kwargs)
        return

    # Check if dest directory already exists and if it has the correct
    # commit
    if dest_path.is_dir():
        dest_vcs = VCS(dest_path)

        # Check if the dest index file has been created. It is the last
    # part of a checkout. If the index has not been created, then another
        # process must be working on it
        dest_index_file = pathlib.Path(dest_vcs.repo.path) / "index"
        if not dest_index_file.is_file():
            raise VCSMultiProcessError(
                "Index file not found. Checkout already in progress "
            )

        if dest_vcs.repo.head.peel(Commit) != commit:
            raise VCSConfigError(
                f"Destination directory {dest_path} already exists with incorrect branch"
            )
        return

    # Create the directory if it doesn't exist
    dest_path.parents[0].mkdir(parents=True, exist_ok=True)

    if method == VCSCheckoutMethod.WORKTREE:
        # Create a unique branch for each worktree
        cleaned_dest_dir = "_".join(dest_path.parts[1:])
        branch_name = f"{refish}_{cleaned_dest_dir}"

        branch = self.create_branch(branch_name, commit)
        self._create_worktree(branch_name, dest_path, branch)
    elif method == VCSCheckoutMethod.CLONE:
        self._clone_ref(dest_path, ref, **kwargs)
    else:
        raise VCSConfigError(f"Invalid checkout method: {method}")
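Because multiple operator processes may try to materialize the same ref concurrently, checkout_ref surfaces that contention as VCSMultiProcessError. A common caller-side pattern is to treat it as retryable; the sketch below is illustrative only and assumes the exception classes are importable from oper8.vcs.

import pathlib
import time

from oper8.vcs import VCS, VCSCheckoutMethod, VCSMultiProcessError  # assumed imports

def checkout_with_retry(vcs: VCS, refish: str, dest: pathlib.Path, attempts: int = 3):
    """Retry a worktree checkout while another process holds the same ref."""
    for attempt in range(attempts):
        try:
            vcs.checkout_ref(refish, dest_path=dest, method=VCSCheckoutMethod.WORKTREE)
            return
        except VCSMultiProcessError:
            # Another reconcile is mid-checkout on this ref; back off and retry
            time.sleep(0.5 * (attempt + 1))
    raise VCSMultiProcessError(
        f"Checkout of {refish} still contended after {attempts} attempts"
    )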

compress_references()

Compress unreachable references in the repo

Source code in oper8/vcs.py
def compress_references(self):
    """Compress unreachable references in the repo"""
    self.repo.compress_references()

create_branch(branch_name, commit)

Create branch given a name and commit

Parameters:

    branch_name (str): The name to be created. Required.
    commit (Commit): The commit for the branch to be created from. Required.

Returns:

    branch (Branch): The created branch.

Source code in oper8/vcs.py
def create_branch(self, branch_name: str, commit: Commit) -> Branch:
    """Create branch given a name and commit

    Args:
        branch_name: str
            The name to be created
        commit: Commit
            The commit for the branch to be created from

    Returns:
        branch: Branch
            The created branch"""
    if branch_name in self.repo.branches:
        branch = self.repo.branches.get(branch_name)
        if branch.peel(Commit) != commit:
            raise VCSRuntimeError("Branch already exists with incorrect commit")
        return branch

    try:
        log.debug("Creating branch for %s", branch_name)
        return self.repo.branches.create(branch_name, commit)
    except AlreadyExistsError as err:
        # Branch must have been created by different processes
        log.warning("Branch %s already exists", branch_name)
        raise VCSMultiProcessError(f"Branch {branch_name} already exists") from err

    except OSError as err:
        raise VCSRuntimeError("Unable to create branch") from err

create_commit(message, parents=None, committer_name='Oper8', committer_email='noreply@oper8.org')

Create a commit in the repo with the files currently in the index

Parameters:

    message (str): The commit message. Required.
    parents (Optional[List[str]]): Parent commit hashes. Default: None.
    committer_name (str): The name of the committer. Default: 'Oper8'.
    committer_email (str): Email address for this committer. Default: 'noreply@oper8.org'.
Source code in oper8/vcs.py
def create_commit(
    self,
    message: str,
    parents: Optional[List[str]] = None,
    committer_name: str = "Oper8",
    committer_email: str = "noreply@oper8.org",
):
    """Create a commit in the repo with the files currently in the index

    Args:
        message: str
            The commit message
        parents: Optional[List[str]]
            Parent commit hashes
        committer_name: str
            The name of the committer
        committer_email: str
            Email address for this committer
    """
    parents = parents or []
    parent_commits = []
    for parent in parents:
        try:
            parent_commits.append(self.repo.get(parent))
        except ValueError as err:
            raise ValueError(f"Invalid parent commit: {parent}") from err
    signature = Signature(committer_name, committer_email)
    self.repo.create_commit(
        "HEAD", signature, signature, message, self.repo.index.write_tree(), parents
    )

delete_branch(branch_name)

Delete a branch from the repo

Parameters:

    branch_name (str): The name of the branch. Required.
Source code in oper8/vcs.py
def delete_branch(self, branch_name: str):
    """Delete a branch from the repo

    Args:
        branch_name:  str
            The name of the branch
    """
    self.repo.branches.delete(branch_name)

delete_remote(remote_name)

Remove a remote from the repo

Parameters:

    remote_name (str): The name of the remote. Required.
Source code in oper8/vcs.py
def delete_remote(self, remote_name: str):
    """Remove a remote from the repo

    Args:
        remote_name:  str
            The name of the remote
    """
    self.repo.remotes.delete(remote_name)

delete_tag(tag_name)

Delete a tag from the repo

Parameters:

    tag_name (str): The name of the tag. Required.
Source code in oper8/vcs.py
def delete_tag(self, tag_name: str):
    """Delete a tag from the repo

    Args:
        tag_name:  str
            The name of the tag
    """
    self.repo.references.delete(f"refs/tags/{tag_name}")

fetch_remote(remote_name, refs=None, wait=True)

Fetch content from the named remote. If no refs given, all refs are fetched.

Parameters:

    remote_name (str): The name of the remote to fetch. Required.
    refs (Optional[Set[str]]): The refs to fetch (fetch all if not given). Default: None.
    wait (bool): If true, wait for fetch to complete. Default: True.
Source code in oper8/vcs.py
def fetch_remote(
    self,
    remote_name: str,
    refs: Optional[Set[str]] = None,
    wait: bool = True,
):
    """Fetch content from the named remote. If no refs given, all refs are
    fetched.

    Args:
        remote_name: str
            The name of the remote to fetch
        refs: Optional[Set[str]]
            The refs to fetch (fetch all if not given)
        wait: bool
            If true, wait for fetch to complete
    """
    remote = self.repo.remotes[remote_name]
    progress = remote.fetch(list(refs or []))
    while wait and progress.received_objects < progress.total_objects:
        time.sleep(0.1)  # pragma: no cover
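The remote helpers are typically used together: register a remote, fetch the refs of interest, then remove the remote once the objects are local. A hypothetical sketch (the mirror path and ref names are placeholders):

from oper8.vcs import VCS  # assumed import path

vcs = VCS("/opt/oper8/app-repo")

# Register an on-disk mirror, pull two refs from it, then clean up
vcs.add_remote("mirror", "/mnt/shared/app-repo-mirror")
vcs.fetch_remote("mirror", refs={"refs/heads/main", "refs/tags/v1.2.3"}, wait=True)
vcs.delete_remote("mirror")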

get_ref(refish)

Get a git commit and reference from a shorthand string

Parameters:

    refish (str): The human readable form of a git reference, like a branch name or commit hash. Required.

Returns:

    commit_and_reference (Tuple[Commit, Reference]): Both a commit and reference for the given refish.

Source code in oper8/vcs.py
def get_ref(self, refish: str) -> Tuple[Commit, Reference]:
    """Get a git commit and reference from a shorthand string

    Args:
        refish: str
            The human readable form of a git reference like branch name
            or commit hash

    Returns
        commit_and_reference: Tuple[Commit,Reference]
            Both a commit and reference for a given refish
    """
    try:
        return self.repo.resolve_refish(refish)
    except KeyError as err:
        log.error("Unable to find version %s in repo", refish)
        raise VCSConfigError(  # pylint: disable=raise-missing-from
            f"Version: '{refish}' not found in repo"
        ) from err

list_refs()

List all of the tags and references in the repo

Returns:

    ref_list (Set[str]): A set of all references' shorthand as strings.

Source code in oper8/vcs.py
def list_refs(self) -> Set[str]:
    """List all of the tags and references in the repo

    Returns
        ref_list: Set[str]
            A set of all references' shorthand as strings
    """
    # Loop through repo tags to get each tag's short name
    refs_set = set()
    for ref in self.repo.references.objects:
        refs_set.add(ref.shorthand)

    return refs_set
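list_refs and get_ref pair naturally when validating a requested version before checking it out. A small sketch, assuming pygit2's Commit.id and Reference.shorthand attributes and the oper8.vcs import path:

from oper8.vcs import VCS, VCSConfigError  # assumed import path

vcs = VCS("/opt/oper8/app-repo")
version = "v1.2.3"

# Guard against versions that are not present in the app repo
if version not in vcs.list_refs():
    raise VCSConfigError(f"Version '{version}' not found in repo")

commit, ref = vcs.get_ref(version)
print(f"{ref.shorthand} resolves to commit {commit.id}")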

VCSCheckoutMethod

Bases: Enum

Enum for available VCS checkout methods

Source code in oper8/vcs.py
class VCSCheckoutMethod(Enum):
    """Enum for available VCS checkout methods"""

    WORKTREE = "worktree"
    CLONE = "clone"

VCSConfigError

Bases: ConfigError

Error for VCS Specific config exception

Source code in oper8/vcs.py
class VCSConfigError(ConfigError):
    """Error for VCS Specific config exception"""

VCSMultiProcessError

Bases: PreconditionError

VCS Error for when multiple git processes attempt to update the git directory at the same time

Source code in oper8/vcs.py
class VCSMultiProcessError(PreconditionError):
    """VCS Error for when multiple git processes attempt to update the git directory
    at the same time"""

VCSRuntimeError

Bases: Oper8FatalError

Error for general git exceptions

Source code in oper8/vcs.py
class VCSRuntimeError(Oper8FatalError):
    """Error for general git exceptions"""

verify_resources

This library holds common verification routines for individual kubernetes resources.

verify_deployment(object_state)

Verify that all members of a deployment are ready and that all members have been rolled out to the new version in the case of an update.

Source code in oper8/verify_resources.py
def verify_deployment(object_state: dict) -> bool:
    """Verify that all members of a deployment are ready
    and all members are rolled out to new version in case of update.
    """
    return _verify_condition(
        object_state, AVAILABLE_CONDITION_KEY, True
    ) and _verify_condition(
        object_state,
        PROGRESSING_CONDITION_KEY,
        True,
        expected_reason=NEW_RS_AVAILABLE_REASON,
    )
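For illustration, a Deployment state that passes this check looks roughly like the dict below. This is a hand-written sketch, assuming the condition keys correspond to the standard Kubernetes Available and Progressing condition types with the NewReplicaSetAvailable reason, and that the import path is oper8.verify_resources.

from oper8.verify_resources import verify_deployment  # assumed import path

deployment_state = {
    "kind": "Deployment",
    "status": {
        "conditions": [
            # Deployment has the minimum number of available replicas
            {"type": "Available", "status": "True", "lastTransitionTime": "2024-01-01T00:00:00Z"},
            # The latest ReplicaSet has fully rolled out
            {
                "type": "Progressing",
                "status": "True",
                "reason": "NewReplicaSetAvailable",
                "lastTransitionTime": "2024-01-01T00:00:00Z",
            },
        ]
    },
}

print(verify_deployment(deployment_state))  # expected True under these assumptions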

verify_job(object_state)

Verify that a job has completed successfully

Source code in oper8/verify_resources.py
def verify_job(object_state: dict) -> bool:
    """Verify that a job has completed successfully"""
    # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/job-v1/#JobStatus
    return _verify_condition(object_state, COMPLETE_CONDITION_KEY, True)

verify_pod(object_state)

Verify that a pod resource is ready

Source code in oper8/verify_resources.py
def verify_pod(object_state: dict) -> bool:
    """Verify that a pod resources is ready"""
    return _verify_condition(object_state, "Ready", True)

verify_resource(kind, name, api_version, session, *, namespace=_SESSION_NAMESPACE, verify_function=None, is_subsystem=False, condition_type=None, timestamp_key=None)

Verify a resource detailed in a ManagedObject.

NOTE: we can't do type-hinting on the session because importing DeploySession creates a circular dependency with Component. We should probably fix that...

This function will run the appropriate verification function for the given resource kind.

Parameters:

    kind (str): The kind of the resource to look for. Required.
    name (str): The name of the resource to look for. Required.
    api_version (str): The api_version of the resource to look for. Required.
    session (DeploySession): The current deployment session. Required.

Kwargs:

    is_subsystem (bool): Whether or not the given kind is an oper8 subsystem.
    condition_type (str): For non-standard types, this is the type name for the condition to check for verification.
    timestamp_key (str): For non-standard types, this is the key in the condition to use to sort by date.

Returns:

    success (bool): True on successful deployment verification, False on failure conditions.

Source code in oper8/verify_resources.py
def verify_resource(
    kind: str,
    name: str,
    api_version: str,
    session,
    *,
    # Use a predefined _SESSION_NAMESPACE default instead of None to differentiate between
    # non-namespaced resources (which pass None) and those that use session.namespace
    namespace: Optional[str] = _SESSION_NAMESPACE,
    verify_function: Optional[RESOURCE_VERIFY_FUNCTION] = None,
    is_subsystem: bool = False,
    condition_type: Optional[str] = None,
    timestamp_key: Optional[str] = None,
) -> bool:
    """Verify a resource detailed in a ManagedObject.

    NOTE: we can't do type-hinting on the session because importing
        DeploySession creates a circular dependency with Component. We should
        probably fix that...

    This function will run the appropriate verification function for the given
    resource kind.

    Args:
        kind:  str
            The kind of the resource to look for
        name:  str
            The name of the resource to look for
        api_version:  str
            The api_version of the resource to look for
        session:  DeploySession
            The current deployment session

    Kwargs:
        is_subsystem:  bool
            Whether or not the given kind is an oper8 subsystem
        condition_type:  str
            For non-standard types, this is the type name for the condition to
            check for verification
        timestamp_key:  str
            For non-standard types, this is the key in the condition to use to
            sort by date

    Returns:
        success:  bool
            True on successful deployment verification, False on failure
            conditions
    """

    # Configure namespace if it isn't set
    namespace = namespace if namespace != _SESSION_NAMESPACE else session.namespace

    # Get the state of the object
    log.debug2("Fetching current content for [%s/%s] to verify it", kind, name)
    success, content = session.get_object_current_state(
        kind=kind, name=name, api_version=api_version, namespace=namespace
    )
    assert success, f"Failed to fetch state of [{kind}/{name}]"

    # If the object is not found, it is not verified
    if not content:
        log.debug("Could not find [%s/%s]. Not Ready.", kind, name)
        return False

    # If a custom condition_type is given, then use the general condition
    # verifier
    if condition_type is not None:
        log.debug(
            "Using custom verification for [%s/%s] with condition [%s]",
            kind,
            name,
            condition_type,
        )
        return _verify_condition(
            content, condition_type, True, timestamp_key or DEFAULT_TIMESTAMP_KEY
        )

    # Run the appropriate verification function if there is one available
    verify_fn = verify_function or _resource_verifiers.get(kind)
    if not verify_fn and is_subsystem:
        log.debug("Using oper8 subsystem verifier for [%s/%s]", kind, name)
        verify_fn = partial(
            verify_subsystem,
            desired_version=session.version,
        )

    # If a verifier was found, run it
    if verify_fn:
        log.debug2("Running [%s] verifier for [%s/%s]", kind, kind, name)
        return verify_fn(content)

    # If no other verifier found, we consider it verified as long as it is
    # present in the cluster
    log.debug2("No kind-specific verifier for [%s/%s]", kind, name)
    return True

verify_statefulset(object_state)

Verify that all desired replicas of a StatefulSet are ready

Source code in oper8/verify_resources.py
def verify_statefulset(object_state: dict) -> bool:
    """Verify that all desired replicas of a StatefulSet are ready"""
    obj_status = object_state.get("status", {})
    expected_replicas = obj_status.get("replicas")
    if expected_replicas is None:
        log.debug2("No replicas found in statefulset status. Not ready.")
        return False
    ready_replicas = obj_status.get("readyReplicas", 0)
    return ready_replicas == expected_replicas
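A quick sketch of the replica bookkeeping this check performs; the status dicts are hand-written examples and the import path is assumed to be oper8.verify_resources.

from oper8.verify_resources import verify_statefulset  # assumed import path

assert verify_statefulset({"status": {"replicas": 3, "readyReplicas": 3}})      # fully rolled out
assert not verify_statefulset({"status": {"replicas": 3, "readyReplicas": 1}})  # still scaling
assert not verify_statefulset({"status": {}})                                   # no replica count reported yet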

verify_subsystem(object_state, desired_version=None)

Verify that an oper8-managed subsystem is ready

Source code in oper8/verify_resources.py
def verify_subsystem(object_state: dict, desired_version: str = None) -> bool:
    """Verify that an oper8-managed subsystem is ready"""

    current_version = status.get_version(object_state.get("status", {}))
    # Once rollout finishes with verification, version status is added.
    #   Until then, mark the subsystem as unverified.
    if desired_version and not current_version:
        log.debug2(
            "Reconciled version %s does not match desired: %s",
            current_version,
            desired_version,
        )
        return False

    return (
        _verify_condition(
            object_state, status.READY_CONDITION, True, status.TIMESTAMP_KEY
        )
        and _verify_condition(
            object_state, status.UPDATING_CONDITION, False, status.TIMESTAMP_KEY
        )
        and current_version == desired_version
    )

watch_manager

Top-level watch_manager imports

ansible_watch_manager

This module holds the ansible implementation of the WatchManager

ansible_watch_manager

Ansible-based implementation of the WatchManager

AnsibleWatchManager

Bases: WatchManagerBase

The AnsibleWatchManager uses the core of an ansible-based operator to manage watching resources. The key elements are:

  1. Manage a watches.yaml file for all watched resources
  2. Manage a playbook for each watched resource
  3. Manage the ansible operator's executable as a subprocess
Source code in oper8/watch_manager/ansible_watch_manager/ansible_watch_manager.py
class AnsibleWatchManager(WatchManagerBase):
    """The AnsibleWatchManager uses the core of an ansible-based operator to
    manage watching resources. The key elements are:

    1. Manage a `watches.yaml` file for all watched resources
    2. Manage a playbook for each watched resource
    3. Manage the ansible operator's executable as a subprocess
    """

    # Shared singleton process used to manage all watches via ansible
    ANSIBLE_PROCESS = None

    # Defaults for initialization args held separately to allow for override
    # precedence order
    _DEFAULT_INIT_KWARGS = {
        "ansible_base_path": "/opt/ansible",
        "ansible_entrypoint": DEFAULT_ENTRYPOINT,
        "ansible_args": "",
        "manage_status": False,
        "watch_dependent_resources": False,
        "reconcile_period": "10m",
        "playbook_parameters": None,
    }

    def __init__(
        self,
        controller_type: Type[Controller],
        *,
        ansible_base_path: Optional[str] = None,
        ansible_entrypoint: Optional[str] = None,
        ansible_args: Optional[str] = None,
        manage_status: Optional[bool] = None,
        watch_dependent_resources: Optional[bool] = None,
        reconcile_period: Optional[str] = None,
        playbook_parameters: Optional[dict] = None,
    ):
        """Construct with the core watch binding and configuration args for the
        watches.yaml and playbook.yaml files.

        NOTE: All args may be overridden in the `ansible_watch_manager` section
            of the library config. The precedence order is:

        1. Directly passed arguments
        2. Config values
        3. Default values from code

        A passed None value in any of these is considered "unset"

        Args:
            controller_type:  Type[Controller],
                The Controller type that will manage this group/version/kind

        Kwargs:
            ansible_base_path:  str
                The base path where the ansible runtime will be run. This is
                also used to determine where the watches.yaml and playbooks will
                be managed.
            ansible_entrypoint:  str
                The command to use to run ansible
            ansible_args: str
                Additional flags to be passed to `ansible_entrypoint`
            manage_status:  bool
                Whether or not to let ansible manage status on the CR
            watch_dependent_resources:  bool
                Whether or not to trigger a reconciliation on change to
                dependent resources.
            reconcile_period:  str
                String representation of the time duration to use for periodic
                reconciliations
            playbook_parameters:  dict
                Parameters to use to configure the k8s_application module in the
                playbook
        """
        # Make sure that the shared ansible process is not already started
        assert (
            self.ANSIBLE_PROCESS is None
        ), "Cannot create an AnsibleWatchManager after starting another AnsibleWatchManager"

        # Set up the function arguments based on override precedence
        ansible_base_path = self._init_arg("ansible_base_path", ansible_base_path, str)
        ansible_entrypoint = self._init_arg(
            "ansible_entrypoint", ansible_entrypoint, str
        )
        ansible_args = self._init_arg("ansible_args", ansible_args, str)
        manage_status = self._init_arg("manage_status", manage_status, bool)
        watch_dependent_resources = self._init_arg(
            "watch_dependent_resources", watch_dependent_resources, bool
        )
        reconcile_period = self._init_arg("reconcile_period", reconcile_period, str)
        playbook_parameters = self._init_arg(
            "playbook_parameters", playbook_parameters, dict
        )

        super().__init__(controller_type)
        self._ansible_base_path = ansible_base_path
        self._ansible_entrypoint = ansible_entrypoint
        self._ansible_args = ansible_args

        # Create the playbook
        playbook_path = self._add_playbook(playbook_parameters)

        # Create the entry in the watches.yaml
        self._add_watch_entry(
            playbook_path=playbook_path,
            manage_status=manage_status,
            watch_dependent_resources=watch_dependent_resources,
            reconcile_period=reconcile_period,
            add_finalizer=controller_type.has_finalizer,
            disable_vcs=getattr(controller_type, "disable_vcs", None),
        )

    ## Interface ###############################################################

    def watch(self) -> bool:
        """Start the global ansible process if not already started

        NOTE: This is intentionally not thread safe! The watches should all be
            managed from the primary entrypoint thread.

        Returns:
            success:  bool
                True if the ansible process is running correctly
        """
        cls = self.__class__
        if cls.ANSIBLE_PROCESS is None:
            log.info("Starting ansible watch process")
            env = copy.deepcopy(os.environ)
            env["ANSIBLE_LIBRARY"] = self._ansible_library_path()
            env["ANSIBLE_ROLES_PATH"] = self._ansible_roles_path()
            cls.ANSIBLE_PROCESS = (
                subprocess.Popen(  # pylint: disable=consider-using-with
                    shlex.split(
                        " ".join((self._ansible_entrypoint, self._ansible_args)).strip()
                    ),
                    cwd=self._ansible_base_path,
                    env=env,
                )
            )

        # If the process does not have a returncode on poll, it's up. This is a
        # point-in-time statement. There's no way for this code to actually
        # validate the state of the process since it may crash at any
        # indeterminate time after starting.
        return self.ANSIBLE_PROCESS.poll() is None

    def wait(self):
        """Wait for the ansible process to terminate"""
        if self.ANSIBLE_PROCESS is not None:
            self.ANSIBLE_PROCESS.wait()

    def stop(self):
        """Attempt to terminate the ansible process. This asserts that the
        process has been created in order to avoid race conditions with a None
        check.
        """
        assert self.ANSIBLE_PROCESS is not None, "Cannot stop before watching"
        log.info("Killing shared ansible process")
        self.ANSIBLE_PROCESS.terminate()
        kill_start_time = time.time()
        while (
            self.ANSIBLE_PROCESS.poll() is None
            and time.time() - kill_start_time
            < config.ansible_watch_manager.kill_max_wait
        ):
            time.sleep(0.001)
        assert (
            self.ANSIBLE_PROCESS.poll() is not None
        ), "The only way to shut down ansible is with a sledge hammer!"

    ## Implementation Details ##################################################

    @classmethod
    def _init_arg(cls, arg_name, passed_value, arg_type):
        """Helper to enforce init arg precedence"""
        if passed_value is not None:
            return passed_value
        config_value = config.ansible_watch_manager.get(arg_name)
        if config_value is not None:
            if arg_type is not None and not isinstance(config_value, arg_type):
                assert_config(
                    isinstance(config_value, str),
                    f"Invalid type for ansible_watch_manager.{arg_name}: "
                    + "{type(config_value)} should be {arg_type}",
                )
                if arg_type is bool:
                    config_value = config_value.lower() == "true"
                elif arg_type is dict:
                    config_value = json.loads(config_value)
                assert_config(
                    isinstance(config_value, arg_type),
                    f"Cannot convert ansible_watch_manager.{arg_name} from str to {arg_type}",
                )
            return config_value
        assert (
            arg_name in cls._DEFAULT_INIT_KWARGS
        ), f"Programming Error: Unsupported init kwarg: {arg_name}"
        return cls._DEFAULT_INIT_KWARGS[arg_name]

    def _add_playbook(self, playbook_parameters):
        """Create a playbook for this watch"""

        # Open the base template for playbooks
        playbook_base_path = os.path.join(
            self._resources_path(),
            "playbook-base.yaml",
        )
        with open(playbook_base_path, encoding="utf-8") as handle:
            playbook_base = yaml.safe_load(handle)

        # Add the provided variables
        module_vars = playbook_parameters or {}
        module_vars.setdefault("strict_versioning", False)
        kind = self.controller_type.kind.lower()
        log_file = f"{kind}.{{{{ ansible_operator_meta.name }}}}.log"
        log_dir = config.ansible_watch_manager.log_file_dir
        if log_dir is not None:
            log.debug2("Adding log dir: %s", log_dir)
            log_file = os.path.join(log_dir, log_file)
        module_vars.setdefault("log_file", log_file)
        playbook_base[0]["tasks"][0]["vars"] = module_vars

        # Add the controller_class
        controller_class = (
            f"{self.controller_type.__module__}.{self.controller_type.__name__}"
        )
        log.debug3("controller_class: %s", controller_class)
        module_vars["controller_class"] = controller_class

        # Add the full_cr template
        group_template = self.group.lower().replace(".", "_").replace("-", "_")
        cr_template = f"{{{{ _{group_template}_{self.kind.lower()} }}}}"
        module_vars["full_cr"] = cr_template

        # Write it out to the right place
        log.debug3(
            "%s/%s/%s playbook vars: %s",
            self.group,
            self.version,
            self.kind,
            module_vars,
        )
        playbook_path = os.path.join(
            self._ansible_base_path, f"playbook-{self.kind.lower()}.yaml"
        )
        with open(playbook_path, "w", encoding="utf-8") as handle:
            yaml.dump(playbook_base, handle)
        return playbook_path

    def _add_watch_entry(  # pylint: disable=too-many-arguments
        self,
        playbook_path: str,
        manage_status: bool,
        watch_dependent_resources: bool,
        reconcile_period: str,
        add_finalizer: bool,
        disable_vcs: Optional[bool],
    ):
        """Add an entry to the watches.yaml file, creating it if needed"""

        # Load the current watches.yaml content, or start fresh
        watches_path = os.path.join(self._ansible_base_path, "watches.yaml")
        if os.path.exists(watches_path):
            with open(watches_path, encoding="utf-8") as handle:
                watches = yaml.safe_load(handle)
        else:
            watches = []

        # Make sure there is not already an entry for this watch
        matches = [
            (
                watch_entry["group"] == self.group
                and watch_entry["version"] == self.version
                and watch_entry["kind"] == self.kind
            )
            for watch_entry in watches
        ]
        assert True not in matches, (
            "Can't have multiple watch entries for the same group/version/kind! "
            + f"{self.group}/{self.version}/{self.kind}"
        )
        log.debug2("Adding new watch for %s", self)
        watch_entry = {
            "group": self.group,
            "version": self.version,
            "kind": self.kind,
            "vars": {"operation": "add"},
        }
        if disable_vcs is not None:
            str_val = str(not disable_vcs).lower()
            log.debug(
                "Adding watch variable [enable_ansible_vcs = '%s'] for %s/%s/%s",
                str_val,
                self.group,
                self.version,
                self.kind,
            )
            watch_entry["vars"]["enable_ansible_vcs"] = str_val
        watches.append(watch_entry)

        # Update the watch entry with the configuration for this watch
        watch_entry["playbook"] = playbook_path
        watch_entry["manageStatus"] = manage_status
        watch_entry["watchDependentResources"] = watch_dependent_resources
        watch_entry["reconcilePeriod"] = reconcile_period

        # If requested, add a version of the watch that manages the finalizer
        if add_finalizer:
            finalizer_name = self.controller_type.finalizer
            log.debug2("Adding finalizer: %s", finalizer_name)
            watch_entry["finalizer"] = {
                "name": finalizer_name,
                "vars": {"operation": "remove"},
            }

        # Write the watches.yaml file back out
        with open(watches_path, "w", encoding="utf-8") as handle:
            yaml.dump(watches, handle)

    @staticmethod
    def _resources_path():
        """Get the path to the static resources for ansible"""
        return os.path.realpath(os.path.join(os.path.dirname(__file__), "resources"))

    @staticmethod
    def _ansible_library_path():
        """Get the absolute path to the ansible library with the k8s_applicaiton
        module
        """
        return os.path.realpath(os.path.join(os.path.dirname(__file__), "modules"))

    @classmethod
    def _ansible_roles_path(cls):
        """Get the absolute path to the ansible roles"""
        return os.path.join(cls._resources_path(), "roles")
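A rough sketch of wiring a Controller into the ansible watch manager follows. It is illustrative only: the Controller subclass, its group/version/kind values, and the import paths are assumptions rather than verbatim oper8 examples.

from oper8 import Controller  # assumed import path
from oper8.watch_manager import WatchManagerBase  # assumed re-export
from oper8.watch_manager.ansible_watch_manager import AnsibleWatchManager  # assumed re-export

class FooController(Controller):
    group = "foo.example.com"
    version = "v1"
    kind = "Foo"

    def setup_components(self, session):
        ...  # add oper8 Components for this CR here

# Writes a playbook and a watches.yaml entry under ansible_base_path
AnsibleWatchManager(FooController, ansible_base_path="/opt/ansible", reconcile_period="10m")

# Launch the shared ansible process (and any other registered watches)
WatchManagerBase.start_all()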
__init__(controller_type, *, ansible_base_path=None, ansible_entrypoint=None, ansible_args=None, manage_status=None, watch_dependent_resources=None, reconcile_period=None, playbook_parameters=None)

Construct with the core watch binding and configuration args for the watches.yaml and playbook.yaml files.

NOTE: All args may be overridden in the ansible_watch_manager section of the library config. The precedence order is:

  1. Directly passed arguments
  2. Config values
  3. Default values from code

A passed None value in any of these is considered "unset"

Parameters:

    controller_type (Type[Controller]): The Controller type that will manage this group/version/kind. Required.

Kwargs:

    ansible_base_path (str): The base path where the ansible runtime will be run. This is also used to determine where the watches.yaml and playbooks will be managed.
    ansible_entrypoint (str): The command to use to run ansible.
    ansible_args (str): Additional flags to be passed to ansible_entrypoint.
    manage_status (bool): Whether or not to let ansible manage status on the CR.
    watch_dependent_resources (bool): Whether or not to trigger a reconciliation on change to dependent resources.
    reconcile_period (str): String representation of the time duration to use for periodic reconciliations.
    playbook_parameters (dict): Parameters to use to configure the k8s_application module in the playbook.

Source code in oper8/watch_manager/ansible_watch_manager/ansible_watch_manager.py
def __init__(
    self,
    controller_type: Type[Controller],
    *,
    ansible_base_path: Optional[str] = None,
    ansible_entrypoint: Optional[str] = None,
    ansible_args: Optional[str] = None,
    manage_status: Optional[bool] = None,
    watch_dependent_resources: Optional[bool] = None,
    reconcile_period: Optional[str] = None,
    playbook_parameters: Optional[dict] = None,
):
    """Construct with the core watch binding and configuration args for the
    watches.yaml and playbook.yaml files.

    NOTE: All args may be overridden in the `ansible_watch_manager` section
        of the library config. The precedence order is:

    1. Directly passed arguments
    2. Config values
    3. Default values from code

    A passed None value in any of these is considered "unset"

    Args:
        controller_type:  Type[Controller],
            The Controller type that will manage this group/version/kind

    Kwargs:
        ansible_base_path:  str
            The base path where the ansible runtime will be run. This is
            also used to determine where the watches.yaml and playbooks will
            be managed.
        ansible_entrypoint:  str
            The command to use to run ansible
        ansible_args: str
            Additional flags to be passed to `ansible_entrypoint`
        manage_status:  bool
            Whether or not to let ansible manage status on the CR
        watch_dependent_resources:  bool
            Whether or not to trigger a reconciliation on change to
            dependent resources.
        reconcile_period:  str
            String representation of the time duration to use for periodic
            reconciliations
        playbook_parameters:  dict
            Parameters to use to configure the k8s_application module in the
            playbook
    """
    # Make sure that the shared ansible process is not already started
    assert (
        self.ANSIBLE_PROCESS is None
    ), "Cannot create an AnsibleWatchManager after starting another AnsibleWatchManager"

    # Set up the function arguments based on override precedence
    ansible_base_path = self._init_arg("ansible_base_path", ansible_base_path, str)
    ansible_entrypoint = self._init_arg(
        "ansible_entrypoint", ansible_entrypoint, str
    )
    ansible_args = self._init_arg("ansible_args", ansible_args, str)
    manage_status = self._init_arg("manage_status", manage_status, bool)
    watch_dependent_resources = self._init_arg(
        "watch_dependent_resources", watch_dependent_resources, bool
    )
    reconcile_period = self._init_arg("reconcile_period", reconcile_period, str)
    playbook_parameters = self._init_arg(
        "playbook_parameters", playbook_parameters, dict
    )

    super().__init__(controller_type)
    self._ansible_base_path = ansible_base_path
    self._ansible_entrypoint = ansible_entrypoint
    self._ansible_args = ansible_args

    # Create the playbook
    playbook_path = self._add_playbook(playbook_parameters)

    # Create the entry in the watches.yaml
    self._add_watch_entry(
        playbook_path=playbook_path,
        manage_status=manage_status,
        watch_dependent_resources=watch_dependent_resources,
        reconcile_period=reconcile_period,
        add_finalizer=controller_type.has_finalizer,
        disable_vcs=getattr(controller_type, "disable_vcs", None),
    )
stop()

Attempt to terminate the ansible process. This asserts that the process has been created in order to avoid race conditions with a None check.

Source code in oper8/watch_manager/ansible_watch_manager/ansible_watch_manager.py
def stop(self):
    """Attempt to terminate the ansible process. This asserts that the
    process has been created in order to avoid race conditions with a None
    check.
    """
    assert self.ANSIBLE_PROCESS is not None, "Cannot stop before watching"
    log.info("Killing shared ansible process")
    self.ANSIBLE_PROCESS.terminate()
    kill_start_time = time.time()
    while (
        self.ANSIBLE_PROCESS.poll() is None
        and time.time() - kill_start_time
        < config.ansible_watch_manager.kill_max_wait
    ):
        time.sleep(0.001)
    assert (
        self.ANSIBLE_PROCESS.poll() is not None
    ), "The only way to shut down ansible is with a sledge hammer!"
wait()

Wait for the ansible process to terminate

Source code in oper8/watch_manager/ansible_watch_manager/ansible_watch_manager.py
def wait(self):
    """Wait for the ansible process to terminate"""
    if self.ANSIBLE_PROCESS is not None:
        self.ANSIBLE_PROCESS.wait()
watch()

Start the global ansible process if not already started

NOTE: This is intentionally not thread safe! The watches should all be managed from the primary entrypoint thread.

Returns:

    success (bool): True if the ansible process is running correctly.

Source code in oper8/watch_manager/ansible_watch_manager/ansible_watch_manager.py
def watch(self) -> bool:
    """Start the global ansible process if not already started

    NOTE: This is intentionally not thread safe! The watches should all be
        managed from the primary entrypoint thread.

    Returns:
        success:  bool
            True if the ansible process is running correctly
    """
    cls = self.__class__
    if cls.ANSIBLE_PROCESS is None:
        log.info("Starting ansible watch process")
        env = copy.deepcopy(os.environ)
        env["ANSIBLE_LIBRARY"] = self._ansible_library_path()
        env["ANSIBLE_ROLES_PATH"] = self._ansible_roles_path()
        cls.ANSIBLE_PROCESS = (
            subprocess.Popen(  # pylint: disable=consider-using-with
                shlex.split(
                    " ".join((self._ansible_entrypoint, self._ansible_args)).strip()
                ),
                cwd=self._ansible_base_path,
                env=env,
            )
        )

    # If the process does not have a returncode on poll, it's up. This is a
    # point-in-time statement. There's no way for this code to actually
    # validate the state of the process since it may crash at any
    # indeterminate time after starting.
    return self.ANSIBLE_PROCESS.poll() is None

base

This module holds the base class interface for the various implementations of WatchManager

WatchManagerBase

Bases: ABC

A WatchManager is responsible for linking a kubernetes custom resource type with a Controller that will execute the reconciliation loop

Source code in oper8/watch_manager/base.py
class WatchManagerBase(abc.ABC):
    """A WatchManager is responsible for linking a kubernetes custom resource
    type with a Controller that will execute the reconciliation loop
    """

    # Class-global mapping of all watches managed by this operator
    _ALL_WATCHES = {}

    ## Interface ###############################################################

    def __init__(
        self,
        controller_type: Type[Controller],
    ):
        """Construct with the controller type that will be watched

        Args:
            controller_type:  Type[Controller],
                The Controller instance that will manage this group/version/kind
        """
        self.controller_type = controller_type
        self.group = controller_type.group
        self.version = controller_type.version
        self.kind = controller_type.kind

        # Register this watch instance
        watch_key = str(self)
        assert (
            watch_key not in self._ALL_WATCHES
        ), "Only a single controller may watch a given group/version/kind"
        self._ALL_WATCHES[watch_key] = self

    @abc.abstractmethod
    def watch(self) -> bool:
        """The watch function is responsible for initializing the persistent
        watch and returning whether or not the watch was started successfully.

        Returns:
            success:  bool
                True if the watch was spawned correctly, False otherwise.
        """

    @abc.abstractmethod
    def wait(self):
        """The wait function is responsible for blocking until the managed watch
        has been terminated.
        """

    @abc.abstractmethod
    def stop(self):
        """Terminate this watch if it is currently running"""

    ## Utilities ###############################################################

    @classmethod
    def start_all(cls) -> bool:
        """This utility starts all registered watches

        Returns:
            success:  bool
                True if all watches started successfully, False otherwise
        """
        started_watches = []
        success = True
        # NOTE: sorting gives deterministic order so that launch failures can be
        #   diagnosed (and tested) more easily. This is not strictly necessary,
        #   but it also doesn't hurt and it is nice to have.
        for _, watch in sorted(cls._ALL_WATCHES.items()):
            if watch.watch():
                log.debug("Successfully started %s", watch)
                started_watches.append(watch)
            else:
                log.warning("Failed to start %s", watch)
                success = False

                # Shut down all successfully started watches
                for started_watch in started_watches:
                    started_watch.stop()

                # Don't start any of the others
                break

        # Wait on all of them to terminate
        for watch in cls._ALL_WATCHES.values():
            watch.wait()

        return success

    @classmethod
    def stop_all(cls):
        """This utility stops all watches"""
        for watch in cls._ALL_WATCHES.values():
            try:
                watch.stop()
                log.debug2("Waiting for %s to terminate", watch)
                watch.wait()
            except Exception as exc:  # pylint: disable=broad-exception-caught
                log.error("Failed to stop watch manager %s", exc, exc_info=True)

    ## Implementation Details ##################################################

    def __str__(self):
        """String representation of this watch"""
        return f"Watch[{self.controller_type}]"
__init__(controller_type)

Construct with the controller type that will be watched

Parameters:

    controller_type (Type[Controller]): The Controller instance that will manage this group/version/kind. Required.
Source code in oper8/watch_manager/base.py
def __init__(
    self,
    controller_type: Type[Controller],
):
    """Construct with the controller type that will be watched

    Args:
        controller_type:  Type[Controller],
            The Controller instance that will manage this group/version/kind
    """
    self.controller_type = controller_type
    self.group = controller_type.group
    self.version = controller_type.version
    self.kind = controller_type.kind

    # Register this watch instance
    watch_key = str(self)
    assert (
        watch_key not in self._ALL_WATCHES
    ), "Only a single controller may watch a given group/version/kind"
    self._ALL_WATCHES[watch_key] = self
__str__()

String representation of this watch

Source code in oper8/watch_manager/base.py
def __str__(self):
    """String representation of this watch"""
    return f"Watch[{self.controller_type}]"
start_all() classmethod

This utility starts all registered watches

Returns:

    success (bool): True if all watches started successfully, False otherwise.

Source code in oper8/watch_manager/base.py
@classmethod
def start_all(cls) -> bool:
    """This utility starts all registered watches

    Returns:
        success:  bool
            True if all watches started successfully, False otherwise
    """
    started_watches = []
    success = True
    # NOTE: sorting gives deterministic order so that launch failures can be
    #   diagnosed (and tested) more easily. This is not strictly necessary,
    #   but it also doesn't hurt and it is nice to have.
    for _, watch in sorted(cls._ALL_WATCHES.items()):
        if watch.watch():
            log.debug("Successfully started %s", watch)
            started_watches.append(watch)
        else:
            log.warning("Failed to start %s", watch)
            success = False

            # Shut down all successfully started watches
            for started_watch in started_watches:
                started_watch.stop()

            # Don't start any of the others
            break

    # Wait on all of them to terminate
    for watch in cls._ALL_WATCHES.values():
        watch.wait()

    return success
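
For orientation, here is a minimal sketch (not taken from the oper8 source) of how start_all and stop_all are typically driven from an operator entrypoint. The FooController class and the exact import paths are assumptions for illustration only.

from oper8 import Controller
from oper8.watch_manager import DryRunWatchManager, WatchManagerBase

class FooController(Controller):
    """Hypothetical controller used only for this illustration"""
    group = "foo.example.com"
    version = "v1"
    kind = "Foo"

    def setup_components(self, session):
        """No components needed for this sketch"""

# Constructing a watch manager registers it with WatchManagerBase
DryRunWatchManager(FooController)

# start_all() launches every registered watch, waits for them to terminate,
# and returns False if any watch failed to launch
if not WatchManagerBase.start_all():
    # stop_all() stops any remaining watches and waits for termination
    WatchManagerBase.stop_all()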
stop() abstractmethod

Terminate this watch if it is currently running

Source code in oper8/watch_manager/base.py
67
68
69
@abc.abstractmethod
def stop(self):
    """Terminate this watch if it is currently running"""
stop_all() classmethod

This utility stops all watches

Source code in oper8/watch_manager/base.py
107
108
109
110
111
112
113
114
115
116
@classmethod
def stop_all(cls):
    """This utility stops all watches"""
    for watch in cls._ALL_WATCHES.values():
        try:
            watch.stop()
            log.debug2("Waiting for %s to terminate", watch)
            watch.wait()
        except Exception as exc:  # pylint: disable=broad-exception-caught
            log.error("Failed to stop watch manager %s", exc, exc_info=True)
wait() abstractmethod

The wait function is responsible for blocking until the managed watch has been terminated.

Source code in oper8/watch_manager/base.py
61
62
63
64
65
@abc.abstractmethod
def wait(self):
    """The wait function is responsible for blocking until the managed watch
    has been terminated.
    """
watch() abstractmethod

The watch function is responsible for initializing the persistent watch and returning whether or not the watch was started successfully.

Returns:

Name Type Description
success bool

bool True if the watch was spawned correctly, False otherwise.

Source code in oper8/watch_manager/base.py
51
52
53
54
55
56
57
58
59
@abc.abstractmethod
def watch(self) -> bool:
    """The watch function is responsible for initializing the persistent
    watch and returning whether or not the watch was started successfully.

    Returns:
        success:  bool
            True if the watch was spawned correctly, False otherwise.
    """

dry_run_watch_manager

Dry run implementation of the WatchManager abstraction

DryRunWatchManager

Bases: WatchManagerBase

The DryRunWatchManager implements the WatchManagerBase interface using a single shared DryRunDeployManager to manage an in-memory representation of the cluster.

Source code in oper8/watch_manager/dry_run_watch_manager.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class DryRunWatchManager(WatchManagerBase):
    """
    The DryRunWatchManager implements the WatchManagerBase interface using
    a single shared DryRunDeployManager to manage an in-memory representation of
    the cluster.
    """

    reconcile_manager = None

    def __init__(
        self,
        controller_type: Type[Controller],
        deploy_manager: Optional[DryRunDeployManager] = None,
    ):
        """Construct with the type of controller to watch and optionally a
        deploy_manager instance. A deploy_manager will be constructed if none is
        given.

        Args:
            controller_type:  Type[Controller]
                The class for the controller that will be watched
            deploy_manager:  Optional[DryRunDeployManager]
                If given, this deploy_manager will be used. This allows for
                there to be pre-populated resources. Note that it _must_ be a
                DryRunDeployManager (or child class) that supports registering
                watches.
        """
        super().__init__(controller_type)

        # Set up the deploy manager
        self._deploy_manager = deploy_manager or DryRunDeployManager()

        # We lazily initialize the controller instance in watch and _resource in run_reconcile
        self._controller = None
        self._resource = {}

        # We initialize the reconcile_manager instance on first watch creation
        if not self.reconcile_manager:
            self.reconcile_manager = ReconcileManager(
                deploy_manager=self._deploy_manager, reimport_controller=False
            )

    def watch(self) -> bool:
        """Register the watch with the deploy manager"""
        if self._controller is not None:
            log.warning("Cannot watch multiple times!")
            return False

        log.debug("Registering %s with the DeployManager", self.controller_type)

        # Construct controller
        self._controller = self.controller_type()

        # Register watch and finalizers
        api_version = f"{self.group}/{self.version}"
        self._deploy_manager.register_watch(
            api_version=api_version,
            kind=self.kind,
            callback=partial(self.run_reconcile, False),
        )
        if self.controller_type.has_finalizer:
            log.debug("Registering finalizer")
            self._deploy_manager.register_finalizer(
                api_version=api_version,
                kind=self.kind,
                callback=partial(self.run_reconcile, True),
            )

        return True

    def wait(self):
        """There is nothing to do in wait"""

    def stop(self):
        """There is nothing to do in stop"""

    def run_reconcile(self, is_finalizer: bool, resource: dict):
        """Wrapper function to simplify parameter/partial mapping"""
        if not self.reconcile_manager:
            return

        # Only run reconcile if it's a unique resource
        resource_metadata = self._resource.get("metadata", {})
        if (
            self._resource.get("kind") == resource.get("kind")
            and self._resource.get("apiVersion") == resource.get("apiVersion")
            and resource_metadata.get("name")
            == resource.get("metadata", {}).get("name")
            and resource_metadata.get("namespace")
            == resource.get("metadata", {}).get("namespace")
        ):
            return

        # Save the current resource and log handlers then restore it after the reconcile
        # is completed
        log_formatters = {}
        for handler in logging.getLogger().handlers:
            log_formatters[handler] = handler.formatter
        current_resource = self._resource
        self._resource = resource

        self.reconcile_manager.reconcile(self._controller, resource, is_finalizer)
        self._resource = current_resource
        for handler, formatter in log_formatters.items():
            handler.setFormatter(formatter)
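
As a usage note, the sketch below (with a hypothetical FooController and assumed import paths) shows how a DryRunWatchManager registers its watch and how deploying a matching resource through the shared DryRunDeployManager triggers run_reconcile. The shape of the deploy call is an assumption based on the standard deploy manager interface, not a verbatim example from the oper8 source.

from oper8 import Controller
from oper8.deploy_manager import DryRunDeployManager
from oper8.watch_manager import DryRunWatchManager

class FooController(Controller):
    """Hypothetical controller used only for this illustration"""
    group = "foo.example.com"
    version = "v1"
    kind = "Foo"

    def setup_components(self, session):
        """No components needed for this sketch"""

dry_run_dm = DryRunDeployManager()
watch_manager = DryRunWatchManager(FooController, deploy_manager=dry_run_dm)
watch_manager.watch()  # registers the watch (and finalizer) callbacks

# Deploying a Foo resource into the in-memory cluster invokes run_reconcile
dry_run_dm.deploy([
    {
        "apiVersion": "foo.example.com/v1",
        "kind": "Foo",
        "metadata": {"name": "demo", "namespace": "default"},
    }
])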
__init__(controller_type, deploy_manager=None)

Construct with the type of controller to watch and optionally a deploy_manager instance. A deploy_manager will be constructed if none is given.

Parameters:

Name Type Description Default
controller_type Type[Controller]

Type[Controller] The class for the controller that will be watched

required
deploy_manager Optional[DryRunDeployManager]

Optional[DryRunDeployManager] If given, this deploy_manager will be used. This allows for pre-populated resources. Note that it must be a DryRunDeployManager (or child class) that supports registering watches.

None
Source code in oper8/watch_manager/dry_run_watch_manager.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(
    self,
    controller_type: Type[Controller],
    deploy_manager: Optional[DryRunDeployManager] = None,
):
    """Construct with the type of controller to watch and optionally a
    deploy_manager instance. A deploy_manager will be constructed if none is
    given.

    Args:
        controller_type:  Type[Controller]
            The class for the controller that will be watched
        deploy_manager:  Optional[DryRunDeployManager]
            If given, this deploy_manager will be used. This allows for
            there to be pre-populated resources. Note that it _must_ be a
            DryRunDeployManager (or child class) that supports registering
            watches.
    """
    super().__init__(controller_type)

    # Set up the deploy manager
    self._deploy_manager = deploy_manager or DryRunDeployManager()

    # We lazily initialize the controller instance in watch and _resource in run_reconcile
    self._controller = None
    self._resource = {}

    # We initialize the reconcile_manager instance on first watch creation
    if not self.reconcile_manager:
        self.reconcile_manager = ReconcileManager(
            deploy_manager=self._deploy_manager, reimport_controller=False
        )
run_reconcile(is_finalizer, resource)

Wrapper function to simplify parameter/partial mapping

Source code in oper8/watch_manager/dry_run_watch_manager.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def run_reconcile(self, is_finalizer: bool, resource: dict):
    """Wrapper function to simplify parameter/partial mapping"""
    if not self.reconcile_manager:
        return

    # Only run reconcile if it's a unique resource
    resource_metadata = self._resource.get("metadata", {})
    if (
        self._resource.get("kind") == resource.get("kind")
        and self._resource.get("apiVersion") == resource.get("apiVersion")
        and resource_metadata.get("name")
        == resource.get("metadata", {}).get("name")
        and resource_metadata.get("namespace")
        == resource.get("metadata", {}).get("namespace")
    ):
        return

    # Save the current resource and log handlers then restore it after the reconcile
    # is completed
    log_formatters = {}
    for handler in logging.getLogger().handlers:
        log_formatters[handler] = handler.formatter
    current_resource = self._resource
    self._resource = resource

    self.reconcile_manager.reconcile(self._controller, resource, is_finalizer)
    self._resource = current_resource
    for handler, formatter in log_formatters.items():
        handler.setFormatter(formatter)
stop()

There is nothing to do in stop

Source code in oper8/watch_manager/dry_run_watch_manager.py
95
96
def stop(self):
    """There is nothing to do in stop"""
wait()

There is nothing to do in wait

Source code in oper8/watch_manager/dry_run_watch_manager.py
92
93
def wait(self):
    """There is nothing to do in wait"""
watch()

Register the watch with the deploy manager

Source code in oper8/watch_manager/dry_run_watch_manager.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def watch(self) -> bool:
    """Register the watch with the deploy manager"""
    if self._controller is not None:
        log.warning("Cannot watch multiple times!")
        return False

    log.debug("Registering %s with the DeployManager", self.controller_type)

    # Construct controller
    self._controller = self.controller_type()

    # Register watch and finalizers
    api_version = f"{self.group}/{self.version}"
    self._deploy_manager.register_watch(
        api_version=api_version,
        kind=self.kind,
        callback=partial(self.run_reconcile, False),
    )
    if self.controller_type.has_finalizer:
        log.debug("Registering finalizer")
        self._deploy_manager.register_finalizer(
            api_version=api_version,
            kind=self.kind,
            callback=partial(self.run_reconcile, True),
        )

    return True

python_watch_manager

This module holds the pure-python implementation of the WatchManager

filters

Init file for the filters submodule. Imports all filters, functions, and classes from the filters module

common

Common functions used for interacting with filters, including the default filter classes

get_configured_filter() cached

Get the default filter that should be applied to every resource

Returns:

Name Type Description
default_filter Filter

Filter The default filter specified in the Config

Source code in oper8/watch_manager/python_watch_manager/filters/common.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@lru_cache(maxsize=1)
def get_configured_filter() -> Filter:
    """Get the default filter that should be applied to every resource

    Returns:
        default_filter: Filter
            The default filter specified in the Config"""

    filter_name = config.python_watch_manager.filter

    # Check for filter in default list or attempt to
    # manually import one
    if filter_name in FILTER_CLASSES:
        filter_obj = FILTER_CLASSES[filter_name]
    elif inspect.isclass(filter_name) and issubclass(filter_name, Filter):
        filter_obj = filter_name
    elif isinstance(filter_name, str):
        filter_obj = import_filter(filter_name)
    # If no filter is provided then always enable
    else:
        filter_obj = EnableFilter

    log.debug2(f"Found filter: {filter_obj}")
    return filter_obj
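
For illustration, here is a sketch of configuring the default filter, assuming the library config override can be assigned programmatically; the dotted-path form shown in the trailing comment is a hypothetical example.

from oper8 import config
from oper8.watch_manager.python_watch_manager.filters.common import (
    get_configured_filter,
)
from oper8.watch_manager.python_watch_manager.filters.filters import (
    CreationDeletionFilter,
)

# A Filter class can be assigned directly to the library config.
# Note: get_configured_filter is lru_cache'd, so set the config before the
# first call.
config.python_watch_manager.filter = CreationDeletionFilter
assert get_configured_filter() is CreationDeletionFilter

# ...or the config value can be a "<module>.<filter>" string, e.g.
# config.python_watch_manager.filter = "my_operator.filters.MyFilter"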
get_filters_for_resource_id(controller_type, resource_id)

Get the filters for a particular resource_id given a controller_type

Parameters:

Name Type Description Default
controller_type CONTROLLER_CLASS_TYPE

CONTROLLER_CLASS_TYPE The controller type whose filters we're inspecting

required
resource_id RESOURCE_ID_TYPE

"ResourceId" The requested resource

required

Returns:

Name Type Description
filter_list List[Filter]

List[Filter] The list of filters to be applied

Source code in oper8/watch_manager/python_watch_manager/filters/common.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def get_filters_for_resource_id(
    controller_type: CONTROLLER_CLASS_TYPE, resource_id: RESOURCE_ID_TYPE
) -> List[Filter]:
    """Get the filters for a particular resource_id given a controller_type

    Args:
        controller_type: CONTROLLER_CLASS_TYPE
            The controller type whose filters we're inspecting
        resource_id: "ResourceId"
            The requested resource

    Returns:
        filter_list: List[Filter]
            The list of filters to be applied
    """
    filters = getattr(controller_type, "pwm_filters", [])

    if isinstance(filters, list):
        return_filters = filters

    elif isinstance(filters, dict):
        return_filters = filters.get(resource_id.global_id, [])

    else:
        raise ConfigError(f"Invalid type for PWM filters: {type(filters)}")

    log.debug3(f"Found filters {return_filters} for resource: {resource_id}")
    return return_filters
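
The sketch below illustrates how pwm_filters might be declared on a controller so that get_filters_for_resource_id can pick them up. FooController is a hypothetical example, not part of oper8; a dict keyed by the resource's global_id can be used instead of a flat list to scope filters per watched resource.

from oper8 import Controller
from oper8.watch_manager.python_watch_manager.filters.filters import (
    AnnotationFilter,
    CreationDeletionFilter,
    GenerationFilter,
)

class FooController(Controller):
    """Hypothetical controller used only for this illustration"""
    group = "foo.example.com"
    version = "v1"
    kind = "Foo"

    # A flat list applies to every watched resource; filters in a list are
    # "anded" and a nested tuple is "ored" (see FilterManager below)
    pwm_filters = [
        CreationDeletionFilter,
        (GenerationFilter, AnnotationFilter),
    ]

    def setup_components(self, session):
        """No components needed for this sketch"""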
import_filter(filter_name)

Import a filter from a string reference

Parameters:

Name Type Description Default
filter_name str

str Filter name in <module>.<filter> form

required

Returns:

Name Type Description
imported_filter Filter

Filter The filter that was requested

Source code in oper8/watch_manager/python_watch_manager/filters/common.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def import_filter(filter_name: str) -> Filter:
    """Import a filter from a string reference

    Args:
        filter_name: str
            Filter name in <module>.<filter> form

    Returns:
        imported_filter: Filter
            The filter that was requested
    """
    module_path, class_name = filter_name.rsplit(".", 1)
    try:
        filter_module = importlib.import_module(module_path)
        filter_obj = getattr(filter_module, class_name)
    except (ImportError, AttributeError) as exc:
        raise ConfigError(
            f"Invalid Filter: {filter_name}. Module or class not found"
        ) from exc

    if (
        inspect.isclass(filter_obj) and not issubclass(filter_obj, Filter)
    ) and not isinstance(filter_obj, (Filter, list, tuple)):
        raise ConfigError(f"{filter_obj} is not an instance of {Filter}")

    return filter_obj
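
For example, the dotted-path form resolves as shown in this sketch; the module my_operator.filters and the NamespaceFilter class are hypothetical and would need to exist on the import path.

from oper8.watch_manager.python_watch_manager.filters.common import import_filter

# Equivalent to: from my_operator.filters import NamespaceFilter
namespace_filter = import_filter("my_operator.filters.NamespaceFilter")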
filters

Filters are used to limit the number of events being reconciled by a watch manager. This is based on the Kubernetes controller-runtime's "predicates" (https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/predicate#Funcs). The default set of filters is derived from operator-sdk's Ansible predicates (https://github.com/operator-framework/operator-sdk/blob/50c6ac03746ff4edf582feb9a71d2a7ea6ae6c40/internal/ansible/controller/controller.go#L105).

AnnotationFilter

Bases: Filter

Filter resources to reconcile on annotation changes

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
class AnnotationFilter(Filter):
    """Filter resources to reconcile on annotation changes"""

    def __init__(self, resource: ManagedObject):
        """Initialize the annotation hash variable"""
        self.annotations = None
        super().__init__(resource)

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Test if a resource's annotation has changed"""
        # Ignore Added and deleted events
        if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
            return

        return self.annotations != self.get_annotation_hash(resource)

    def update(self, resource: ManagedObject):
        """Update the currently stored annotation"""
        self.annotations = self.get_annotation_hash(resource)

    def get_annotation_hash(self, resource: ManagedObject) -> str:
        """Helper function to get the annotation hash"""
        return obj_to_hash(resource.metadata.get("annotations", {}))
__init__(resource)

Initialize the annotation hash variable

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
251
252
253
254
def __init__(self, resource: ManagedObject):
    """Initialize the annotation hash variable"""
    self.annotations = None
    super().__init__(resource)
get_annotation_hash(resource)

Helper function to get the annotation hash

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
272
273
274
def get_annotation_hash(self, resource: ManagedObject) -> str:
    """Helper function to get the annotation hash"""
    return obj_to_hash(resource.metadata.get("annotations", {}))
test(resource, event)

Test if a resource's annotation has changed

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
256
257
258
259
260
261
262
263
264
265
266
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Test if a resource's annotation has changed"""
    # Ignore Added and deleted events
    if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
        return

    return self.annotations != self.get_annotation_hash(resource)
update(resource)

Update the currently stored annotation

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
268
269
270
def update(self, resource: ManagedObject):
    """Update the currently stored annotation"""
    self.annotations = self.get_annotation_hash(resource)
CreationDeletionFilter

Bases: Filter

Filter to ensure reconciliation on creation and deletion events

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
class CreationDeletionFilter(Filter):
    """Filter to ensure reconciliation on creation and deletion events"""

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Return true if event is ADDED or DELETED"""

        # Ignore non Added/Deleted Events
        if event not in [KubeEventType.ADDED, KubeEventType.DELETED]:
            return

        return True
test(resource, event)

Return true if event is ADDED or DELETED

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
124
125
126
127
128
129
130
131
132
133
134
135
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Return true if event is ADDED or DELETED"""

    # Ignore non Added/Deleted Events
    if event not in [KubeEventType.ADDED, KubeEventType.DELETED]:
        return

    return True
DependentWatchFilter

Bases: Filter

Don't reconcile creation events as we can assume the owner created them

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
350
351
352
353
354
355
356
class DependentWatchFilter(Filter):
    """Don't reconcile creation events as we can assume the owner created
    them"""

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Return False if event is ADDED"""
        return event != KubeEventType.ADDED
test(resource, event)

Return False if event is ADDED

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
354
355
356
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Return False if event is ADDED"""
    return event != KubeEventType.ADDED
DisableFilter

Bases: Filter

Filter to disable all reconciles

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
378
379
380
381
382
383
class DisableFilter(Filter):
    """Filter to disable all reconciles"""

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Always return False"""
        return False
test(resource, event)

Always return False

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
381
382
383
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Always return False"""
    return False
EnableFilter

Bases: Filter

Filter to run all reconciles

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
386
387
388
389
390
391
class EnableFilter(Filter):
    """Filter to run all reconciles"""

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Always return True"""
        return True
test(resource, event)

Always return True

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
389
390
391
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Always return True"""
    return True
Filter

Bases: ABC

Generic Filter Interface for subclassing. Every subclass should implement a test function which returns true when a resource should be reconciled. Subclasses can optionally implement an update method if the filter requires storing some stateful information like ResourceVersion or Metadata.

NOTE: A unique Filter instance is created for each resource

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
class Filter(ABC):
    """Generic Filter Interface for subclassing. Every subclass should implement a
    `test` function which returns true when a resource should be reconciled. Subclasses
    can optionally implement a `update` method if the filter requires storing some stateful
    information like ResourceVersion or Metadata.

    NOTE: A unique Filter instance is created for each resource
    """

    def __init__(self, resource: ManagedObject):  # noqa: B027
        """Initializer can be used to detect configuration or create instance
        variables. Even though a resource is provided it should not set state until
        update is called

        Args:
            resource: ManagedObject
                This resource can be used by subclass to gather generic information.

        """

    ## Abstract Interface ######################################################
    #
    # These functions must be implemented by child classes
    ##

    @abstractmethod
    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Test whether the resource&event passes the filter. Returns true if
        the filter should be reconciled and return false if it should not be. A filter
        can optionally return None to ignore an event

        Args:
            resource: ManagedObject
                The current resource being checked
            event: KubeEventType
                The event type that triggered this filter

        Returns:
            result: Optional[bool]
                The result of the test.

        """

    ## Base Class Interface ####################################################
    #
    # These methods MAY be implemented by children, but contain default
    # implementations that are appropriate for simple cases.
    #
    ##

    def update(self, resource: ManagedObject):  # noqa: B027
        """Update the instances current state.

        Args:
            resource: ManagedObject
               The current state of the resource
        """

    def update_and_test(self, resource: ManagedObject, event: KubeEventType) -> bool:
        """First test a resource/event against a filter then update the current state

        Args:
            resource: ManagedObject
                The resource being filtered
            event: KubeEventType
                The event to be filtered

        Returns:
            test_result: bool
                The test result
        """
        result = self.test(resource, event)
        if result is not None and not result:
            log.debug3(
                "Failed filter: %s with return val %s",
                self,
                result,
                extra={"resource": resource},
            )
        self.update(resource)
        return result
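
As a usage note, a custom filter only needs to implement test (and optionally update); the sketch below is a hypothetical example, not part of oper8, that reconciles only resources carrying a particular label.

from oper8.watch_manager.python_watch_manager.filters.filters import Filter

class TierLabelFilter(Filter):
    """Only reconcile resources labeled tier=backend (illustrative only)"""

    def test(self, resource, event):
        # Resources without the label (or with a different value) are skipped
        labels = resource.get("metadata", {}).get("labels", {})
        return labels.get("tier") == "backend"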
__init__(resource)

The initializer can be used to detect configuration or create instance variables. Even though a resource is provided, it should not set state until update is called

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject This resource can be used by subclass to gather generic information.

required
Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
44
45
46
47
48
49
50
51
52
53
def __init__(self, resource: ManagedObject):  # noqa: B027
    """Initializer can be used to detect configuration or create instance
    variables. Even though a resource is provided it should not set state until
    update is called

    Args:
        resource: ManagedObject
            This resource can be used by subclass to gather generic information.

    """
test(resource, event) abstractmethod

Test whether the resource and event pass the filter. Returns True if the resource should be reconciled and False if it should not be. A filter can optionally return None to ignore an event

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject The current resource being checked

required
event KubeEventType

KubeEventType The event type that triggered this filter

required

Returns:

Name Type Description
result Optional[bool]

Optional[bool] The result of the test.

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
@abstractmethod
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Test whether the resource&event passes the filter. Returns true if
    the filter should be reconciled and return false if it should not be. A filter
    can optionally return None to ignore an event

    Args:
        resource: ManagedObject
            The current resource being checked
        event: KubeEventType
            The event type that triggered this filter

    Returns:
        result: Optional[bool]
            The result of the test.

    """
update(resource)

Update the instance's current state.

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject The current state of the resource

required
Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
85
86
87
88
89
90
91
def update(self, resource: ManagedObject):  # noqa: B027
    """Update the instances current state.

    Args:
        resource: ManagedObject
           The current state of the resource
    """
update_and_test(resource, event)

First test a resource/event against a filter then update the current state

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject The resource being filtered

required
event KubeEventType

KubeEventType The event to be filtered

required

Returns:

Name Type Description
test_result bool

bool The test result

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def update_and_test(self, resource: ManagedObject, event: KubeEventType) -> bool:
    """First test a resource/event against a filter then update the current state

    Args:
        resource: ManagedObject
            The resource being filtered
        event: KubeEventType
            The event to be filtered

    Returns:
        test_result: bool
            The test result
    """
    result = self.test(resource, event)
    if result is not None and not result:
        log.debug3(
            "Failed filter: %s with return val %s",
            self,
            result,
            extra={"resource": resource},
        )
    self.update(resource)
    return result
GenerationFilter

Bases: Filter

Filter for reconciling on generation changes for resources that support it

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
class GenerationFilter(Filter):
    """Filter for reconciling on generation changes for resources that support it"""

    def __init__(self, resource: ManagedObject):
        """Set generation instance variable"""
        super().__init__(resource)
        self.generation = None

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Return true if resource generation is different than before"""
        # Only update&test resources with a generation
        if not self.generation:
            return

        # Only test on resource updates
        if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
            return

        # Test if new generation is different
        return self.generation != resource.metadata.get("generation")

    def update(self, resource: ManagedObject):
        """Update the currently observed generation"""
        self.generation = resource.metadata.get("generation")
__init__(resource)

Set generation instance variable

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
141
142
143
144
def __init__(self, resource: ManagedObject):
    """Set generation instance variable"""
    super().__init__(resource)
    self.generation = None
test(resource, event)

Return true if resource generation is different than before

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Return true if resource generation is different than before"""
    # Only update&test resources with a generation
    if not self.generation:
        return

    # Only test on resource updates
    if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
        return

    # Test if new generation is different
    return self.generation != resource.metadata.get("generation")
update(resource)

Update the currently observed generation

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
163
164
165
def update(self, resource: ManagedObject):
    """Update the currently observed generation"""
    self.generation = resource.metadata.get("generation")
LabelFilter

Bases: Filter

Filter for resources that match a set of labels

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
362
363
364
365
366
367
368
369
370
371
372
373
374
375
class LabelFilter(Filter):
    """Filter for resources that match a set of labels"""

    @abstractclassproperty
    def labels(self) -> dict:
        """Subclasses must implement a labels class attribute"""

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Return true is a resource matches the requested labels"""
        resource_labels = resource.get("metadata", {}).get("labels")
        # Check to make sure every requested label matches
        return all(
            resource_labels.get(label) == value for label, value in self.labels.items()
        )
labels()

Subclasses must implement a labels class attribute

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
365
366
367
@abstractclassproperty
def labels(self) -> dict:
    """Subclasses must implement a labels class attribute"""
test(resource, event)

Return true if a resource matches the requested labels

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
369
370
371
372
373
374
375
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Return true is a resource matches the requested labels"""
    resource_labels = resource.get("metadata", {}).get("labels")
    # Check to make sure every requested label matches
    return all(
        resource_labels.get(label) == value for label, value in self.labels.items()
    )
NoGenerationFilter

Bases: Filter

Filter for reconciling spec changes on resources that don't support the generation field, like pods. It does this by hashing the object, excluding status and metadata

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
class NoGenerationFilter(Filter):
    """Filter for reconciling changes to spec on resources that don't support
    the generation field like pods. It does this by hashing the objects excluding
    status and metadata"""

    def __init__(self, resource: ManagedObject):
        """Check if resource supports generation and initialize the hash dict"""
        self.supports_generation = resource.metadata.get("generation") is not None
        self.resource_hashes = {}
        super().__init__(resource)

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Return True if a resources current hash differs from the current"""
        # Don't test resources that support generation or if we don't have hashes yet
        if self.supports_generation or not self.resource_hashes:
            return

        # Only test on resource updates
        if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
            return

        # Check each stored resource hash to see if its
        # changed
        for key, obj_has in self.resource_hashes.items():
            if obj_has != obj_to_hash(resource.get(key)):
                log.debug2("Detected change in %s", key)
                return True

        return False

    def update(self, resource: ManagedObject):
        """Update the observed spec hashes"""
        if self.supports_generation:
            return

        # Get the default hashes for all object keys except metadata
        # and status
        for key, obj in resource.definition.items():
            if key in ["metadata", "status", "kind", "apiVersion"]:
                continue

            self.resource_hashes[key] = obj_to_hash(obj)
__init__(resource)

Check if resource supports generation and initialize the hash dict

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
173
174
175
176
177
def __init__(self, resource: ManagedObject):
    """Check if resource supports generation and initialize the hash dict"""
    self.supports_generation = resource.metadata.get("generation") is not None
    self.resource_hashes = {}
    super().__init__(resource)
test(resource, event)

Return True if a resource's current hash differs from the stored hash

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Return True if a resources current hash differs from the current"""
    # Don't test resources that support generation or if we don't have hashes yet
    if self.supports_generation or not self.resource_hashes:
        return

    # Only test on resource updates
    if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
        return

    # Check each stored resource hash to see if its
    # changed
    for key, obj_has in self.resource_hashes.items():
        if obj_has != obj_to_hash(resource.get(key)):
            log.debug2("Detected change in %s", key)
            return True

    return False
update(resource)

Update the observed spec hashes

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
202
203
204
205
206
207
208
209
210
211
212
213
def update(self, resource: ManagedObject):
    """Update the observed spec hashes"""
    if self.supports_generation:
        return

    # Get the default hashes for all object keys except metadata
    # and status
    for key, obj in resource.definition.items():
        if key in ["metadata", "status", "kind", "apiVersion"]:
            continue

        self.resource_hashes[key] = obj_to_hash(obj)
PauseFilter

Bases: Filter

This filter skips resources that have the oper8 pause annotation

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
305
306
307
308
309
310
311
312
class PauseFilter(Filter):
    """This filter skips resources that have the oper8 pause annotation"""

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Test if a resource has the pause annotation"""
        return not ReconcileManager._is_paused(  # pylint: disable=protected-access
            resource
        )
test(resource, event)

Test if a resource has the pause annotation

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
308
309
310
311
312
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Test if a resource has the pause annotation"""
    return not ReconcileManager._is_paused(  # pylint: disable=protected-access
        resource
    )
ResourceVersionFilter

Bases: Filter

Filter for duplicate resource versions, which happen when restarting a watch connection

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
class ResourceVersionFilter(Filter):
    """Filter for duplicate resource versions which happens when restarting a
    watch connection"""

    def __init__(self, resource: ManagedObject):
        """Initialize the resource version list"""
        # Use a dequeue instead of a list/set to set a bound on the number
        # of tracked versions
        self.resource_versions = deque([], maxlen=RESOURCE_VERSION_KEEP_COUNT)
        super().__init__(resource)

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Test if the resource's resourceVersion has been seen before"""

        # Don't skip add events as the kubernetes watch can duplicate events
        if event == KubeEventType.DELETED:
            return

        return resource.resource_version not in self.resource_versions

    def update(self, resource: ManagedObject):
        """Add the resources ResourceVersion to the list"""
        self.resource_versions.append(resource.resource_version)
__init__(resource)

Initialize the resource version list

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
220
221
222
223
224
225
def __init__(self, resource: ManagedObject):
    """Initialize the resource version list"""
    # Use a dequeue instead of a list/set to set a bound on the number
    # of tracked versions
    self.resource_versions = deque([], maxlen=RESOURCE_VERSION_KEEP_COUNT)
    super().__init__(resource)
test(resource, event)

Test if the resource's resourceVersion has been seen before

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
227
228
229
230
231
232
233
234
235
236
237
238
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Test if the resource's resourceVersion has been seen before"""

    # Don't skip add events as the kubernetes watch can duplicate events
    if event == KubeEventType.DELETED:
        return

    return resource.resource_version not in self.resource_versions
update(resource)

Add the resource's ResourceVersion to the list

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
240
241
242
def update(self, resource: ManagedObject):
    """Add the resources ResourceVersion to the list"""
    self.resource_versions.append(resource.resource_version)
SubsystemStatusFilter

Bases: Filter

Reconcile oper8 controllers when their oper8 status changes

EXPERIMENTAL: This has passed basic validation but has not been rigorously tested in the field

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
class SubsystemStatusFilter(Filter):
    """Reconcile oper8 controllers when their oper8 status changes

    EXPERIMENTAL: This has passed basic validation but has not been rigorously tested
     in the field
    """

    def __init__(self, resource: ManagedObject):
        """Initialize the currently observed ready condition"""
        self.ready_condition = None
        super().__init__(resource)

    def test(  # pylint: disable=inconsistent-return-statements
        self,
        resource: ManagedObject,
        event: KubeEventType,
    ) -> Optional[bool]:
        """Test if a resources subsystem condition has changed"""
        if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
            return

        return self.ready_condition != get_condition(
            READY_CONDITION, resource.get("status", {})
        ).get("reason")

    def update(self, resource: ManagedObject):
        """Update the currently observed ready condition"""
        self.ready_condition = get_condition(
            READY_CONDITION, resource.get("status", {})
        ).get("reason")
__init__(resource)

Initialize the currently observed ready condition

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
322
323
324
325
def __init__(self, resource: ManagedObject):
    """Initialize the currently observed ready condition"""
    self.ready_condition = None
    super().__init__(resource)
test(resource, event)

Test if a resource's subsystem condition has changed

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
327
328
329
330
331
332
333
334
335
336
337
338
def test(  # pylint: disable=inconsistent-return-statements
    self,
    resource: ManagedObject,
    event: KubeEventType,
) -> Optional[bool]:
    """Test if a resources subsystem condition has changed"""
    if event in [KubeEventType.ADDED, KubeEventType.DELETED]:
        return

    return self.ready_condition != get_condition(
        READY_CONDITION, resource.get("status", {})
    ).get("reason")
update(resource)

Update the currently observed ready condition

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
340
341
342
343
344
def update(self, resource: ManagedObject):
    """Update the currently observed ready condition"""
    self.ready_condition = get_condition(
        READY_CONDITION, resource.get("status", {})
    ).get("reason")
UserAnnotationFilter

Bases: AnnotationFilter

Filter resources to reconcile on user annotation changes. This excludes Kubernetes and OpenShift annotations

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
class UserAnnotationFilter(AnnotationFilter):
    """Filter resources to reconcile on user annotation changes. This excludes
    kubernetes and openshift annotations
    """

    def get_annotation_hash(self, resource: ManagedObject) -> str:
        """Overriden function to exclude common platform annotations from
        the annotation hash"""
        output_annotations = {}
        for key, value in resource.metadata.get("annotations", {}).items():
            if self.contains_platform_key(key):
                continue

            output_annotations[key] = value

        return obj_to_hash(output_annotations)

    def contains_platform_key(self, key: str) -> bool:
        """Helper to check if the key contains one of the
        platform annotations"""
        return any(
            reserved_key in key for reserved_key in RESERVED_PLATFORM_ANNOTATIONS
        )
contains_platform_key(key)

Helper to check if the key contains one of the platform annotations

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
294
295
296
297
298
299
def contains_platform_key(self, key: str) -> bool:
    """Helper to check if the key contains one of the
    platform annotations"""
    return any(
        reserved_key in key for reserved_key in RESERVED_PLATFORM_ANNOTATIONS
    )
get_annotation_hash(resource)

Overridden function to exclude common platform annotations from the annotation hash

Source code in oper8/watch_manager/python_watch_manager/filters/filters.py
282
283
284
285
286
287
288
289
290
291
292
def get_annotation_hash(self, resource: ManagedObject) -> str:
    """Overriden function to exclude common platform annotations from
    the annotation hash"""
    output_annotations = {}
    for key, value in resource.metadata.get("annotations", {}).items():
        if self.contains_platform_key(key):
            continue

        output_annotations[key] = value

    return obj_to_hash(output_annotations)
manager

Module contains helpers for processing a group of filters

FilterManager

Bases: Filter

The FilterManager class helps process conditional filters and groups of filters. Filters in a list are "anded" together, while filters in a tuple are "ored". This class also contains helpers to recursively convert between ClassInfo and Filters.

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
class FilterManager(Filter):
    """The FilterManager class helps process conditional filters and groups of filters.
    Filters in a list are "anded" together while Filters in a tuple are "ored".
    This class also contains helpers to recursively convert between ClassInfo and Filters.
    """

    def __init__(
        self,
        filters: Union[List[Type[Filter]], Tuple[Type[Filter]]],
        resource: ManagedObject,
    ):
        """Initialize all filters in the provided group

        Args:
            filters: Union[List[Type[Filter]], Tuple[Type[Filter]]]
                The filters to manage
            resource: ManagedObject
                The initial resource
        """
        self.filters = self.__recursive_filter_init(filters, resource)

    ### Public Interface

    def update_and_test(
        self, resource: ManagedObject, event: KubeEventType
    ) -> Optional[bool]:
        """Recursively update and test each filter"""
        return self.__recursive_update_and_test(self.filters, resource, event)

    def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
        """Recursively test each filter"""
        # test with test_only set to True so nothing is updated
        return self.__recursive_update_and_test(
            self.filters, resource, event, test_only=True
        )

    def update(self, resource: ManagedObject):
        """Update each filter recursively"""
        # Update with update_only set to True so no tests are ran
        self.__recursive_update_and_test(  # pylint: disable=redundant-keyword-arg
            self, self.filters, resource, None, update_only=True
        )

    @classmethod
    def to_info(cls, filters: Type[Filter]) -> Type[ClassInfo]:
        """Helper function to convert from filters to ClassInfos. This is used for pickling and IPC

        Args:
            filters: Type[Filter]
                The filters to convert

        Returns:
            class_info: Type[ClassInfo]
                The class info objects describing the filter
        """
        return cls.__recursive_filter_info(filters)

    @classmethod
    def from_info(cls, info: Type[ClassInfo]) -> Type[Filter]:
        """Helper function to convert from ClassInfos to a filter


        Args:
            class_info: Type[ClassInfo]
                The classinfos to convert back into filters

        Returns:
            filters: Type[Filter]
                The converted filter objects
        """
        return cls.__recursive_filter_info(info)

    ### Private Helper Functions
    @classmethod
    def __recursive_filter_info(
        cls, descriptor: Union[Type[Filter], Type[ClassInfo]]
    ) -> Union[Type[Filter], Type[ClassInfo]]:
        """Recursive helper to convert from filters to class infos and back

        Args:
            descriptor: Union[Type[Filter],Type[ClassInfo]]
                Either the filter or class_info to convert

        Returns:
            type: Union[Type[Filter],Type[ClassInfo]]
                The converted types
        """

        def convert_filter_type(descriptor):
            """Generic function to convert between types"""

            # If we get a filter then we're converting to ClassInfo else
            # we're converting back to Filters
            if inspect.isclass(descriptor) and issubclass(descriptor, Filter):
                return ClassInfo.from_type(descriptor)
            if isinstance(descriptor, ClassInfo):
                return descriptor.to_class()
            # The instance must be a list or a tuple to be processed
            raise ValueError(
                f"Unknown type: {type(descriptor)} {descriptor} passed to convert_filter_type"
            )

        return cls.__recursive_map(descriptor, convert_filter_type)

    def __recursive_filter_init(
        self,
        filters: Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]],
        resource: ManagedObject,
    ) -> Union[List[Filter], Tuple[Filter], Filter]:
        """Helper function to recursively init each filter

        Args:
            filters: Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]]
                The filters to be initialized
            resource: ManagedObject
                The resource to pass to the filters
        Returns:
            filters: Union[List[Filter], Tuple[Filter], Filter]
                The initialized filters
        """

        def init_filter(filter_type: Type[Filter]) -> Filter:
            if not (inspect.isclass(filter_type) and issubclass(filter_type, Filter)):
                raise ValueError(
                    f"Unknown type: {type(filter_type)} passed to init_filter"
                )

            return filter_type(resource)

        return self.__recursive_map(filters, init_filter)

    def __recursive_update_and_test(  # pylint: disable=too-many-arguments, inconsistent-return-statements
        self,
        filters: Union[list, tuple, Filter],
        resource: ManagedObject,
        event: KubeEventType,
        update_only: bool = False,
        test_only: bool = False,
    ) -> Optional[bool]:
        """Helper function to recursively update, test, or both.

        Args:
            filters: Union[list, tuple, Filter]
                The current filters being tested. This is updated when recurring
            resource: ManagedObject
                The current resource being updated/tested
            event: KubeEventType,
                The current event type being updated/tested
            update_only: bool = False
                Whether to only update the filters
            test_only: bool = False,
                Whether to only test the filters

        Returns:
            result: Optional[bool]
                The result of the tests if it was ran
        """
        if update_only and test_only:
            raise ValueError("update_only and test_only can not both be True")

        # Check Initial object types and exit condition
        if isinstance(filters, Filter):
            # If instance is a filter then call either update or update_and_test
            # depending on the failed status
            if update_only:
                filters.update(resource)
                return
            if test_only:
                return filters.test(resource, event)

            return filters.update_and_test(resource, event)

        # If filter list is empty then immediately return success
        if not filters:
            return True

        return_value = None
        operation = operator.and_ if isinstance(filters, list) else operator.or_

        for filter_combo in filters:
            # Recursively processes the filter combo
            result = self.__recursive_update_and_test(
                filter_combo, resource, event, update_only, test_only
            )

            # If return_value has already been set then combine it with the most recent result
            # via the operation
            if result is not None:
                if return_value is not None:
                    return_value = operation(return_value, result)
                else:
                    return_value = result

            # There are two scenarios when filters only need to get updated not tested. The first
            # is when an "and" condition fails or when an "or" succeeds. In both instances we
            # know the end result so testing can be skipped for performance
            if (
                (not update_only and not test_only)
                and result
                and (
                    (operation == operator.and_ and not result)
                    or (operation == operator.or_ and result)
                )
            ):
                update_only = True

        # If no filter cared about the event then don't
        # reconcile
        if return_value is None:
            return False

        return return_value

    @classmethod
    def __recursive_map(
        cls, filters: Union[List[Any], Tuple[Any]], operation: Callable[[Filter], Any]
    ):
        """Helper function to map an operation onto every object in a filter chain

        Args:
            filters: Union[List[Any], Tuple[Any]]
                The filters to map onto
            op: Callable[[Filter],None]
                The function to map onto each filter
        """

        # Directly check tuple to ignore NamedTuples and subclasses
        if not (isinstance(filters, list) or type(filters) is tuple):
            return operation(filters)

        filter_list = []
        for filter_obj in filters:
            filter_list.append(cls.__recursive_map(filter_obj, operation))

        # Ensure the returned iterable is the same type as the original
        return type(filters)(filter_list)
__init__(filters, resource)

Initialize all filters in the provided group

Parameters:

Name Type Description Default
filters Union[List[Type[Filter]], Tuple[Type[Filter]]]

Union[List[Type[Filter]], Tuple[Type[Filter]]] The filters to manage

required
resource ManagedObject

ManagedObject The initial resource

required
Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def __init__(
    self,
    filters: Union[List[Type[Filter]], Tuple[Type[Filter]]],
    resource: ManagedObject,
):
    """Initialize all filters in the provided group

    Args:
        filters: Union[List[Type[Filter]], Tuple[Type[Filter]]]
            The filters to manage
        resource: ManagedObject
            The initial resource
    """
    self.filters = self.__recursive_filter_init(filters, resource)
__recursive_filter_info(descriptor) classmethod

Recursive helper to convert from filters to class infos and back

Parameters:

Name Type Description Default
descriptor Union[Type[Filter], Type[ClassInfo]]

Union[Type[Filter],Type[ClassInfo]] Either the filter or class_info to convert

required

Returns:

Name Type Description
type Union[Type[Filter], Type[ClassInfo]]

Union[Type[Filter],Type[ClassInfo]] The converted types

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
@classmethod
def __recursive_filter_info(
    cls, descriptor: Union[Type[Filter], Type[ClassInfo]]
) -> Union[Type[Filter], Type[ClassInfo]]:
    """Recursive helper to convert from filters to class infos and back

    Args:
        descriptor: Union[Type[Filter],Type[ClassInfo]]
            Either the filter or class_info to convert

    Returns:
        type: Union[Type[Filter],Type[ClassInfo]]
            The converted types
    """

    def convert_filter_type(descriptor):
        """Generic function to convert between types"""

        # If we get a Filter then we're converting to ClassInfo, else
        # we're converting back to Filters
        if inspect.isclass(descriptor) and issubclass(descriptor, Filter):
            return ClassInfo.from_type(descriptor)
        if isinstance(descriptor, ClassInfo):
            return descriptor.to_class()
        # The instance must be a list or a tuple to be processed
        raise ValueError(
            f"Unknown type: {type(descriptor)} {descriptor} passed to convert_filter_type"
        )

    return cls.__recursive_map(descriptor, convert_filter_type)
__recursive_filter_init(filters, resource)

Helper function to recursively init each filter

Parameters:

Name Type Description Default
filters Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]]

Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]] The filters to be initialized

required
resource ManagedObject

ManagedObject The resource to pass to the filters

required

Returns: filters: Union[List[Filter], Tuple[Filter], Filter] The initialized filters

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def __recursive_filter_init(
    self,
    filters: Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]],
    resource: ManagedObject,
) -> Union[List[Filter], Tuple[Filter], Filter]:
    """Helper function to recursively init each filter

    Args:
        filters: Union[List[Type[Filter]], Tuple[Type[Filter]], Type[Filter]]
            The filters to be initialized
        resource: ManagedObject
            The resource to pass to the filters
    Returns:
        filters: Union[List[Filter], Tuple[Filter], Filter]
            The initialized filters
    """

    def init_filter(filter_type: Type[Filter]) -> Filter:
        if not (inspect.isclass(filter_type) and issubclass(filter_type, Filter)):
            raise ValueError(
                f"Unknown type: {type(filter_type)} passed to init_filter"
            )

        return filter_type(resource)

    return self.__recursive_map(filters, init_filter)
__recursive_map(filters, operation) classmethod

Helper function to map an operation onto every object in a filter chain

Parameters:

Name Type Description Default
filters Union[List[Any], Tuple[Any]]

Union[List[Any], Tuple[Any]] The filters to map onto

required
op

Callable[[Filter],None] The function to map onto each filter

required
Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
@classmethod
def __recursive_map(
    cls, filters: Union[List[Any], Tuple[Any]], operation: Callable[[Filter], Any]
):
    """Helper function to map an operation onto every object in a filter chain

    Args:
        filters: Union[List[Any], Tuple[Any]]
            The filters to map onto
        op: Callable[[Filter],None]
            The function to map onto each filter
    """

    # Directly check tuple to ignore NamedTuples and subclasses
    if not (isinstance(filters, list) or type(filters) is tuple):
        return operation(filters)

    filter_list = []
    for filter_obj in filters:
        filter_list.append(cls.__recursive_map(filter_obj, operation))

    # Ensure the returned iterable is the same type as the original
    return type(filters)(filter_list)
__recursive_update_and_test(filters, resource, event, update_only=False, test_only=False)

Helper function to recursively update, test, or both.

Parameters:

Name Type Description Default
filters Union[list, tuple, Filter]

Union[list, tuple, Filter] The current filters being tested. This is updated while recursing

required
resource ManagedObject

ManagedObject The current resource being updated/tested

required
event KubeEventType

KubeEventType, The current event type being updated/tested

required
update_only bool

bool = False Whether to only update the filters

False
test_only bool

bool = False, Whether to only test the filters

False

Returns:

Name Type Description
result Optional[bool]

Optional[bool] The result of the tests if they were run

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def __recursive_update_and_test(  # pylint: disable=too-many-arguments, inconsistent-return-statements
    self,
    filters: Union[list, tuple, Filter],
    resource: ManagedObject,
    event: KubeEventType,
    update_only: bool = False,
    test_only: bool = False,
) -> Optional[bool]:
    """Helper function to recursively update, test, or both.

    Args:
        filters: Union[list, tuple, Filter]
            The current filters being tested. This is updated while recursing
        resource: ManagedObject
            The current resource being updated/tested
        event: KubeEventType,
            The current event type being updated/tested
        update_only: bool = False
            Whether to only update the filters
        test_only: bool = False,
            Whether to only test the filters

    Returns:
        result: Optional[bool]
            The result of the tests if they were run
    """
    if update_only and test_only:
        raise ValueError("update_only and test_only can not both be True")

    # Check Initial object types and exit condition
    if isinstance(filters, Filter):
        # If the instance is a Filter then call update, test, or update_and_test
        # depending on the requested mode
        if update_only:
            filters.update(resource)
            return
        if test_only:
            return filters.test(resource, event)

        return filters.update_and_test(resource, event)

    # If filter list is empty then immediately return success
    if not filters:
        return True

    return_value = None
    operation = operator.and_ if isinstance(filters, list) else operator.or_

    for filter_combo in filters:
        # Recursively processes the filter combo
        result = self.__recursive_update_and_test(
            filter_combo, resource, event, update_only, test_only
        )

        # If return_value has already been set then combine it with the most recent result
        # via the operation
        if result is not None:
            if return_value is not None:
                return_value = operation(return_value, result)
            else:
                return_value = result

        # There are two scenarios where filters only need to be updated, not tested:
        # when an "and" condition fails or when an "or" succeeds. In both cases the
        # end result is already known, so testing can be skipped for performance
        if (
            (not update_only and not test_only)
            and result
            and (
                (operation == operator.and_ and not result)
                or (operation == operator.or_ and result)
            )
        ):
            update_only = True

    # If no filter cared about the event then don't
    # reconcile
    if return_value is None:
        return False

    return return_value
from_info(info) classmethod

Helper function to convert from ClassInfos to a filter

Parameters:

Name Type Description Default
class_info

Type[ClassInfo] The classinfos to convert back into filters

required

Returns:

Name Type Description
filters Type[Filter]

Type[Filter] The converted filter objects

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
@classmethod
def from_info(cls, info: Type[ClassInfo]) -> Type[Filter]:
    """Helper function to convert from ClassInfos to a filter


    Args:
        class_info: Type[ClassInfo]
            The classinfos to convert back into filters

    Returns:
        filters: Type[Filter]
            The converted filter objects
    """
    return cls.__recursive_filter_info(info)
test(resource, event)

Recursively test each filter

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def test(self, resource: ManagedObject, event: KubeEventType) -> Optional[bool]:
    """Recursively test each filter"""
    # test with test_only set to True so nothing is updated
    return self.__recursive_update_and_test(
        self.filters, resource, event, test_only=True
    )
to_info(filters) classmethod

Helper function to convert from filters to ClassInfos. This is used for pickling and IPC

Parameters:

Name Type Description Default
filters Type[Filter]

Type[Filter] The filters to convert

required

Returns:

Name Type Description
class_info Type[ClassInfo]

Type[ClassInfo] The class info objects describing the filter

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
@classmethod
def to_info(cls, filters: Type[Filter]) -> Type[ClassInfo]:
    """Helper function to convert from filters to ClassInfos. This is used for pickling and IPC

    Args:
        filters: Type[Filter]
            The filters to convert

    Returns:
        class_info: Type[ClassInfo]
            The class info objects describing the filter
    """
    return cls.__recursive_filter_info(filters)
update(resource)

Update each filter recursively

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def update(self, resource: ManagedObject):
    """Update each filter recursively"""
    # Update with update_only set to True so no tests are run
    self.__recursive_update_and_test(
        self.filters, resource, None, update_only=True
    )
update_and_test(resource, event)

Recursively update and test each filter

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def update_and_test(
    self, resource: ManagedObject, event: KubeEventType
) -> Optional[bool]:
    """Recursively update and test each filter"""
    return self.__recursive_update_and_test(self.filters, resource, event)
AndFilter(*args)

An "And" Filter is just a list of filters

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def AndFilter(*args):  # pylint: disable=invalid-name
    """An "And" Filter is just a list of filters"""
    return list(args)
OrFilter(*args)

An "Or" Filter is just a tuple of filters

Source code in oper8/watch_manager/python_watch_manager/filters/manager.py
def OrFilter(*args):  # pylint: disable=invalid-name
    """An "Or" Filter is just a tuple of filters"""
    return tuple(args)
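
For illustration, a minimal sketch of how these compose. FilterA, FilterB, and FilterC are hypothetical placeholders standing in for real Filter subclasses; only the list/tuple behavior comes from the functions above. Lists are combined with operator.and_ and tuples with operator.or_ by the manager, and the two can be nested:

from oper8.watch_manager.python_watch_manager.filters.manager import AndFilter, OrFilter


class FilterA:  # placeholder for a real Filter subclass
    pass


class FilterB:  # placeholder for a real Filter subclass
    pass


class FilterC:  # placeholder for a real Filter subclass
    pass


# FilterA must pass, and at least one of FilterB/FilterC must pass
filter_chain = AndFilter(FilterA, OrFilter(FilterB, FilterC))
assert filter_chain == [FilterA, (FilterB, FilterC)]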

leader_election

init file for leadership election classes. Imports all leadership managers and defines a generic helper

get_leader_election_class()

Get the current configured leadership election

Source code in oper8/watch_manager/python_watch_manager/leader_election/__init__.py
def get_leader_election_class() -> Type[LeadershipManagerBase]:
    """Get the current configured leadership election"""
    if config.python_watch_manager.lock.type == "leader-for-life":
        return LeaderForLifeManager
    if config.python_watch_manager.lock.type == "leader-with-lease":
        return LeaderWithLeaseManager
    if config.python_watch_manager.lock.type == "annotation":
        return AnnotationLeadershipManager
    if config.python_watch_manager.lock.type == "dryrun":
        return DryRunLeadershipManager
    return DryRunLeadershipManager
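
A hedged usage sketch: the returned class is instantiated with a DeployManagerBase implementation and its global lock is acquired before reconciliation work starts. The deploy_manager placeholder below is an assumption; with no lock type configured the dry-run manager is returned, which accepts None.

from oper8.watch_manager.python_watch_manager.leader_election import (
    get_leader_election_class,
)

deploy_manager = None  # placeholder: a real DeployManagerBase implementation in practice

leadership_manager_class = get_leader_election_class()
leadership_manager = leadership_manager_class(deploy_manager=deploy_manager)

# Block until the global lock is held before doing any reconciliation work
if leadership_manager.acquire():
    ...
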
annotation

Annotation Based Leadership Manager

AnnotationLeadershipManager

Bases: LeadershipManagerBase

Annotation based leadership manager that uses two annotations to track leadership on a per-resource basis. This allows for horizontally scalable operations.

EXPERIMENTAL: This has passed basic validation but has not been rigorously tested in the field.

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
class AnnotationLeadershipManager(LeadershipManagerBase):
    """
    Annotation based leadership manager that uses two annotations
    to track leadership on a per-resource basis. This allows for
    horizontally scalable operations.

    EXPERIMENTAL: This has passed basic validation but has not been rigorously tested
     in the field
    """

    def __init__(self, deploy_manager: DeployManagerBase = None):
        """Initialize Leadership and gather current name

        Args:
            deploy_manager: DeployManagerBase = None
                DeployManager for this Manager
        """

        super().__init__(deploy_manager)
        self.duration_delta = parse_time_delta(
            config.python_watch_manager.lock.duration
        )

        # Gather lock_name, namespace and pod manifest
        self.pod_name = get_pod_name()
        assert_config(self.pod_name, "Unable to detect pod name")

    ## Lock Interface ####################################################
    def acquire(self, force: bool = False) -> bool:
        """
        Return true as leadership is managed at resource level
        """
        return True

    def acquire_resource(self, resource: ManagedObject):
        """Check a resource for leadership annotation and add one if it's expired
        or does not exist"""
        success, current_resource = self.deploy_manager.get_object_current_state(
            resource.kind, resource.name, resource.namespace, resource.api_version
        )
        if not success or not current_resource:
            log.warning(
                "Unable to fetch owner resource %s/%s/%s/%s",
                resource.kind,
                resource.api_version,
                resource.namespace,
                resource.name,
            )
            return False

        if "annotations" not in current_resource.get("metadata"):
            current_resource["metadata"]["annotations"] = {}

        # Check the current annotation
        annotations = current_resource["metadata"]["annotations"]
        current_time = datetime.now()

        # If no leader then take ownership
        if not annotations.get(LEASE_NAME_ANNOTATION_NAME):
            annotations[LEASE_NAME_ANNOTATION_NAME] = self.pod_name
            annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

        # If already the current leader then update lease time
        elif self.pod_name == annotations.get(LEASE_NAME_ANNOTATION_NAME):
            annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

        # If the current leader's lease has timed out then take ownership
        elif not self._check_lease_time(
            annotations.get(LEASE_TIME_ANNOTATION_NAME), current_time
        ):
            annotations[LEASE_NAME_ANNOTATION_NAME] = self.pod_name
            annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

        # Otherwise unable to acquire lock
        else:
            return False

        success, _ = self.deploy_manager.deploy([current_resource])
        if not success:
            log.warning(
                "Unable to update resource annotation%s/%s/%s/%s",
                resource.kind,
                resource.api_version,
                resource.namespace,
                resource.name,
            )
            return False

        return True

    def release(self):
        """
        Release lock on global resource
        """
        return True

    def release_resource(self, resource: ManagedObject):
        """
        Release lock on specific resource by removing the annotation
        """
        current_resource = copy(resource.definition)

        # Only clear annotation if we're the current leader
        if self.pod_name == current_resource["metadata"].get("annotations", {}).get(
            LEASE_NAME_ANNOTATION_NAME
        ):
            current_resource["metadata"]["annotations"][
                LEASE_NAME_ANNOTATION_NAME
            ] = None
            current_resource["metadata"]["annotations"][
                LEASE_TIME_ANNOTATION_NAME
            ] = None
            self.deploy_manager.deploy([current_resource])

        return True

    def is_leader(self, resource: Optional[ManagedObject] = None):
        """
        Determines if current instance is leader
        """
        if resource:
            annotations = resource.metadata.get("annotations", {})
            return self.pod_name == annotations.get(
                LEASE_NAME_ANNOTATION_NAME
            ) and self._check_lease_time(annotations.get(LEASE_TIME_ANNOTATION_NAME))

        return True

    def _check_lease_time(
        self, lease_time: str, current_time: Optional[datetime] = None
    ) -> bool:
        """Helper function to check if lease time is still valid

        Args:
            lease_time: str
                A datetime in isoformat
            current_time: Optional[datetime]
                The time to compare the lease_time to. Use datetime.now() if None

        Returns:
            valid_lease: bool
                Whether the lease holder should still be considered the owner
        """
        # Don't default to datetime.now() in function args as that's only evaluated once
        current_time = current_time or datetime.now()
        return current_time < datetime.fromisoformat(lease_time) + self.duration_delta
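
For reference, a sketch of the per-resource metadata that acquire_resource maintains on the owner resource. The annotation keys come from the LEASE_NAME_ANNOTATION_NAME and LEASE_TIME_ANNOTATION_NAME constants (their literal string values are not shown here), and the pod name and timestamp values are illustrative only:

from datetime import datetime

from oper8.watch_manager.python_watch_manager.leader_election.annotation import (
    LEASE_NAME_ANNOTATION_NAME,
    LEASE_TIME_ANNOTATION_NAME,
)

owner_metadata = {
    "annotations": {
        # Which pod currently holds leadership for this specific resource
        LEASE_NAME_ANNOTATION_NAME: "my-operator-pod-0",
        # When the lease was last renewed; compared against the
        # python_watch_manager.lock.duration config by _check_lease_time
        LEASE_TIME_ANNOTATION_NAME: datetime.now().isoformat(),
    }
}
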
__init__(deploy_manager=None)

Initialize Leadership and gather current name

Parameters:

Name Type Description Default
deploy_manager DeployManagerBase

DeployManagerBase = None DeployManager for this Manager

None
Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def __init__(self, deploy_manager: DeployManagerBase = None):
    """Initialize Leadership and gather current name

    Args:
        deploy_manager: DeployManagerBase = None
            DeployManager for this Manager
    """

    super().__init__(deploy_manager)
    self.duration_delta = parse_time_delta(
        config.python_watch_manager.lock.duration
    )

    # Gather lock_name, namespace and pod manifest
    self.pod_name = get_pod_name()
    assert_config(self.pod_name, "Unable to detect pod name")
acquire(force=False)

Return true as leadership is managed at resource level

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def acquire(self, force: bool = False) -> bool:
    """
    Return true as leadership is managed at resource level
    """
    return True
acquire_resource(resource)

Check a resource for leadership annotation and add one if it's expired or does not exist

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def acquire_resource(self, resource: ManagedObject):
    """Check a resource for leadership annotation and add one if it's expired
    or does not exist"""
    success, current_resource = self.deploy_manager.get_object_current_state(
        resource.kind, resource.name, resource.namespace, resource.api_version
    )
    if not success or not current_resource:
        log.warning(
            "Unable to fetch owner resource %s/%s/%s/%s",
            resource.kind,
            resource.api_version,
            resource.namespace,
            resource.name,
        )
        return False

    if "annotations" not in current_resource.get("metadata"):
        current_resource["metadata"]["annotations"] = {}

    # Check the current annotation
    annotations = current_resource["metadata"]["annotations"]
    current_time = datetime.now()

    # If no leader then take ownership
    if not annotations.get(LEASE_NAME_ANNOTATION_NAME):
        annotations[LEASE_NAME_ANNOTATION_NAME] = self.pod_name
        annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

    # If already the current leader then update lease time
    elif self.pod_name == annotations.get(LEASE_NAME_ANNOTATION_NAME):
        annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

    # If the current leader's lease has timed out then take ownership
    elif not self._check_lease_time(
        annotations.get(LEASE_TIME_ANNOTATION_NAME), current_time
    ):
        annotations[LEASE_NAME_ANNOTATION_NAME] = self.pod_name
        annotations[LEASE_TIME_ANNOTATION_NAME] = current_time.isoformat()

    # Otherwise unable to acquire lock
    else:
        return False

    success, _ = self.deploy_manager.deploy([current_resource])
    if not success:
        log.warning(
            "Unable to update resource annotation%s/%s/%s/%s",
            resource.kind,
            resource.api_version,
            resource.namespace,
            resource.name,
        )
        return False

    return True
is_leader(resource=None)

Determines if current instance is leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def is_leader(self, resource: Optional[ManagedObject] = None):
    """
    Determines if current instance is leader
    """
    if resource:
        annotations = resource.metadata.get("annotations", {})
        return self.pod_name == annotations.get(
            LEASE_NAME_ANNOTATION_NAME
        ) and self._check_lease_time(annotations.get(LEASE_TIME_ANNOTATION_NAME))

    return True
release()

Release lock on global resource

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def release(self):
    """
    Release lock on global resource
    """
    return True
release_resource(resource)

Release lock on specific resource by removing the annotation

Source code in oper8/watch_manager/python_watch_manager/leader_election/annotation.py
def release_resource(self, resource: ManagedObject):
    """
    Release lock on specific resource by removing the annotation
    """
    current_resource = copy(resource.definition)

    # Only clear annotation if we're the current leader
    if self.pod_name == current_resource["metadata"].get("annotations", {}).get(
        LEASE_NAME_ANNOTATION_NAME
    ):
        current_resource["metadata"]["annotations"][
            LEASE_NAME_ANNOTATION_NAME
        ] = None
        current_resource["metadata"]["annotations"][
            LEASE_TIME_ANNOTATION_NAME
        ] = None
        self.deploy_manager.deploy([current_resource])

    return True
base

Base classes for leader election implementations

LeadershipManagerBase

Bases: ABC

Base class for leader election. Leadership election in the PWM is split into two types: global and resource locks. Global locks are required to run any reconciliation while resource locks are required to reconcile a specific resource. Most child classes implement one of these locks.

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
class LeadershipManagerBase(abc.ABC):
    """
    Base class for leader election. Leadership election in the PWM
    is split into two types: global and resource locks. Global locks
    are required to run any reconciliation while resource locks are
    required to reconcile a specific resource. Most child classes
    implement one of these locks.
    """

    def __init__(self, deploy_manager: DeployManagerBase = None):
        """
        Initialize Class

        Args:
            deploy_manager:  DeployManagerBase
                DeployManager used in lock acquisition
        """
        self.deploy_manager = deploy_manager

    ## Lock Interface ####################################################
    @abc.abstractmethod
    def acquire(self, force: bool = False) -> bool:
        """
        Acquire or renew global lock

        Args:
            force:  bool
                Whether to force acquire the lock regardless of status. Used
                on shutdown

        Returns:
            success:  bool
                True on successful acquisition
        """

    @abc.abstractmethod
    def acquire_resource(self, resource: ManagedObject) -> bool:
        """
        Acquire or renew lock on specific resource

        Args:
            resource:  ManagedObject
                Resource to acquire lock for
        Returns:
            success:  bool
                True on successful acquisition
        """

    @abc.abstractmethod
    def release(self):
        """
        Release global lock
        """

    @abc.abstractmethod
    def release_resource(self, resource: ManagedObject):
        """
        Release lock on specific resource

        Args:
            resource:  ManagedObject
                Resource to release lock for
        """

    @abc.abstractmethod
    def is_leader(self, resource: Optional[ManagedObject] = None):
        """
        Determines if current instance is leader

        Args:
            resource:  Optional[ManagedObject]
                If provided, the resource to determine if the current instance
                is leader for. If no resource is provided then the global
                lock is checked
        Returns:
            leader:  bool
                True if instance is leader
        """
__init__(deploy_manager=None)

Initialize Class

Parameters:

Name Type Description Default
deploy_manager DeployManagerBase

DeployManagerBase DeployManager used in lock acquisition

None
Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def __init__(self, deploy_manager: DeployManagerBase = None):
    """
    Initialize Class

    Args:
        deploy_manager:  DeployManagerBase
            DeployManager used in lock acquisition
    """
    self.deploy_manager = deploy_manager
acquire(force=False) abstractmethod

Acquire or renew global lock

Parameters:

Name Type Description Default
force bool

bool Whether to force acquire the lock regardless of status. Used on shutdown

False

Returns:

Name Type Description
success bool

bool True on successful acquisition

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
@abc.abstractmethod
def acquire(self, force: bool = False) -> bool:
    """
    Acquire or renew global lock

    Args:
        force:  bool
            Whether to force acquire the lock regardless of status. Used
            on shutdown

    Returns:
        success:  bool
            True on successful acquisition
    """
acquire_resource(resource) abstractmethod

Acquire or renew lock on specific resource

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject Resource to acquire lock for

required

Returns: success: bool True on successful acquisition

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
@abc.abstractmethod
def acquire_resource(self, resource: ManagedObject) -> bool:
    """
    Acquire or renew lock on specific resource

    Args:
        resource:  ManagedObject
            Resource to acquire lock for
    Returns:
        success:  bool
            True on successful acquisition
    """
is_leader(resource=None) abstractmethod

Determines if current instance is leader

Parameters:

Name Type Description Default
resource Optional[ManagedObject]

Optional[ManagedObject] If provided, the resource to determine if the current instance is leader for. If no resource is provided then the global lock is checked

None

Returns: leader: bool True if instance is leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
@abc.abstractmethod
def is_leader(self, resource: Optional[ManagedObject] = None):
    """
    Determines if current instance is leader

    Args:
        resource:  Optional[ManagedObject]
            If provided, the resource to determine if the current instance
            is leader for. If no resource is provided then the global
            lock is checked
    Returns:
        leader:  bool
            True if instance is leader
    """
release() abstractmethod

Release global lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
@abc.abstractmethod
def release(self):
    """
    Release global lock
    """
release_resource(resource) abstractmethod

Release lock on specific resource

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject Resource to release lock for

required
Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
@abc.abstractmethod
def release_resource(self, resource: ManagedObject):
    """
    Release lock on specific resource

    Args:
        resource:  ManagedObject
            Resource to release lock for
    """
ThreadedLeaderManagerBase

Bases: LeadershipManagerBase

Base class for threaded leadership election. This base class aids in the creation of leadership election classes that require constantly checking or updating a resource. Child classes only need to implement renew_or_acquire, and it will automatically be looped while lock acquisition is needed

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
class ThreadedLeaderManagerBase(LeadershipManagerBase, metaclass=ABCSingletonMeta):
    """
    Base class for threaded leadership election. This base class aids in the
    creation of leadership election classes that require constantly checking
    or updating a resource. Child classes only need to implement renew_or_acquire,
    and it will automatically be looped while lock acquisition is needed
    """

    def __init__(self, deploy_manager: DeployManagerBase):
        """
        Initialize class with events to track leadership and shutdown and
        a lock to ensure renew_or_acquire is only run once at a time.

        Args:
            deploy_manager: DeployManagerBase
                DeployManager for leader election
        """
        super().__init__(deploy_manager)

        # Events to track status
        self.leader = threading.Event()
        self.shutdown = threading.Event()

        # Lock to ensure multiple acquires aren't running at the same time
        self.run_lock = threading.Lock()

        # Object to track Leadership thread
        self.leadership_thread = threading.Thread(
            name="leadership_thread", target=self.run, daemon=True
        )

        # Calculate threaded poll time:
        poll_time_delta = parse_time_delta(config.python_watch_manager.lock.poll_time)
        if not poll_time_delta:
            log.error(
                "Invalid 'python_watch_manager.lock.poll_time' value: '%s'",
                config.python_watch_manager.lock.poll_time,
            )
            raise ConfigError(
                "Invalid 'python_watch_manager.lock.poll_time' value: "
                f"'{config.python_watch_manager.lock.poll_time}'"
            )
        self.poll_time = poll_time_delta.seconds

    ## Public Interface ####################################################

    def renew_or_acquire(self):
        """
        Renew or acquire leadership lock
        """
        raise NotImplementedError

    def acquire_lock(self):
        """
        Helper function for child classes to acquire leadership lock
        """
        if not self.leader.is_set():
            log.debug2("Acquiring leadership lock")
        # Always set the lock during acquire_lock to avoid concurrency issues
        self.leader.set()

    def release_lock(self):
        """
        Helper function for child classes to release lock
        """
        if self.leader.is_set():
            log.debug2("Releasing leadership lock")
        self.leader.clear()

    ## Lock Interface ####################################################
    def acquire(self, force: bool = False):
        """
        Start/Restart leadership thread or run renew_or_acquire

        Args:
            force:  bool=False
                Whether to force acquire the lock

        Returns:
            success:  bool
                True on successful acquisition
        """
        if force:
            self.leader.set()
            return True

        # ident is set when thread has started
        if not self.leadership_thread.is_alive():
            # Recreate the leadership thread if it has already exited
            if self.leadership_thread.ident:
                self.leadership_thread = threading.Thread(
                    name="leadership_thread", target=self.run, daemon=True
                )
            log.info(
                "Starting %s: %s", self.__class__.__name__, self.leadership_thread.name
            )
            self.leadership_thread.start()
        else:
            self.run_renew_or_acquire()

        return self.leader.wait()

    def acquire_resource(self, resource: ManagedObject) -> bool:
        """
        Lock in background so acquire_resource just waits for value

        Args:
            resource:  ManagedObject
                Resource that is being locked

        Returns:
            success:  bool
                True on successful acquisition else False
        """
        return self.leader.wait()

    def release(self):
        """
        Release lock and shut down the leader election thread. This method
        first shuts down the background thread before clearing the lock
        """
        self.shutdown.set()
        self.leadership_thread.join()
        self.leader.clear()

    def release_resource(self, resource: ManagedObject):
        """
        Release resource is not implemented in Threaded classes
        """

    def is_leader(self, resource: Optional[ManagedObject] = None) -> bool:
        """
        Return if leader event has been acquired

        Returns:
            leader: bool
                If instance is current leader
        """
        return self.leader.is_set()

    ## Implementation Details ####################################################

    def run(self):
        """
        Loop to continuously run renew or acquire every so often
        """
        while True:
            if self.shutdown.is_set():
                log.debug("Shutting down %s Thread", self.__class__.__name__)
                return

            self.run_renew_or_acquire()
            self.shutdown.wait(self.poll_time)

    def run_renew_or_acquire(self):
        """
        Run renew_or_acquire safely and with threaded lock
        """
        log.debug2("Running renew or acquire for %s lock", self.__class__.__name__)
        with self.run_lock:
            try:
                self.renew_or_acquire()
            except Exception as err:
                log.warning(
                    "Error detected while acquiring leadership lock", exc_info=True
                )
                raise RuntimeError("Error detected when acquiring lock") from err
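
A hedged sketch of a concrete subclass: only renew_or_acquire needs to be implemented, and the base class calls it from the background leadership thread every python_watch_manager.lock.poll_time interval. The file-based lock below is purely illustrative and not part of oper8:

import os

from oper8.watch_manager.python_watch_manager.leader_election.base import (
    ThreadedLeaderManagerBase,
)


class FileLockLeaderManager(ThreadedLeaderManagerBase):
    """Toy manager that claims leadership via a local marker file"""

    LOCK_PATH = "/tmp/oper8-toy-leader.lock"

    def renew_or_acquire(self):
        # Called repeatedly by run() while holding run_lock
        try:
            # Atomically create the marker file if no one holds the lock yet
            with open(self.LOCK_PATH, "x") as lock_file:
                lock_file.write(str(os.getpid()))
        except FileExistsError:
            pass

        with open(self.LOCK_PATH) as lock_file:
            holder = lock_file.read().strip()

        # acquire_lock/release_lock toggle the threading.Event consulted by
        # acquire(), acquire_resource(), and is_leader()
        if holder == str(os.getpid()):
            self.acquire_lock()
        else:
            self.release_lock()
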
__init__(deploy_manager)

Initialize class with events to track leadership and shutdown and a lock to ensure renew_or_acquire is only run once at a time.

Parameters:

Name Type Description Default
deploy_manager DeployManagerBase

DeployManagerBase DeployManager for leader election

required
Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def __init__(self, deploy_manager: DeployManagerBase):
    """
    Initialize class with events to track leadership and shutdown and
    a lock to ensure renew_or_acquire is only run once at a time.

    Args:
        deploy_manager: DeployManagerBase
            DeployManager for leader election
    """
    super().__init__(deploy_manager)

    # Events to track status
    self.leader = threading.Event()
    self.shutdown = threading.Event()

    # Lock to ensure multiple acquires aren't running at the same time
    self.run_lock = threading.Lock()

    # Object to track Leadership thread
    self.leadership_thread = threading.Thread(
        name="leadership_thread", target=self.run, daemon=True
    )

    # Calculate threaded poll time:
    poll_time_delta = parse_time_delta(config.python_watch_manager.lock.poll_time)
    if not poll_time_delta:
        log.error(
            "Invalid 'python_watch_manager.lock.poll_time' value: '%s'",
            config.python_watch_manager.lock.poll_time,
        )
        raise ConfigError(
            "Invalid 'python_watch_manager.lock.poll_time' value: "
            f"'{config.python_watch_manager.lock.poll_time}'"
        )
    self.poll_time = poll_time_delta.seconds
acquire(force=False)

Start/Restart leadership thread or run renew_or_acquire

Parameters:

Name Type Description Default
force bool

bool=False Whether to force acquire the lock

False

Returns:

Name Type Description
success

bool True on successful acquisition

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def acquire(self, force: bool = False):
    """
    Start/Restart leadership thread or run renew_or_acquire

    Args:
        force:  bool=False
            Whether to force acquire the lock

    Returns:
        success:  bool
            True on successful acquisition
    """
    if force:
        self.leader.set()
        return True

    # ident is set when thread has started
    if not self.leadership_thread.is_alive():
        # Recreate the leadership thread if it has already exited
        if self.leadership_thread.ident:
            self.leadership_thread = threading.Thread(
                name="leadership_thread", target=self.run, daemon=True
            )
        log.info(
            "Starting %s: %s", self.__class__.__name__, self.leadership_thread.name
        )
        self.leadership_thread.start()
    else:
        self.run_renew_or_acquire()

    return self.leader.wait()
acquire_lock()

Helper function for child classes to acquire leadership lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def acquire_lock(self):
    """
    Helper function for child classes to acquire leadership lock
    """
    if not self.leader.is_set():
        log.debug2("Acquiring leadership lock")
    # Always set the lock during acquire_lock to avoid concurrency issues
    self.leader.set()
acquire_resource(resource)

Lock in background so acquire_resource just waits for value

Parameters:

Name Type Description Default
resource ManagedObject

ManagedObject Resource that is being locked

required

Returns:

Name Type Description
success bool

bool True on successful acquisition else False

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def acquire_resource(self, resource: ManagedObject) -> bool:
    """
    Lock in background so acquire_resource just waits for value

    Args:
        resource:  ManagedObject
            Resource that is being locked

    Returns:
        success:  bool
            True on successful acquisition else False
    """
    return self.leader.wait()
is_leader(resource=None)

Return if leader event has been acquired

Returns:

Name Type Description
leader bool

bool If instance is current leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def is_leader(self, resource: Optional[ManagedObject] = None) -> bool:
    """
    Return if leader event has been acquired

    Returns:
        leader: bool
            If instance is current leader
    """
    return self.leader.is_set()
release()

Release lock and shut down the leader election thread. This method first shuts down the background thread before clearing the lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def release(self):
    """
    Release lock and shut down the leader election thread. This method
    first shuts down the background thread before clearing the lock
    """
    self.shutdown.set()
    self.leadership_thread.join()
    self.leader.clear()
release_lock()

Helper function for child classes to release lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def release_lock(self):
    """
    Helper function for child classes to release lock
    """
    if self.leader.is_set():
        log.debug2("Releasing leadership lock")
    self.leader.clear()
release_resource(resource)

Release resource is not implemented in Threaded classes

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def release_resource(self, resource: ManagedObject):
    """
    Release resource is not implemented in Threaded classes
    """
renew_or_acquire()

Renew or acquire leadership lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def renew_or_acquire(self):
    """
    Renew or acquire leadership lock
    """
    raise NotImplementedError
run()

Loop to continuously run renew or acquire every so often

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def run(self):
    """
    Loop to continuously run renew or acquire every so often
    """
    while True:
        if self.shutdown.is_set():
            log.debug("Shutting down %s Thread", self.__class__.__name__)
            return

        self.run_renew_or_acquire()
        self.shutdown.wait(self.poll_time)
run_renew_or_acquire()

Run renew_or_acquire safely and with threaded lock

Source code in oper8/watch_manager/python_watch_manager/leader_election/base.py
def run_renew_or_acquire(self):
    """
    Run renew_or_acquire safely and with threaded lock
    """
    log.debug2("Running renew or acquire for %s lock", self.__class__.__name__)
    with self.run_lock:
        try:
            self.renew_or_acquire()
        except Exception as err:
            log.warning(
                "Error detected while acquiring leadership lock", exc_info=True
            )
            raise RuntimeError("Error detected when acquiring lock") from err
dry_run

Implementation of the DryRun LeaderElection

DryRunLeadershipManager

Bases: LeadershipManagerBase

DryRunLeaderElection class implements an empty leadership election manager which always acts as a leader. This is useful for dryrun or running without leadership election

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
class DryRunLeadershipManager(LeadershipManagerBase):
    """DryRunLeaderElection class implements an empty leadership
    election manager which always acts as a leader. This is useful
    for dryrun or running without leadership election"""

    def acquire(self, force: bool = False):
        """
        Return true as dryrun is always leader
        """
        return True

    def acquire_resource(self, resource: ManagedObject):
        """
        Return true as dryrun is always leader
        """
        return True

    def release(self):
        """
        NoOp in DryRun as lock is not real
        """

    def release_resource(self, resource: ManagedObject):
        """
        NoOp in DryRun as lock is not real
        """

    def is_leader(self, resource: Optional[ManagedObject] = None):
        """
        DryRunLeadershipManager is always leader
        """
        return True
acquire(force=False)

Return true as dryrun is always leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
def acquire(self, force: bool = False):
    """
    Return true as dryrun is always leader
    """
    return True
acquire_resource(resource)

Return true as dryrun is always leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
def acquire_resource(self, resource: ManagedObject):
    """
    Return true as dryrun is always leader
    """
    return True
is_leader(resource=None)

DryRunLeadershipManager is always leader

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
def is_leader(self, resource: Optional[ManagedObject] = None):
    """
    DryRunLeadershipManager is always leader
    """
    return True
release()

NoOp in DryRun as lock is not real

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
def release(self):
    """
    NoOp in DryRun as lock is not real
    """
release_resource(resource)

NoOp in DryRun as lock is not real

Source code in oper8/watch_manager/python_watch_manager/leader_election/dry_run.py
def release_resource(self, resource: ManagedObject):
    """
    NoOp in DryRun as lock is not real
    """
lease

Implementation of the Leader-with-Lease LeaderElection

LeaderWithLeaseManager

Bases: ThreadedLeaderManagerBase

LeaderWithLeaseManager Class implements the "leader-with-lease" operator-sdk lock type. This lock creates a lease object with the operator pod as owner and constantly re-acquires the lock.

Source code in oper8/watch_manager/python_watch_manager/leader_election/lease.py
class LeaderWithLeaseManager(ThreadedLeaderManagerBase):
    """
    LeaderWithLeaseManager Class implements the "leader-with-lease" operator-sdk
    lock type. This lock creates a lease object with the operator pod as owner and
    constantly re-acquires the lock.
    """

    def __init__(self, deploy_manager):
        """
        Initialize class with lock_name, current namespace, and pod information
        """
        super().__init__(deploy_manager)

        # Gather lock_name, namespace and pod manifest
        self.lock_name = (
            config.operator_name
            if config.operator_name
            else config.python_watch_manager.lock.name
        )
        self.namespace = get_operator_namespace()
        self.lock_identity = get_pod_name()
        assert_config(self.lock_name, "Unable to detect lock name")
        assert_config(self.namespace, "Unable to detect operator namespace")
        assert_config(self.lock_identity, "Unable to detect lock identity")

    def renew_or_acquire(self):
        """
        Renew or acquire lock by checking the current lease status
        """

        # Template out the expected lease. This is edited based on the current
        # lease status
        current_time = datetime.now(timezone.utc)
        lease_resource_version = None
        expected_lease_data = {
            "holderIdentity": self.lock_identity,
            "acquireTime": current_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            "leaseDurationSeconds": round(
                parse_time_delta(
                    config.python_watch_manager.lock.duration
                ).total_seconds()
            ),
            "leaseTransitions": 1,
            "renewTime": current_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
        }

        # Get current lease
        success, lease_obj = self.deploy_manager.get_object_current_state(
            kind="Lease",
            name=self.lock_name,
            namespace=self.namespace,
            api_version="coordination.k8s.io/v1",
        )
        if not success:
            log.warning("Unable to fetch lease %s/%s", self.namespace, self.lock_name)

        # If lease exists then verify current holder is valid or update the expected
        # lease with the proper values
        if lease_obj and lease_obj.get("spec"):
            log.debug2(
                "Lease object %s already exists, checking holder", self.lock_name
            )

            lease_resource_version = lease_obj.get("metadata", {}).get(
                "resourceVersion"
            )
            lease_spec = lease_obj.get("spec")
            lock_holder = lease_spec.get("holderIdentity")

            if lock_holder != self.lock_identity:
                renew_time = parse(lease_spec.get("renewTime"))
                lease_duration = timedelta(
                    seconds=lease_spec.get("leaseDurationSeconds")
                )

                # If the renew+lease is after the current time then the other
                # lease holder is still valid
                if (renew_time + lease_duration) > current_time:
                    self.release_lock()
                    return

                log.info("Taking leadership from %s", lock_holder)
                # Increment leaseTransitions as we're taking ownership
                expected_lease_data["leaseTransitions"] = (
                    lease_spec.get("leaseTransitions", 1) + 1
                )

            # If we're the current holder then keep the current acquire time
            else:
                log.debug2(
                    "Lease object already owned. Reusing acquireTime and transitions"
                )
                expected_lease_data["acquireTime"] = lease_spec.get("acquireTime")
                expected_lease_data["leaseTransitions"] = lease_spec.get(
                    "leaseTransitions"
                )

        # Create or update the lease obj
        lease_resource = {
            "kind": "Lease",
            "apiVersion": "coordination.k8s.io/v1",
            "metadata": {
                "name": self.lock_name,
                "namespace": self.namespace,
            },
            "spec": expected_lease_data,
        }
        if lease_resource_version:
            lease_resource["metadata"]["resourceVersion"] = lease_resource_version

        success, _ = self.deploy_manager.deploy(
            [lease_resource], manage_owner_references=False
        )
        if not success:
            log.warning("Unable to acquire leadership lock")
            self.release_lock()
        else:
            self.acquire_lock()
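
For reference, the shape of the coordination.k8s.io/v1 Lease manifest that renew_or_acquire deploys. The values below are illustrative; the real ones come from the operator name, operator namespace, pod name, and the python_watch_manager.lock.duration config:

lease_resource = {
    "kind": "Lease",
    "apiVersion": "coordination.k8s.io/v1",
    "metadata": {
        "name": "my-operator",        # lock_name
        "namespace": "my-namespace",  # operator namespace
    },
    "spec": {
        "holderIdentity": "my-operator-pod-0",  # lock_identity (pod name)
        "acquireTime": "2024-01-01T00:00:00.000000Z",
        "renewTime": "2024-01-01T00:00:00.000000Z",
        "leaseDurationSeconds": 30,
        "leaseTransitions": 1,
    },
}
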
__init__(deploy_manager)

Initialize class with lock_name, current namespace, and pod information

Source code in oper8/watch_manager/python_watch_manager/leader_election/lease.py
def __init__(self, deploy_manager):
    """
    Initialize class with lock_name, current namespace, and pod information
    """
    super().__init__(deploy_manager)

    # Gather lock_name, namespace and pod manifest
    self.lock_name = (
        config.operator_name
        if config.operator_name
        else config.python_watch_manager.lock.name
    )
    self.namespace = get_operator_namespace()
    self.lock_identity = get_pod_name()
    assert_config(self.lock_name, "Unable to detect lock name")
    assert_config(self.namespace, "Unable to detect operator namespace")
    assert_config(self.lock_identity, "Unable to detect lock identity")
renew_or_acquire()

Renew or acquire lock by checking the current lease status

Source code in oper8/watch_manager/python_watch_manager/leader_election/lease.py
def renew_or_acquire(self):
    """
    Renew or acquire lock by checking the current lease status
    """

    # Template out the expected lease. This is edited based on the current
    # lease status
    current_time = datetime.now(timezone.utc)
    lease_resource_version = None
    expected_lease_data = {
        "holderIdentity": self.lock_identity,
        "acquireTime": current_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
        "leaseDurationSeconds": round(
            parse_time_delta(
                config.python_watch_manager.lock.duration
            ).total_seconds()
        ),
        "leaseTransitions": 1,
        "renewTime": current_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
    }

    # Get current lease
    success, lease_obj = self.deploy_manager.get_object_current_state(
        kind="Lease",
        name=self.lock_name,
        namespace=self.namespace,
        api_version="coordination.k8s.io/v1",
    )
    if not success:
        log.warning("Unable to fetch lease %s/%s", self.namespace, self.lock_name)

    # If lease exists then verify current holder is valid or update the expected
    # lease with the proper values
    if lease_obj and lease_obj.get("spec"):
        log.debug2(
            "Lease object %s already exists, checking holder", self.lock_name
        )

        lease_resource_version = lease_obj.get("metadata", {}).get(
            "resourceVersion"
        )
        lease_spec = lease_obj.get("spec")
        lock_holder = lease_spec.get("holderIdentity")

        if lock_holder != self.lock_identity:
            renew_time = parse(lease_spec.get("renewTime"))
            lease_duration = timedelta(
                seconds=lease_spec.get("leaseDurationSeconds")
            )

            # If the renew time + lease duration is after the current time then the
            # other lease holder is still valid
            if (renew_time + lease_duration) > current_time:
                self.release_lock()
                return

            log.info("Taking leadership from %s", lock_holder)
            # Increment leaseTransitions as we're taking ownership
            expected_lease_data["leaseTransitions"] = (
                lease_spec.get("leaseTransitions", 1) + 1
            )

        # If we're the current holder then keep the current acquire time
        else:
            log.debug2(
                "Lease object already owned. Reusing acquireTime and transitions"
            )
            expected_lease_data["acquireTime"] = lease_spec.get("acquireTime")
            expected_lease_data["leaseTransitions"] = lease_spec.get(
                "leaseTransitions"
            )

    # Create or update the lease obj
    lease_resource = {
        "kind": "Lease",
        "apiVersion": "coordination.k8s.io/v1",
        "metadata": {
            "name": self.lock_name,
            "namespace": self.namespace,
        },
        "spec": expected_lease_data,
    }
    if lease_resource_version:
        lease_resource["metadata"]["resourceVersion"] = lease_resource_version

    success, _ = self.deploy_manager.deploy(
        [lease_resource], manage_owner_references=False
    )
    if not success:
        log.warning("Unable to acquire leadership lock")
        self.release_lock()
    else:
        self.acquire_lock()
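
The heart of the lease check above is the expiry arithmetic: a competing holder keeps the lock only while renewTime + leaseDurationSeconds is still in the future. Below is a minimal sketch of that check, assuming the same dateutil parser used in the listing; the helper name and example values are illustrative and not part of oper8.

from datetime import datetime, timedelta, timezone

from dateutil.parser import parse


def other_holder_still_valid(lease_spec: dict) -> bool:
    """Illustrative helper: True if another holder's lease has not yet expired"""
    renew_time = parse(lease_spec["renewTime"])
    duration = timedelta(seconds=lease_spec["leaseDurationSeconds"])
    return (renew_time + duration) > datetime.now(timezone.utc)


# A lease renewed 10 seconds ago with a 30 second duration is still held
example_spec = {
    "renewTime": (datetime.now(timezone.utc) - timedelta(seconds=10)).strftime(
        "%Y-%m-%dT%H:%M:%S.%fZ"
    ),
    "leaseDurationSeconds": 30,
}
assert other_holder_still_valid(example_spec)
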
life

Implementation of the Leader-for-Life LeaderElection

LeaderForLifeManager

Bases: ThreadedLeaderManagerBase

LeaderForLifeManager Class implements the old "leader-for-life" operator-sdk lock type. This lock creates a configmap with the operator pod as owner in the current namespace. This way, when the pod is deleted or lost, so is the configmap.

Source code in oper8/watch_manager/python_watch_manager/leader_election/life.py
class LeaderForLifeManager(ThreadedLeaderManagerBase):
    """
    LeaderForLifeManager Class implements the old "leader-for-life" operator-sdk
    lock type. This lock creates a configmap with the operator pod as owner in
    the current namespace. This way, when the pod is deleted or lost, so is the
    configmap.
    """

    def __init__(self, deploy_manager):
        """
        Initialize class with lock_name, current namespace, and pod information
        """
        super().__init__(deploy_manager)

        # Gather lock_name, namespace and pod manifest
        self.lock_name = (
            config.operator_name
            if config.operator_name
            else config.python_watch_manager.lock.name
        )

        self.namespace = get_operator_namespace()
        pod_name = get_pod_name()
        assert_config(self.lock_name, "Unable to detect lock name")
        assert_config(self.namespace, "Unable to detect operator namespace")
        assert_config(pod_name, "Unable to detect pod name")

        # Get the current pod context which is used in the lock configmap
        log.debug("Gathering pod context information")
        success, pod_obj = self.deploy_manager.get_object_current_state(
            kind="Pod", name=pod_name, namespace=self.namespace, api_version="v1"
        )
        if not success or not pod_obj:
            log.error(
                "Unable to fetch pod %s/%s Unable to use leader-for-life without ownerReference",
                self.namespace,
                pod_name,
            )
            raise ConfigError(
                f"Unable to fetch pod {self.namespace}/{pod_name}."
                "Unable to use leader-for-life without ownerReference"
            )

        self.pod_manifest = ManagedObject(pod_obj)

    def renew_or_acquire(self):
        """
        Renew or acquire lock by checking the current configmap status
        """
        # Get current config map
        success, cluster_config_map = self.deploy_manager.get_object_current_state(
            kind="ConfigMap",
            name=self.lock_name,
            namespace=self.namespace,
            api_version="v1",
        )
        if not success:
            log.warning(
                "Unable to fetch config map %s/%s", self.namespace, self.lock_name
            )

        # If configmap exists then verify owner ref
        if cluster_config_map:
            log.debug2(
                f"ConfigMap Lock {cluster_config_map} already exists, checking ownership"
            )
            owner_ref_list = nested_get(
                cluster_config_map, "metadata.ownerReferences", []
            )
            if len(owner_ref_list) != 1:
                log.error(
                    "Invalid leadership config map detected. Only one owner allowed"
                )
                self.release_lock()
                return

            if owner_ref_list[0].get("uid") == self.pod_manifest.uid:
                self.acquire_lock()
            else:
                self.release_lock()

        # Create configmap if it doesn't exist
        else:
            log.debug2(f"ConfigMap Lock {cluster_config_map} does not exist, creating")
            config_map = {
                "kind": "ConfigMap",
                "apiVersion": "v1",
                "metadata": {
                    "name": self.lock_name,
                    "namespace": self.namespace,
                },
            }
            update_owner_references(
                self.deploy_manager, self.pod_manifest.definition, config_map
            )
            success, _ = self.deploy_manager.deploy(
                [config_map], manage_owner_references=False
            )
            if not success:
                log.warning("Unable to acquire leadership lock")
                self.release_lock()
            else:
                self.acquire_lock()
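
The "leader-for-life" pattern hinges on the ownerReferences field: because the lock ConfigMap is owned by the operator pod, Kubernetes garbage-collects the ConfigMap when that pod goes away and the next pod can claim leadership. The following is a rough sketch of the shape of that lock manifest, assuming a pod manifest dict is available; in the class above the owner reference is actually filled in by update_owner_references.

def build_lock_configmap(lock_name: str, namespace: str, pod_manifest: dict) -> dict:
    """Illustrative helper showing the shape of a leader-for-life lock ConfigMap"""
    return {
        "kind": "ConfigMap",
        "apiVersion": "v1",
        "metadata": {
            "name": lock_name,
            "namespace": namespace,
            # Owning the ConfigMap by the pod ties the lock's lifetime to the pod
            "ownerReferences": [
                {
                    "apiVersion": pod_manifest["apiVersion"],
                    "kind": pod_manifest["kind"],
                    "name": pod_manifest["metadata"]["name"],
                    "uid": pod_manifest["metadata"]["uid"],
                }
            ],
        },
    }
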
__init__(deploy_manager)

Initialize class with lock_name, current namespace, and pod information

Source code in oper8/watch_manager/python_watch_manager/leader_election/life.py
def __init__(self, deploy_manager):
    """
    Initialize class with lock_name, current namespace, and pod information
    """
    super().__init__(deploy_manager)

    # Gather lock_name, namespace and pod manifest
    self.lock_name = (
        config.operator_name
        if config.operator_name
        else config.python_watch_manager.lock.name
    )

    self.namespace = get_operator_namespace()
    pod_name = get_pod_name()
    assert_config(self.lock_name, "Unable to detect lock name")
    assert_config(self.namespace, "Unable to detect operator namespace")
    assert_config(pod_name, "Unable to detect pod name")

    # Get the current pod context which is used in the lock configmap
    log.debug("Gathering pod context information")
    success, pod_obj = self.deploy_manager.get_object_current_state(
        kind="Pod", name=pod_name, namespace=self.namespace, api_version="v1"
    )
    if not success or not pod_obj:
        log.error(
            "Unable to fetch pod %s/%s Unable to use leader-for-life without ownerReference",
            self.namespace,
            pod_name,
        )
        raise ConfigError(
            f"Unable to fetch pod {self.namespace}/{pod_name}."
            "Unable to use leader-for-life without ownerReference"
        )

    self.pod_manifest = ManagedObject(pod_obj)
renew_or_acquire()

Renew or acquire lock by checking the current configmap status

Source code in oper8/watch_manager/python_watch_manager/leader_election/life.py
def renew_or_acquire(self):
    """
    Renew or acquire lock by checking the current configmap status
    """
    # Get current config map
    success, cluster_config_map = self.deploy_manager.get_object_current_state(
        kind="ConfigMap",
        name=self.lock_name,
        namespace=self.namespace,
        api_version="v1",
    )
    if not success:
        log.warning(
            "Unable to fetch config map %s/%s", self.namespace, self.lock_name
        )

    # If configmap exists then verify owner ref
    if cluster_config_map:
        log.debug2(
            f"ConfigMap Lock {cluster_config_map} already exists, checking ownership"
        )
        owner_ref_list = nested_get(
            cluster_config_map, "metadata.ownerReferences", []
        )
        if len(owner_ref_list) != 1:
            log.error(
                "Invalid leadership config map detected. Only one owner allowed"
            )
            self.release_lock()
            return

        if owner_ref_list[0].get("uid") == self.pod_manifest.uid:
            self.acquire_lock()
        else:
            self.release_lock()

    # Create configmap if it doesn't exist
    else:
        log.debug2(f"ConfigMap Lock {cluster_config_map} does not exist, creating")
        config_map = {
            "kind": "ConfigMap",
            "apiVersion": "v1",
            "metadata": {
                "name": self.lock_name,
                "namespace": self.namespace,
            },
        }
        update_owner_references(
            self.deploy_manager, self.pod_manifest.definition, config_map
        )
        success, _ = self.deploy_manager.deploy(
            [config_map], manage_owner_references=False
        )
        if not success:
            log.warning("Unable to acquire leadership lock")
            self.release_lock()
        else:
            self.acquire_lock()

python_watch_manager

Python-based implementation of the WatchManager

PythonWatchManager

Bases: WatchManagerBase

The PythonWatchManager uses the kubernetes watch client to watch a particular Controller and execute reconciles. It does the following two things:

  1. Submit a generic watch request for each namespace
  2. Start a reconcile thread to launch reconciliation subprocesses
Source code in oper8/watch_manager/python_watch_manager/python_watch_manager.py
class PythonWatchManager(WatchManagerBase):
    """The PythonWatchManager uses the kubernetes watch client to watch
    a particular Controller and execute reconciles. It does the following
    two things

    1. Request a generic watch request for each namespace
    2. Start a reconcile thread to start reconciliation subprocesses
    """

    def __init__(
        self,
        controller_type: Type[Controller],
        deploy_manager: Optional[OpenshiftDeployManager] = None,
        namespace_list: Optional[List[str]] = None,
    ):
        """Initialize the required threads and submit the watch requests
        Args:
            controller_type: Type[Controller]
                The controller to be watched
            deploy_manager: Optional[OpenshiftDeployManager] = None
                An optional DeployManager override
            namespace_list: Optional[List[str]] = []
                A list of namespaces to watch
        """
        super().__init__(controller_type)

        # Handle functional args
        if deploy_manager is None:
            log.debug("Using OpenshiftDeployManager")
            deploy_manager = OpenshiftDeployManager()
        self.deploy_manager = deploy_manager

        # Setup watch namespace
        self.namespace_list = namespace_list or []
        if not namespace_list and config.watch_namespace != "":
            self.namespace_list = config.watch_namespace.split(",")

        # Setup Control variables
        self.shutdown = threading.Event()

        # Setup Threads. These are both singleton instances and will be
        # the same across all PythonWatchManagers
        self.leadership_manager: LeadershipManagerBase = get_leader_election_class()(
            self.deploy_manager
        )
        self.reconcile_thread: ReconcileThread = ReconcileThread(
            deploy_manager=self.deploy_manager,
            leadership_manager=self.leadership_manager,
        )
        self.heartbeat_thread: Optional[HeartbeatThread] = None
        if config.python_watch_manager.heartbeat_file:
            self.heartbeat_thread = HeartbeatThread(
                config.python_watch_manager.heartbeat_file,
                config.python_watch_manager.heartbeat_period,
            )

        # Start thread for each resource watch
        self.controller_watches: List[WatchThread] = []
        if len(self.namespace_list) == 0 or "*" in self.namespace_list:
            self.controller_watches.append(self._add_resource_watch())
        else:
            for namespace in self.namespace_list:
                self.controller_watches.append(self._add_resource_watch(namespace))

    ## Interface ###############################################################

    def watch(self) -> bool:
        """Check for leadership and start all threads

        Returns:
            success:  bool
                True if all threads and processes are running correctly
        """
        log.info("Starting PythonWatchManager: %s", self)

        if not self.leadership_manager.is_leader():
            log.debug("Acquiring Leadership lock before starting %s", self)
            self.leadership_manager.acquire()

        # If watch has been shutdown then exit before starting threads
        if self.shutdown.is_set():
            return False

        # Start reconcile thread and all watch threads
        self.reconcile_thread.start_thread()
        for watch_thread in self.controller_watches:
            log.debug("Starting watch_thread: %s", watch_thread)
            watch_thread.start_thread()
        if self.heartbeat_thread:
            log.debug("Starting heartbeat_thread")
            self.heartbeat_thread.start_thread()
        return True

    def wait(self):
        """Wait shutdown to be signaled"""
        self.shutdown.wait()

    def stop(self):
        """Stop all threads. This waits for all reconciles
        to finish
        """

        log.info(
            "Stopping PythonWatchManager for %s/%s/%s",
            self.group,
            self.version,
            self.kind,
        )

        # Set shutdown and acquire leadership to clear any deadlocks
        self.shutdown.set()
        self.leadership_manager.acquire(force=True)

        # Stop all threads
        for watch in get_resource_watches():
            watch.stop_thread()
        self.reconcile_thread.stop_thread()
        self.leadership_manager.release()
        if self.heartbeat_thread:
            self.heartbeat_thread.stop_thread()

    ## Helper Functions ###############################################################

    def _add_resource_watch(self, namespace: Optional[str] = None):
        """Request a generic watch request. Optionally for a specific namespace

        Args:
            namespace: Optional[str] = None
                An optional namespace to watch
        """
        log.debug3("Adding %s request for %s", namespace if namespace else "", self)

        # In the global watch manager the controller is both
        # the watched and the requested objects
        resource_id = ResourceId.from_controller(self.controller_type, namespace)
        request = WatchRequest(
            controller_type=self.controller_type,
            watched=resource_id,
            requester=resource_id,
            filters=get_filters_for_resource_id(self.controller_type, resource_id),
        )
        return create_resource_watch(
            request,
            self.reconcile_thread,
            self.deploy_manager,
            self.leadership_manager,
        )
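
Putting the interface together, a typical usage is to construct the manager for a Controller subclass, start watching, and block until shutdown. This is a hedged sketch: the controller definition (the group/version/kind values and the setup_components body) is illustrative, and the import path is simply the module path shown above.

from oper8 import Controller
from oper8.watch_manager.python_watch_manager.python_watch_manager import (
    PythonWatchManager,
)


class MyController(Controller):
    """Placeholder controller; the GVK values here are illustrative"""

    group = "example.com"
    version = "v1"
    kind = "Example"

    def setup_components(self, session):
        pass


manager = PythonWatchManager(MyController, namespace_list=["my-namespace"])
if manager.watch():      # acquire leadership and start watch/reconcile threads
    try:
        manager.wait()   # block until shutdown is signaled
    finally:
        manager.stop()   # stop all threads and release leadership
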
__init__(controller_type, deploy_manager=None, namespace_list=None)

Initialize the required threads and submit the watch requests

Parameters:

Name Type Description Default
controller_type Type[Controller]

Type[Controller] The controller to be watched

required
deploy_manager OpenshiftDeployManager

Optional[OpenshiftDeployManager] = None An optional DeployManager override

None
namespace_list List[str]

Optional[List[str]] = [] A list of namespaces to watch

None

Source code in oper8/watch_manager/python_watch_manager/python_watch_manager.py
def __init__(
    self,
    controller_type: Type[Controller],
    deploy_manager: Optional[OpenshiftDeployManager] = None,
    namespace_list: Optional[List[str]] = None,
):
    """Initialize the required threads and submit the watch requests
    Args:
        controller_type: Type[Controller]
            The controller to be watched
        deploy_manager: Optional[OpenshiftDeployManager] = None
            An optional DeployManager override
        namespace_list: Optional[List[str]] = []
            A list of namespaces to watch
    """
    super().__init__(controller_type)

    # Handle functional args
    if deploy_manager is None:
        log.debug("Using OpenshiftDeployManager")
        deploy_manager = OpenshiftDeployManager()
    self.deploy_manager = deploy_manager

    # Setup watch namespace
    self.namespace_list = namespace_list or []
    if not namespace_list and config.watch_namespace != "":
        self.namespace_list = config.watch_namespace.split(",")

    # Setup Control variables
    self.shutdown = threading.Event()

    # Setup Threads. These are both singleton instances and will be
    # the same across all PythonWatchManagers
    self.leadership_manager: LeadershipManagerBase = get_leader_election_class()(
        self.deploy_manager
    )
    self.reconcile_thread: ReconcileThread = ReconcileThread(
        deploy_manager=self.deploy_manager,
        leadership_manager=self.leadership_manager,
    )
    self.heartbeat_thread: Optional[HeartbeatThread] = None
    if config.python_watch_manager.heartbeat_file:
        self.heartbeat_thread = HeartbeatThread(
            config.python_watch_manager.heartbeat_file,
            config.python_watch_manager.heartbeat_period,
        )

    # Start thread for each resource watch
    self.controller_watches: List[WatchThread] = []
    if len(self.namespace_list) == 0 or "*" in self.namespace_list:
        self.controller_watches.append(self._add_resource_watch())
    else:
        for namespace in self.namespace_list:
            self.controller_watches.append(self._add_resource_watch(namespace))
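
The namespace handling above has three cases: an explicit namespace_list wins, otherwise a non-empty config.watch_namespace is split on commas, and an empty result (or a "*" entry) produces a single cluster-wide watch. Here is a small sketch of that resolution logic in isolation; the helper name is illustrative.

def resolve_namespaces(namespace_list, watch_namespace: str):
    """Illustrative mirror of the namespace handling in __init__ above"""
    namespaces = namespace_list or []
    if not namespace_list and watch_namespace != "":
        namespaces = watch_namespace.split(",")
    return namespaces


assert resolve_namespaces(None, "ns-a,ns-b") == ["ns-a", "ns-b"]
assert resolve_namespaces(None, "") == []                     # empty -> one cluster-wide watch
assert resolve_namespaces(["ns-c"], "ns-a,ns-b") == ["ns-c"]  # explicit list wins
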
stop()

Stop all threads. This waits for all reconciles to finish

Source code in oper8/watch_manager/python_watch_manager/python_watch_manager.py
def stop(self):
    """Stop all threads. This waits for all reconciles
    to finish
    """

    log.info(
        "Stopping PythonWatchManager for %s/%s/%s",
        self.group,
        self.version,
        self.kind,
    )

    # Set shutdown and acquire leadership to clear any deadlocks
    self.shutdown.set()
    self.leadership_manager.acquire(force=True)

    # Stop all threads
    for watch in get_resource_watches():
        watch.stop_thread()
    self.reconcile_thread.stop_thread()
    self.leadership_manager.release()
    if self.heartbeat_thread:
        self.heartbeat_thread.stop_thread()
wait()

Wait for shutdown to be signaled

Source code in oper8/watch_manager/python_watch_manager/python_watch_manager.py
def wait(self):
    """Wait shutdown to be signaled"""
    self.shutdown.wait()
watch()

Check for leadership and start all threads

Returns:

Name Type Description
success bool

bool True if all threads and processes are running correctly

Source code in oper8/watch_manager/python_watch_manager/python_watch_manager.py
def watch(self) -> bool:
    """Check for leadership and start all threads

    Returns:
        success:  bool
            True if all threads and processes are running correctly
    """
    log.info("Starting PythonWatchManager: %s", self)

    if not self.leadership_manager.is_leader():
        log.debug("Acquiring Leadership lock before starting %s", self)
        self.leadership_manager.acquire()

    # If watch has been shutdown then exit before starting threads
    if self.shutdown.is_set():
        return False

    # Start reconcile thread and all watch threads
    self.reconcile_thread.start_thread()
    for watch_thread in self.controller_watches:
        log.debug("Starting watch_thread: %s", watch_thread)
        watch_thread.start_thread()
    if self.heartbeat_thread:
        log.debug("Starting heartbeat_thread")
        self.heartbeat_thread.start_thread()
    return True

reconcile_process_entrypoint

ReconcileProcessEntrypoint for all PWM reconciles

ReconcileProcessDeployManager

Bases: OpenshiftDeployManager

ReconcileProcessEntrypoint deploy manager is a helper deploy manager that allows the PWM to insert functionality during a reconcile. This is used for things like watching dependent resources and subsystem rollout

Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
class ReconcileProcessDeployManager(OpenshiftDeployManager):
    """ReconcileProcessEntrypoint deploy manager is a helper deploy manager
    that allows the PWM to insert functionality during a reconcile. This
    is used for things like watching dependent resources and subsystem rollout"""

    def __init__(
        self,
        controller_type: Type[Controller],
        controller_resource: aconfig.Config,
        result_pipe: Connection,
        *args,
        **kwargs,
    ):
        """Initalize the ReconcileProcessEntrypoint DeployManger and gather start-up configurations

        Args:
            controller_type: Type[Controller]
                The Controller being reconciled
            controller_resource: aconfig.Config
                The resource being reconciled
            result_pipe: Connection
                The pipe to send dependent watch requests to
            *args:
                Extendable arguments to pass to the parent
            **kwargs:
                Extendable keyword arguments to pass to the parent
        """
        # Initialize ReconcileProcessEntrypoint Deploy Manager
        super().__init__(*args, owner_cr=controller_resource, **kwargs)

        # Initialize required variables
        self.requested_watches = set()
        self.result_pipe = result_pipe
        self.controller_type = controller_type

        # Setup Subsystems
        self.subsystems = self._gather_subsystems(controller_type)
        self.reconcile_manager = ReconcileManager(deploy_manager=self)

    # Functional Overrides

    def _apply_resource(self, resource_definition: dict) -> dict:
        """Override apply resource for handling watch_dependent_resources and subsystem rollout"""
        resource = super()._apply_resource(resource_definition)
        resource_id = ResourceId.from_resource(resource)

        # Send watch request if watch_dependent_resources is enabled
        # and/or handle subsystem rollout
        if config.python_watch_manager.watch_dependent_resources:
            log.debug2("Handling dependent resource %s", resource_id)
            self._handle_dependent_resource(resource_id)

        if (
            config.python_watch_manager.subsystem_rollout
            and resource_id.global_id in self.subsystems
        ):
            log.debug2("Rolling out subsystem %s", resource_id.global_id)
            self._handle_subsystem(
                resource, self.subsystems[resource_id.global_id], False
            )

        return resource

    def _replace_resource(self, resource_definition: dict) -> dict:
        """Override replace resource for handling watch_dependent_resources and subsystem rollout"""
        resource = super()._replace_resource(resource_definition)
        resource_id = ResourceId.from_resource(resource)

        # Send watch request if watch_dependent_resources is enabled
        # and/or handle subsystem rollout
        if config.python_watch_manager.watch_dependent_resources:
            log.debug2("Handling dependent resource %s", resource_id)
            self._handle_dependent_resource(resource_id)

        if (
            config.python_watch_manager.subsystem_rollout
            and resource_id.global_id in self.subsystems
        ):
            log.debug2("Rolling out subsystem %s", resource_id.global_id)
            self._handle_subsystem(
                resource, self.subsystems[resource_id.global_id], False
            )

        return resource

    def _disable(self, resource_definition: dict) -> bool:
        """Override disable to insert subsystem logic"""

        changed = super()._disable(resource_definition)
        if not changed:
            return changed

        resource_id = ResourceId.from_resource(resource_definition)

        # If deleted resource is a subsystem then run reconcile with finalizer
        if (
            config.python_watch_manager.subsystem_rollout
            and resource_id.global_id in self.subsystems
        ):
            success, current_state = self.get_object_current_state(
                kind=resource_id.kind,
                name=resource_id.name,
                namespace=resource_id.namespace,
                api_version=resource_id.api_version,
            )
            if not success or not current_state:
                log.warning(
                    "Unable to fetch owner resource %s/%s/%s/%s",
                    resource_id.kind,
                    resource_id.api_version,
                    resource_id.namespace,
                    resource_id.name,
                )
                return changed

            self._handle_subsystem(
                current_state, self.subsystems[resource_id.global_id], True
            )

        return changed

    def _handle_subsystem(self, resource, controller_type, is_finalizer):
        """Handle rolling out a subsystem for a specific controller, resource, and finalizer"""

        # Copy a ref of the current logging format to restore to
        log_formatters = {}
        for handler in logging.getLogger().handlers:
            log_formatters[handler] = handler.formatter

        # Update the current owner
        current_owner = self._owner_cr
        self._owner_cr = resource
        current_controller_type = self.controller_type
        self.controller_type = controller_type

        # Add the new controllers subsystems to the current dictionary
        # this simplifies future look ups
        current_subsystems = self.subsystems
        self.subsystems = (self._gather_subsystems(controller_type),)

        self.reconcile_manager.safe_reconcile(controller_type, resource, is_finalizer)

        # Reset owner_cr, logging, and subsystems
        self._owner_cr = current_owner
        self.controller_type = current_controller_type
        self.subsystems = current_subsystems
        for handler, formatter in log_formatters.items():
            handler.setFormatter(formatter)

    def _handle_dependent_resource(self, watched_id):
        """Handling request a watch for a deployed resource"""
        # Create requester id
        resource_id = ResourceId.from_resource(self._owner_cr)

        # Remove name from watched_id so it captures
        # any resource of that kind with this owner
        watched_id = copy.deepcopy(watched_id)
        watched_id = dataclasses.replace(watched_id, name=None)

        filters = DependentWatchFilter
        if controller_filters := get_filters_for_resource_id(
            self.controller_type, watched_id
        ):
            filters = AndFilter(DependentWatchFilter, controller_filters)
        watch_filters = FilterManager.to_info(filters)

        watch_request = WatchRequest(
            requester=resource_id,
            watched=watched_id,
            # Use controller info to avoid issues between vcs and pickling
            controller_info=ClassInfo.from_type(self.controller_type),
            filters_info=watch_filters,
        )

        # Only send each watch request once
        if watch_request not in self.requested_watches:
            log.debug3(f"Sending watch request {watch_request}")
            self.result_pipe.send(watch_request)
            self.requested_watches.add(watch_request)

    def _gather_subsystems(self, controller_type: Type[Controller]):
        """Gather the list of subsystems for a controller"""
        subsystem_controllers = getattr(controller_type, "pwm_subsystems", [])
        subsystems = {
            ResourceId.from_controller(controller).global_id: controller
            for controller in subsystem_controllers
        }
        log.debug3(f"Gathered subsystems: {subsystems}")
        return subsystems
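
Subsystem rollout is driven entirely by a pwm_subsystems attribute on the parent Controller: _gather_subsystems above keys each listed subsystem Controller by its global resource id, so deploying a matching resource triggers a nested reconcile in the same process. A hedged sketch of that wiring follows; the controller classes and GVK values are illustrative, and only the pwm_subsystems attribute is actually read by the code above.

from oper8 import Controller


class DatabaseController(Controller):
    """Illustrative subsystem controller"""

    group = "example.com"
    version = "v1"
    kind = "Database"

    def setup_components(self, session):
        pass


class PlatformController(Controller):
    """Illustrative parent controller that rolls out a subsystem in-process"""

    group = "example.com"
    version = "v1"
    kind = "Platform"

    # Read by ReconcileProcessDeployManager._gather_subsystems above
    pwm_subsystems = [DatabaseController]

    def setup_components(self, session):
        pass
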
__init__(controller_type, controller_resource, result_pipe, *args, **kwargs)

Initialize the ReconcileProcessEntrypoint DeployManager and gather start-up configurations

Parameters:

Name Type Description Default
controller_type Type[Controller]

Type[Controller] The Controller being reconciled

required
controller_resource Config

aconfig.Config The resource being reconciled

required
result_pipe Connection

Connection The pipe to send dependent watch requests to

required
*args

Extendable arguments to pass to the parent

()
**kwargs

Extendable keyword arguments to pass to the parent

{}
Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
def __init__(
    self,
    controller_type: Type[Controller],
    controller_resource: aconfig.Config,
    result_pipe: Connection,
    *args,
    **kwargs,
):
    """Initalize the ReconcileProcessEntrypoint DeployManger and gather start-up configurations

    Args:
        controller_type: Type[Controller]
            The Controller being reconciled
        controller_resource: aconfig.Config
            The resource being reconciled
        result_pipe: Connection
            The pipe to send dependent watch requests to
        *args:
            Extendable arguments to pass to the parent
        **kwargs:
            Extendable keyword arguments to pass to the parent
    """
    # Initialize ReconcileProcessEntrypoint Deploy Manager
    super().__init__(*args, owner_cr=controller_resource, **kwargs)

    # Initialize required variables
    self.requested_watches = set()
    self.result_pipe = result_pipe
    self.controller_type = controller_type

    # Setup Subsystems
    self.subsystems = self._gather_subsystems(controller_type)
    self.reconcile_manager = ReconcileManager(deploy_manager=self)
ReconcileProcessEntrypoint

The ReconcileProcessEntrypoint class is the main starting point for a reconciliation. It configures some watch-manager-specific settings, like multiprocess logging and signal handling, then hands off control to the ReconcileManager

Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
class ReconcileProcessEntrypoint:  # pylint: disable=too-few-public-methods
    """The ReconcileProcessEntrypoint Class is the main start place for a
    reconciliation. It configures some watch manager specific settings like
    multiprocess logging, and signal handling then it hands off control to the
    ReconcileManager"""

    def __init__(
        self,
        controller_type: Type[Controller],
        deploy_manager: DeployManagerBase = None,
    ):
        """Initializer for the entrypoint class

        Args:
            controller_type: Type[Controller]
                The Controller type being reconciled
            deploy_manager: DeployManagerBase = None
                An optional deploy manager override
        """
        self.controller_type = controller_type
        self.deploy_manager = deploy_manager

        # Initialize the reconcile manager in start
        self.reconcile_manager = None

    def start(
        self,
        request: ReconcileRequest,
        result_pipe: Connection,
    ):
        """Main entrypoint for the class

        Args:
            request: ReconcileRequest
                The reconcile request that triggered this reconciliation
            result_pipe: Connection
                The connection to send results back to
        """
        # Parse the request and setup local variables
        log.debug4("Setting up resource")
        resource = request.resource
        resource_id = ResourceId.from_resource(resource)

        # Set a unique thread name for each reconcile
        thread_name = f"entrypoint_{resource_id.get_id()}/{resource_id.name}"
        log.debug4("Setting thread name: %s", thread_name)
        threading.current_thread().name = thread_name

        # Reset signal handlers to default function
        log.debug4("Resetting signals")
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

        # Replace stdout and stderr with a null stream as all messages should be passed via
        # the queue and any data in the buffer could cause the process to hang. This can
        # make it difficult to debug subprocesses if they fail before setting up the handler
        log.debug4("Redirecting to /dev/null")
        with open(os.devnull, "w", encoding="utf-8") as null_file:
            sys.stdout = null_file
            sys.stderr = null_file

            log.info(
                "ReconcileProcessEntrypoint for %s and with type: %s",
                self.controller_type,
                request.type,
            )

            # If controller_type has subsystems then set reconciliation to standalone mode.
            # This forces the reconcile to be single threaded but allows for recursive reconciles
            log.debug4("Checking for subsystem rollout")
            if (
                getattr(self.controller_type, "pwm_subsystems", [])
                and config.python_watch_manager.subsystem_rollout
            ):
                config.standalone = True

            # Create a custom deploy manager so we can insert functionality
            deploy_manager = self.deploy_manager
            if not deploy_manager:
                deploy_manager = ReconcileProcessDeployManager(
                    result_pipe=result_pipe,
                    controller_resource=resource.definition,
                    controller_type=self.controller_type,
                )

            # Create a reconciliation manager and start the reconcile
            self.reconcile_manager = ReconcileManager(deploy_manager=deploy_manager)

            finalize = request.type == KubeEventType.DELETED or resource.metadata.get(
                "deletionTimestamp"
            )
            reconcile_result = self.reconcile_manager.safe_reconcile(
                self.controller_type,
                resource.definition,
                finalize,
            )
            # Clear exception as it's not always pickleable
            reconcile_result.exception = None

            # Complete the reconcile by sending the result back up the pipe
            # and explicitly close all remaining descriptors
            log.info("Finished Reconcile for %s", resource_id)
            log.debug3("Sending reconciliation result back to main process")
            result_pipe.send(reconcile_result)
            result_pipe.close()
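
One detail worth calling out from start() is how the finalizer path is chosen: a reconcile runs as a finalizer when the triggering event is a deletion or the resource already carries a deletionTimestamp. Below is a simplified sketch of that decision using plain strings and dicts; the real code uses KubeEventType and a ManagedObject.

def is_finalize(event_type: str, resource: dict) -> bool:
    """Simplified mirror of the finalize check in start() above"""
    return event_type == "DELETED" or bool(
        resource.get("metadata", {}).get("deletionTimestamp")
    )


assert is_finalize("DELETED", {"metadata": {}})
assert is_finalize("MODIFIED", {"metadata": {"deletionTimestamp": "2024-01-01T00:00:00Z"}})
assert not is_finalize("ADDED", {"metadata": {}})
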
__init__(controller_type, deploy_manager=None)

Initializer for the entrypoint class

Parameters:

Name Type Description Default
controller_type Type[Controller]

Type[Controller] The Controller type being reconciled

required
deploy_manager DeployManagerBase

DeployManagerBase = None An optional deploy manager override

None
Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
def __init__(
    self,
    controller_type: Type[Controller],
    deploy_manager: DeployManagerBase = None,
):
    """Initializer for the entrypoint class

    Args:
        controller_type: Type[Controller]
            The Controller type being reconciled
        deploy_manager: DeployManagerBase = None
            An optional deploy manager override
    """
    self.controller_type = controller_type
    self.deploy_manager = deploy_manager

    # Initialize the reconcile manager in start
    self.reconcile_manager = None
start(request, result_pipe)

Main entrypoint for the class

Parameters:

Name Type Description Default
request ReconcileRequest

ReconcileRequest The reconcile request that triggered this reconciliation

required
result_pipe Connection

Connection The connection to send results back to

required
Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
def start(
    self,
    request: ReconcileRequest,
    result_pipe: Connection,
):
    """Main entrypoint for the class

    Args:
        request: ReconcileRequest
            The reconcile request that triggered this reconciliation
        result_pipe: Connection
            The connection to send results back to
    """
    # Parse the request and setup local variables
    log.debug4("Setting up resource")
    resource = request.resource
    resource_id = ResourceId.from_resource(resource)

    # Set a unique thread name for each reconcile
    thread_name = f"entrypoint_{resource_id.get_id()}/{resource_id.name}"
    log.debug4("Setting thread name: %s", thread_name)
    threading.current_thread().name = thread_name

    # Reset signal handlers to default function
    log.debug4("Resetting signals")
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

    # Replace stdout and stderr with a null stream as all messages should be passed via
    # the queue and any data in the buffer could cause the process to hang. This can
    # make it difficult to debug subprocesses if they fail before setting up the handler
    log.debug4("Redirecting to /dev/null")
    with open(os.devnull, "w", encoding="utf-8") as null_file:
        sys.stdout = null_file
        sys.stderr = null_file

        log.info(
            "ReconcileProcessEntrypoint for %s and with type: %s",
            self.controller_type,
            request.type,
        )

        # If controller_type has subsystems then set reconciliation to standalone mode.
        # This forces the reconcile to be single threaded but allows for recursive reconciles
        log.debug4("Checking for subsystem rollout")
        if (
            getattr(self.controller_type, "pwm_subsystems", [])
            and config.python_watch_manager.subsystem_rollout
        ):
            config.standalone = True

        # Create a custom deploy manager so we can insert functionality
        deploy_manager = self.deploy_manager
        if not deploy_manager:
            deploy_manager = ReconcileProcessDeployManager(
                result_pipe=result_pipe,
                controller_resource=resource.definition,
                controller_type=self.controller_type,
            )

        # Create a reconciliation manager and start the reconcile
        self.reconcile_manager = ReconcileManager(deploy_manager=deploy_manager)

        finalize = request.type == KubeEventType.DELETED or resource.metadata.get(
            "deletionTimestamp"
        )
        reconcile_result = self.reconcile_manager.safe_reconcile(
            self.controller_type,
            resource.definition,
            finalize,
        )
        # Clear exception as it's not always pickleable
        reconcile_result.exception = None

        # Complete the reconcile by sending the result back up the pipe
        # and explicitly close all remaining descriptors
        log.info("Finished Reconcile for %s", resource_id)
        log.debug3("Sending reconciliation result back to main process")
        result_pipe.send(reconcile_result)
        result_pipe.close()
create_and_start_entrypoint(logging_queue, request, result_pipe, deploy_manager=None)

Function to create and start an entrypoint while catching any unexpected errors

Parameters:

Name Type Description Default
logging_queue Queue

multiprocessing.Queue The queue to send log messages to

required
request ReconcileRequest

ReconcileRequest The request that triggered this reconciliation

required
result_pipe Connection

Connection The pipe to send a result back with

required
deploy_manager DeployManagerBase

DeployManagerBase = None An optional DeployManager override

None

Source code in oper8/watch_manager/python_watch_manager/reconcile_process_entrypoint.py
def create_and_start_entrypoint(
    logging_queue: multiprocessing.Queue,
    request: ReconcileRequest,
    result_pipe: Connection,
    deploy_manager: DeployManagerBase = None,
):
    """Function to create and start an entrypoint while catching any unexpected errors
    Args:
        logging_queue: multiprocessing.Queue
            The queue to send log messages to
        request: ReconcileRequest
            The request that triggered this reconciliation
        result_pipe: Connection
            The pipe to send a result back with
        deploy_manager: DeployManagerBase = None
            An optional DeployManager override
    """
    try:
        # Set the logging library to utilize the multiprocessing logging queue. Do this before
        # any logging messages are sent since that might cause a deadlock
        root_logger = logging.getLogger()
        root_logger.handlers.clear()
        handler = LogQueueHandler(logging_queue, request.resource)
        root_logger.addHandler(handler)

        log.debug3("Creating entrypoint")
        entry = ReconcileProcessEntrypoint(
            request.controller_type, deploy_manager=deploy_manager
        )
        log.debug3("Starting entrypoint")
        entry.start(request, result_pipe)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        log.error("Uncaught exception '%s'", exc, exc_info=True)

    # Close the logging queue to ensure all messages are sent before process end
    logging_queue.close()
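
create_and_start_entrypoint is designed to be the target of a subprocess: the parent passes a multiprocessing logging queue plus one end of a Pipe, and reads the WatchRequest and ReconcileResult objects back from the other end. The wiring below is a hedged sketch only; building the ReconcileRequest itself is assumed (in practice the ReconcileThread constructs it), so the request variable is a placeholder.

import multiprocessing

from oper8.watch_manager.python_watch_manager.reconcile_process_entrypoint import (
    create_and_start_entrypoint,
)

# Placeholder: in practice the ReconcileThread builds the ReconcileRequest
request = ...

logging_queue = multiprocessing.Queue()
parent_conn, child_conn = multiprocessing.Pipe()

proc = multiprocessing.Process(
    target=create_and_start_entrypoint,
    args=(logging_queue, request, child_conn),
)
proc.start()

# Drain watch requests and, finally, the ReconcileResult from the pipe
while proc.is_alive() or parent_conn.poll():
    if parent_conn.poll(0.1):
        message = parent_conn.recv()
        print("received from reconcile subprocess:", type(message).__name__)
proc.join()
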

threads

Import the ThreadBase and subclasses

base

Module for the ThreadBase Class

ThreadBase

Bases: Thread

Base class for all other thread classes. This class handles generic starting, stopping, and leadership functions

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
class ThreadBase(threading.Thread):
    """Base class for all other thread classes. This class handles generic starting, stopping,
    and leadership functions"""

    def __init__(
        self,
        name: str = None,
        daemon: bool = None,
        deploy_manager: DeployManagerBase = None,
        leadership_manager: LeadershipManagerBase = None,
    ):
        """Initialize class and store required instance variables. This function
    is normally overridden by subclasses that pass in static name/daemon variables

        Args:
            name:str=None
                The name of the thread to manage
            daemon:bool=None
                Whether python should wait for this thread to stop before exiting
            deploy_manager: DeployManagerBase = None
                The deploy manager available to this thread during start()
            leadership_manager: LeadershipManagerBase = None
                The leadership_manager for tracking elections
        """
        self.deploy_manager = deploy_manager
        self.leadership_manager = leadership_manager or DryRunLeadershipManager()
        self.shutdown = threading.Event()
        super().__init__(name=name, daemon=daemon)

    ## Abstract Interface ######################################################
    #
    # These functions must be implemented by child classes
    ##
    def run(self):
        """Control loop for the thread. Once this function exits the thread stops"""
        raise NotImplementedError()

    ## Base Class Interface ####################################################
    #
    # These methods MAY be implemented by children, but contain default
    # implementations that are appropriate for simple cases.
    #
    ##

    def start_thread(self):
        """If the thread is not already alive start it"""
        if not self.is_alive():
            log.info("Starting %s: %s", self.__class__.__name__, self.name)
            self.start()

    def stop_thread(self):
        """Set the shutdown event"""
        log.info("Stopping %s: %s", self.__class__.__name__, self.name)
        self.shutdown.set()

    def should_stop(self) -> bool:
        """Helper to determine if a thread should shutdown"""
        return self.shutdown.is_set()

    def check_preconditions(self) -> bool:
        """Helper function to check if the thread should shutdown or reacquire leadership"""
        if self.should_stop():
            return False

        if self.leadership_manager and not self.leadership_manager.is_leader():
            log.debug3("Waiting for leadership")
            self.leadership_manager.acquire()

        return True

    def wait_on_precondition(self, timeout: float) -> bool:
        """Helper function to allow threads to wait for a certain period of time
        only being interrupted for preconditions"""
        self.shutdown.wait(timeout)

        return self.check_preconditions()
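
ThreadBase leaves run() abstract; a subclass typically loops until shutdown, calling check_preconditions() to block on leadership and wait_on_precondition() to sleep between iterations while still reacting to shutdown. The following is a minimal sketch of such a subclass, written as if it lived next to the base class above; the polling work itself is illustrative.

class PollingThread(ThreadBase):
    """Illustrative ThreadBase subclass that does periodic work while leader"""

    def __init__(self, deploy_manager=None, leadership_manager=None):
        super().__init__(
            name="polling_thread",
            daemon=True,
            deploy_manager=deploy_manager,
            leadership_manager=leadership_manager,
        )

    def run(self):
        while True:
            # Blocks on leadership and returns False once shutdown is requested
            if not self.check_preconditions():
                return

            self.do_work()

            # Sleep between iterations, waking early on shutdown
            if not self.wait_on_precondition(5.0):
                return

    def do_work(self):
        """Placeholder for the thread's actual job"""
        log.debug3("Polling...")
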
__init__(name=None, daemon=None, deploy_manager=None, leadership_manager=None)

Initialize class and store required instance variables. This function is normally overridden by subclasses that pass in static name/daemon variables

Parameters:

Name Type Description Default
name str

str=None The name of the thread to manage

None
daemon bool

bool=None Whether python should wait for this thread to stop before exiting

None
deploy_manager DeployManagerBase

DeployManagerBase = None The deploy manager available to this thread during start()

None
leadership_manager LeadershipManagerBase

LeadershipManagerBase = None The leadership_manager for tracking elections

None
Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def __init__(
    self,
    name: str = None,
    daemon: bool = None,
    deploy_manager: DeployManagerBase = None,
    leadership_manager: LeadershipManagerBase = None,
):
    """Initialize class and store required instance variables. This function
    is normally overridden by subclasses that pass in static name/daemon variables

    Args:
        name:str=None
            The name of the thread to manage
        daemon:bool=None
            Whether python should wait for this thread to stop before exiting
        deploy_manager: DeployManagerBase = None
            The deploy manager available to this thread during start()
        leadership_manager: LeadershipManagerBase = None
            The leadership_manager for tracking elections
    """
    self.deploy_manager = deploy_manager
    self.leadership_manager = leadership_manager or DryRunLeadershipManager()
    self.shutdown = threading.Event()
    super().__init__(name=name, daemon=daemon)
check_preconditions()

Helper function to check if the thread should shutdown or reacquire leadership

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def check_preconditions(self) -> bool:
    """Helper function to check if the thread should shutdown or reacquire leadership"""
    if self.should_stop():
        return False

    if self.leadership_manager and not self.leadership_manager.is_leader():
        log.debug3("Waiting for leadership")
        self.leadership_manager.acquire()

    return True
run()

Control loop for the thread. Once this function exits the thread stops

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def run(self):
    """Control loop for the thread. Once this function exits the thread stops"""
    raise NotImplementedError()
should_stop()

Helper to determine if a thread should shutdown

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def should_stop(self) -> bool:
    """Helper to determine if a thread should shutdown"""
    return self.shutdown.is_set()
start_thread()

If the thread is not already alive start it

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def start_thread(self):
    """If the thread is not already alive start it"""
    if not self.is_alive():
        log.info("Starting %s: %s", self.__class__.__name__, self.name)
        self.start()
stop_thread()

Set the shutdown event

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def stop_thread(self):
    """Set the shutdown event"""
    log.info("Stopping %s: %s", self.__class__.__name__, self.name)
    self.shutdown.set()
wait_on_precondition(timeout)

Helper function to allow threads to wait for a certain period of time, being interrupted only for precondition checks

Source code in oper8/watch_manager/python_watch_manager/threads/base.py
def wait_on_precondition(self, timeout: float) -> bool:
    """Helper function to allow threads to wait for a certain period of time
    only being interrupted for preconditions"""
    self.shutdown.wait(timeout)

    return self.check_preconditions()
heartbeat

Thread class that will dump a heartbeat to a file periodically

HeartbeatThread

Bases: TimerThread

The HeartbeatThread acts as a pulse for the PythonWatchManager.

This thread will periodically dump the value of "now" to a file which can be read by an observer such as a liveness/readiness probe to ensure that the manager is functioning well.

Source code in oper8/watch_manager/python_watch_manager/threads/heartbeat.py
class HeartbeatThread(TimerThread):
    """The HeartbeatThread acts as a pulse for the PythonWatchManager.

    This thread will periodically dump the value of "now" to a file which can be
    read by an observer such as a liveness/readiness probe to ensure that the
    manager is functioning well.
    """

    # This format is designed to be read using `date -d $(cat heartbeat.txt)`
    # using the GNU date utility
    # CITE: https://www.gnu.org/software/coreutils/manual/html_node/Examples-of-date.html
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, heartbeat_file: str, heartbeat_period: str):
        """Initialize with the file location for the heartbeat output

        Args:
            heartbeat_file: str
                The fully-qualified path to the heartbeat file
            heartbeat_period: str
                Time delta string representing period delay between beats.
                NOTE: The GNU `date` utility cannot parse sub-seconds easily, so
                    the expected configuration for this is to be >= 1s
        """
        self._heartbeat_file = heartbeat_file
        self._offset = parse_time_delta(heartbeat_period)
        self._beat_lock = threading.Lock()
        self._beat_event = threading.Event()
        super().__init__(name="heartbeat_thread")

    def run(self):
        self._run_heartbeat()
        return super().run()

    def wait_for_beat(self):
        """Wait for the next beat"""
        # Make sure the beat lock is not held before starting wait. This
        # prevents beats that are immediately ready
        with self._beat_lock:
            pass

        # Wait for the next beat
        self._beat_event.wait()

    def _run_heartbeat(self):
        """Run the heartbeat dump to the heartbeat file and put the next beat"""
        now = datetime.now()
        log.debug3("Heartbeat %s", now)

        # Save the beat to disk
        try:
            with open(self._heartbeat_file, "w", encoding="utf-8") as handle:
                handle.write(now.strftime(self._DATE_FORMAT))
                handle.flush()
        except Exception as err:
            log.warning("Failed to write heartbeat file: %s", err, exc_info=True)

        # Unblock and reset the wait condition
        with self._beat_lock:
            self._beat_event.set()
            self._beat_event.clear()

        # Put the next beat if not stopped
        if not self.should_stop():
            self.put_event(now + self._offset, self._run_heartbeat)
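
Since each beat is written using _DATE_FORMAT, an external probe can parse the file and compare it to the current time. A minimal sketch of such a check; the file path and the 30-second staleness threshold are assumptions, not oper8 defaults:

from datetime import datetime, timedelta

HEARTBEAT_FILE = "/tmp/oper8_heartbeat.txt"   # assumed location
MAX_HEARTBEAT_AGE = timedelta(seconds=30)     # assumed staleness threshold

def heartbeat_is_fresh() -> bool:
    """Return True if the last beat was written recently enough"""
    with open(HEARTBEAT_FILE, "r", encoding="utf-8") as handle:
        last_beat = datetime.strptime(handle.read().strip(), "%Y-%m-%d %H:%M:%S")
    return datetime.now() - last_beat < MAX_HEARTBEAT_AGE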
__init__(heartbeat_file, heartbeat_period)

Initialize with the file location for the heartbeat output

Parameters:

heartbeat_file (str, required): The fully-qualified path to the heartbeat file
heartbeat_period (str, required): Time delta string representing period delay between beats. NOTE: The GNU date utility cannot parse sub-seconds easily, so the expected configuration for this is to be >= 1s
Source code in oper8/watch_manager/python_watch_manager/threads/heartbeat.py
def __init__(self, heartbeat_file: str, heartbeat_period: str):
    """Initialize with the file location for the heartbeat output

    Args:
        heartbeat_file: str
            The fully-qualified path to the heartbeat file
        heartbeat_period: str
            Time delta string representing period delay between beats.
            NOTE: The GNU `date` utility cannot parse sub-seconds easily, so
                the expected configuration for this is to be >= 1s
    """
    self._heartbeat_file = heartbeat_file
    self._offset = parse_time_delta(heartbeat_period)
    self._beat_lock = threading.Lock()
    self._beat_event = threading.Event()
    super().__init__(name="heartbeat_thread")
wait_for_beat()

Wait for the next beat

Source code in oper8/watch_manager/python_watch_manager/threads/heartbeat.py
def wait_for_beat(self):
    """Wait for the next beat"""
    # Make sure the beat lock is not held before starting wait. This
    # prevents beats that are immediately ready
    with self._beat_lock:
        pass

    # Wait for the next beat
    self._beat_event.wait()
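
wait_for_beat is mainly useful in tests or in code that wants to run once per pulse. A small sketch; the file path and period string are placeholders, and the period format is whatever parse_time_delta accepts:

# Hypothetical values; the period string format is defined by parse_time_delta
heartbeat = HeartbeatThread("/tmp/oper8_heartbeat.txt", "30s")
heartbeat.start_thread()

# Block until the next pulse has been written to disk
heartbeat.wait_for_beat()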
reconcile

The ReconcileThread is the heart of the PythonWatchManager; it controls reconciling resources and manages any subprocesses

ReconcileThread

Bases: ThreadBase

This class is the core reconciliation class: it handles starting subprocesses, tracking their status, and processing any results. This thread also kicks off requeue requests and requests dependent resource watches.

Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
class ReconcileThread(
    ThreadBase, metaclass=Singleton
):  # pylint: disable=too-many-instance-attributes
    """This class is the core reconciliation class that handles starting subprocesses,
    tracking their status, and handles any results. This thread also kicks of requeue
    requests and requests dependent resource watches"""

    def __init__(
        self,
        deploy_manager: DeployManagerBase = None,
        leadership_manager: LeadershipManagerBase = None,
    ):
        """Initialize the required queues, helper threads, and reconcile tracking. Also
        gather any onetime configuration options

        Args:
            deploy_manager: DeployManagerBase = None
                The deploy manager used throughout the thread
            leadership_manager: LeadershipManagerBase = None
                The leadership_manager for tracking elections
        """
        super().__init__(
            name="reconcile_thread",
            deploy_manager=deploy_manager,
            leadership_manager=leadership_manager,
        )

        # Configure the multiprocessing process spawning context
        context = config.python_watch_manager.process_context
        if context not in multiprocessing.get_all_start_methods():
            raise ConfigError(f"Invalid process_context: '{context}'")

        if context == "fork":
            log.warning(
                "The fork multiprocessing context is known to cause deadlocks in certain"
                " environments due to OpenSSL. Consider using spawn for more reliable"
                " reconciling"
            )

        self.spawn_ctx = multiprocessing.get_context(context)

        # Setup required queues
        self.request_queue = self.spawn_ctx.Queue()
        self.logging_queue = self.spawn_ctx.Queue()

        # Setup helper threads
        self.timer_thread: TimerThread = TimerThread()
        self.log_listener_thread: QueueListener = QueueListener(
            self.logging_queue, *get_logging_handlers(), respect_handler_level=False
        )

        # Setup reconcile, request, and event mappings
        self.running_reconciles: Dict[str, ReconcileProcess] = {}
        self.pending_reconciles: Dict[str, ReconcileRequest] = {}
        self.event_map: Dict[str, TimerEvent] = {}

        # Setup control variables
        self.process_overload = threading.Event()

        # Configure the max number of concurrent reconciles via either config
        # or number of cpus
        if config.python_watch_manager.max_concurrent_reconciles:
            self.max_concurrent_reconciles = (
                config.python_watch_manager.max_concurrent_reconciles
            )
        else:
            self.max_concurrent_reconciles = os.cpu_count()

    def run(self):
        """The reconcile threads control flow is to first wait for
        either a new reconcile request or for a process to end. If its a reconcile request
        the thread checks if one is already running for the resource and if not starts a
        new one. If a reconcile is already running or the thread couldn't start a new one
        the request gets added to the pending reconciles. There can only be one pending
        reconcile per resource. If the reconcile thread received a process end event it
        checks the exit code and handles the result queue.
        """
        while True:  # pylint: disable=too-many-nested-blocks
            if not self.check_preconditions():
                return

            # Wait for a change with the reconcile processes or reconcile queue.
            # Use _reader and Process.sentinel objects, so we can utilize select.select
            listen_objs = [
                *self.running_reconciles.values(),
                self.request_queue._reader,  # pylint: disable=protected-access
            ]
            log.debug3("Waiting on %s", listen_objs)
            ready_objs = multiprocessing.connection.wait(listen_objs)

            # Check preconditions both before and after waiting
            if not self.check_preconditions():
                return

            # For each object that triggered the connection
            for obj in ready_objs:
                log.debug3("Processing object %s with type %s", obj, type(obj))

                # Handle reconcile process end events
                if isinstance(obj, ReconcileProcess):
                    if self._handle_process_end(obj):
                        # If process overload is set then we need to check all resources for
                        # pending reconciles, otherwise just check if the completed resource
                        # has a pending request.
                        if self.process_overload.is_set():
                            for uid in list(self.pending_reconciles.keys()):
                                if not self._handle_pending_reconcile(uid):
                                    break
                        else:
                            self._handle_pending_reconcile(obj.uid())

                # Handle all of the events in the queue
                elif isinstance(obj, Connection):
                    self._handle_request_queue()

    ## Class Interface ###################################################

    def start_thread(self):
        """Override start_thread to start helper threads"""
        self.timer_thread.start_thread()
        self.log_listener_thread.start()
        super().start_thread()

    def stop_thread(self):
        """Override stop_thread to ensure reconciles finish correctly"""
        super().stop_thread()

        if not self.is_alive() and not self.running_reconciles:
            log.debug("Reconcile Thread already stopped")
            return

        # Reawaken reconcile thread to stop
        log.debug("Pushing stop reconcile request")
        self.push_request(ReconcileRequest(None, ReconcileRequestType.STOPPED, {}))

        log.debug("Waiting for reconcile thread to finish")
        while self.is_alive():
            time.sleep(0.001)

        # Wait until all reconciles have completed
        log.info("Waiting for Running Reconciles to end")
        while self.running_reconciles:
            log.debug2("Waiting for reconciles %s to end", self.running_reconciles)
            for reconcile_process in list(self.running_reconciles.values()):
                # attempt to join process before trying the next one
                reconcile_process.process.join(JOIN_PROCESS_TIMEOUT)
                if reconcile_process.process.exitcode is not None:
                    log.debug(
                        "Joined reconciles process %s with exitcode: %s for request object %s",
                        reconcile_process.process.pid,
                        reconcile_process.process.exitcode,
                        reconcile_process.request,
                    )
                    self.running_reconciles.pop(reconcile_process.uid())
                    reconcile_process.process.close()

            # Pause for slight delay between checking processes
            sleep(SHUTDOWN_RECONCILE_POLL_TIME)

        # Close the logging queue to indicate no more logging events
        self.logging_queue.close()
        # Skip stopping the listener thread as it can hang on the join, this isn't
        # too bad as the listener thread is daemon anyways
        # self.log_listener_thread.stop()

    ## Public Interface ###################################################

    def push_request(self, request: ReconcileRequest):
        """Push request to reconcile queue

        Args:
            request: ReconcileRequest
                the ReconcileRequest to add to the queue
        """
        log.info(
            "Pushing request '%s' to reconcile queue",
            request,
            extra={"resource": request.resource},
        )
        self.request_queue.put(request)

    ## Event Handlers ###################################################

    def _handle_request_queue(self):
        """The function attempts to start a reconcile for every reconcile requests in the queue.
        If it can't start a reconcile or one is already running then it pushes it to the pending
        queue"""

        # Get all events from the queue
        pending_requests = self._get_all_requests()

        # Start a reconcile for each pending request
        for request in pending_requests:
            if request.type == ReconcileRequestType.STOPPED:
                break

            log.debug3("Got request %s from queue", request)

            # If a reconcile is not already running then start the process.
            # Otherwise, or if starting failed, push to the pending reconcile queue
            if request.resource.uid not in self.running_reconciles:
                if not self._start_reconcile_for_request(request):
                    self._push_to_pending_reconcile(request)
            else:
                self._push_to_pending_reconcile(request)

    def _handle_process_end(self, reconcile_process: ReconcileProcess) -> str:
        """Handle a process end event. The function joins the finished process,
        manages any events in the pipe, and creates a requeue/periodic event if
        one is needed.

        Args:
            reconcile_process: ReconcileProcess
                The process that ended

        Returns:
            uid: str
                The uid of the resource that ended
        """
        # Parse process variables
        uid = reconcile_process.uid()
        reconcile_request = reconcile_process.request
        process = reconcile_process.process
        pipe = reconcile_process.pipe

        # Attempt to join the process
        log.debug(
            "Joining process for request %s",
            reconcile_request,
            extra={"resource": reconcile_request.resource},
        )
        process.join(JOIN_PROCESS_TIMEOUT)
        exit_code = process.exitcode

        # If it's still alive then exit; the process will be cleaned up on the next iteration
        if exit_code is None:
            log.debug("Process is still alive after join. Skipping cleanup")
            return None

        if exit_code != 0:
            log.warning(
                "Reconcile did not complete successfully: %s",
                reconcile_request,
                extra={"resource": reconcile_request.resource},
            )

        # Remove reconcile from map and release resource lock
        self.running_reconciles.pop(uid)
        self.leadership_manager.release_resource(reconcile_request.resource)

        # Handle any events passed via the process pipe including the reconcile result and
        # close the pipe once done
        reconcile_result = self._handle_process_pipe(pipe)
        process.close()

        # Print reconciliation result
        log.info(
            "Reconcile completed with result %s",
            reconcile_result if reconcile_result else exit_code,
            extra={"resource": reconcile_request.resource},
        )

        # Cancel any existing requeue events
        if uid in self.event_map:
            log.debug2("Marking event as stale: %s", self.event_map[uid])
            self.event_map[uid].cancel()

        # Create a new timer event if one is needed
        event = self._create_timer_event_for_request(
            reconcile_request, reconcile_result
        )
        if event:
            self.event_map[uid] = event

        return uid

    def _handle_process_pipe(self, pipe: Connection) -> ReconciliationResult:
        """Handle any objects in a connection pipe and return the reconciliation result

        Args:
            pipe: Connection
                the pipe to read results from

        Returns:
            reconcile_result: ReconciliationResult
                The result gathered from the pipe
        """
        reconcile_result = None
        while pipe.poll():
            # EOFError is raised when the pipe is closed which is expected after the reconcile
            # process has been joined
            try:
                pipe_obj = pipe.recv()
            except EOFError:
                break

            log.debug3("Received obj %s from process pipe", pipe_obj)

            # Handle any watch requests received
            if isinstance(pipe_obj, WatchRequest):
                self._handle_watch_request(pipe_obj)

            # We only expect one reconciliation result per process
            elif isinstance(pipe_obj, ReconciliationResult):
                reconcile_result = pipe_obj

        # Close the reconcile pipe and release the rest of the process resources
        pipe.close()

        return reconcile_result

    def _handle_watch_request(self, request: WatchRequest):
        """Create a resource watch for a given watch request. This function also
        handles converting controller_info into a valid controller_type

        Args:
            request: WatchRequest
                The requested WatchRequest to be created
        """
        # Parse the controller info into a type
        if request.controller_info and not request.controller_type:
            request.controller_type = request.controller_info.to_class()

        # Parse any filter infos into types
        if request.filters_info:
            request.filters = FilterManager.from_info(request.filters_info)

        create_resource_watch(
            request,
            self,
            self.deploy_manager,
            self.leadership_manager,
        )

    def _create_timer_event_for_request(
        self, request: ReconcileRequest, result: ReconciliationResult = None
    ) -> Optional[TimerEvent]:
        """Enqueue either a requeue or periodic reconcile request for a given
        result.

        Args:
            request: ReconcileRequest
                The original reconcile request that triggered this process
            result: ReconciliationResult = None
                The result of the reconcile

        Returns:
            timer_event: Optional[TimerEvent]
                The timer event if one was created
        """

        # Short circuit if event is not needed, if resource was deleted,
        # or if there's already a pending reconcile
        if (
            not result or not result.requeue
        ) and not config.python_watch_manager.reconcile_period:
            return None

        if result and not result.requeue and request.type == KubeEventType.DELETED:
            return None

        if request.resource.uid in self.pending_reconciles:
            return None

        # Create requeue_time and type based on result/config
        request_type = None
        requeue_time = None
        if result and result.requeue:
            requeue_time = datetime.now() + result.requeue_params.requeue_after
            request_type = ReconcileRequestType.REQUEUED
        elif config.python_watch_manager.reconcile_period:
            requeue_time = datetime.now() + parse_time_delta(
                config.python_watch_manager.reconcile_period
            )
            request_type = ReconcileRequestType.PERIODIC

        future_request = ReconcileRequest(
            request.controller_type, request_type, request.resource
        )
        log.debug3("Pushing requeue request to timer: %s", future_request)

        return self.timer_thread.put_event(
            requeue_time, self.push_request, future_request
        )

    ## Pending Event Helpers ###################################################

    def _handle_pending_reconcile(self, uid: str) -> bool:
        """Start reconcile for pending request if there is one

        Args:
             uid:str
                The uid of the resource being reconciled

        Returns:
            successful_start:bool
                If there was a pending reconcile that got started"""
        # Check if resource has pending request
        if uid in self.running_reconciles or uid not in self.pending_reconciles:
            return False

        # Start reconcile for request
        request = self.pending_reconciles[uid]
        log.debug4("Got request %s from pending reconciles", request)
        if self._start_reconcile_for_request(request):
            self.pending_reconciles.pop(uid)
            return True
        return False

    def _push_to_pending_reconcile(self, request: ReconcileRequest):
        """Push a request to the pending queue if it's newer than the current event

        Args:
            request:  ReconcileRequest
                The request to possibly add to the pending_reconciles
        """
        uid = request.uid()
        # Only update queue if request is newer
        if uid in self.pending_reconciles:
            if request.timestamp > self.pending_reconciles[uid].timestamp:
                log.debug3("Updating reconcile queue with event %s", request)
                self.pending_reconciles[uid] = request
            else:
                log.debug4("Event in queue is newer than event %s", request)
        else:
            log.debug3("Adding event %s to reconcile queue", request)
            self.pending_reconciles[uid] = request

    ## Process functions ##################################################

    def _start_reconcile_for_request(self, request: ReconcileRequest) -> bool:
        """Start a reconciliation process for a given request

        Args:
            request: ReconcileRequest
                The request to attempt to start

        Returns:
            successfully_started: bool
                If a process could be started
        """
        # If thread is supposed to shutdown then don't start process
        if self.should_stop():
            return False

        # Check if there are too many reconciles running
        if len(self.running_reconciles.keys()) >= self.max_concurrent_reconciles:
            log.warning("Unable to start reconcile, max concurrent jobs reached")
            self.process_overload.set()
            return False

        # Attempt to acquire lock on resource. If failed skip starting
        if not self.leadership_manager.acquire_resource(request.resource):
            log.debug("Unable to obtain leadership lock for %s", request)
            return False

        self.process_overload.clear()
        log.info(
            "Starting reconcile for request %s",
            request,
            extra={"resource": request.resource},
        )

        # Create the send and return pipe
        recv_pipe, send_pipe = self.spawn_ctx.Pipe()

        process = self._start_reconcile_process(request, send_pipe)

        # Generate the reconcile process and update map
        reconcile_process = ReconcileProcess(
            process=process, request=request, pipe=recv_pipe
        )

        self.running_reconciles[request.uid()] = reconcile_process
        return True

    def _start_reconcile_process(
        self, request: ReconcileRequest, pipe: Connection
    ) -> multiprocessing.Process:
        """Helper function to generate and start the reconcile process. This
        was largely created to ease the testing and mocking process

         Args:
             request: ReconcileRequest
                The request to start the process with
             pipe: Connection
                The result pipe for this reconcile

        Returns:
            process: multiprocessing.Process
                The started process
        """

        # Create and start the reconcile process
        process = self.spawn_ctx.Process(
            target=create_and_start_entrypoint,
            args=[self.logging_queue, request, pipe],
        )
        process.start()
        log.debug3("Started child process with pid: %s", process.pid)
        return process

    ## Queue Functions ##################################################

    def _get_all_requests(
        self, timeout: Optional[int] = None
    ) -> List[ReconcileRequest]:
        """Get all of the requests from the reconcile queue

        Args:
            timeout:Optional[int]=None
                The timeout to wait for an event. If None it returns immediately

        Returns:
            requests: List[ReconcileRequest]
                The list of requests gathered from the queue
        """
        request_list = []
        while not self.request_queue.empty():
            try:
                request = self.request_queue.get(block=False, timeout=timeout)

                # If there is a stop request then immediately return it
                if request.type == ReconcileRequestType.STOPPED:
                    return [request]
                request_list.append(request)
            except queue.Empty:
                break
        return request_list
__init__(deploy_manager=None, leadership_manager=None)

Initialize the required queues, helper threads, and reconcile tracking. Also gather any onetime configuration options

Parameters:

deploy_manager (DeployManagerBase, default None): The deploy manager used throughout the thread
leadership_manager (LeadershipManagerBase, default None): The leadership_manager for tracking elections
Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
def __init__(
    self,
    deploy_manager: DeployManagerBase = None,
    leadership_manager: LeadershipManagerBase = None,
):
    """Initialize the required queues, helper threads, and reconcile tracking. Also
    gather any onetime configuration options

    Args:
        deploy_manager: DeployManagerBase = None
            The deploy manager used throughout the thread
        leadership_manager: LeadershipManagerBase = None
            The leadership_manager for tracking elections
    """
    super().__init__(
        name="reconcile_thread",
        deploy_manager=deploy_manager,
        leadership_manager=leadership_manager,
    )

    # Configure the multiprocessing process spawning context
    context = config.python_watch_manager.process_context
    if context not in multiprocessing.get_all_start_methods():
        raise ConfigError(f"Invalid process_context: '{context}'")

    if context == "fork":
        log.warning(
            "The fork multiprocessing context is known to cause deadlocks in certain"
            " environments due to OpenSSL. Consider using spawn for more reliable"
            " reconciling"
        )

    self.spawn_ctx = multiprocessing.get_context(context)

    # Setup required queues
    self.request_queue = self.spawn_ctx.Queue()
    self.logging_queue = self.spawn_ctx.Queue()

    # Setup helper threads
    self.timer_thread: TimerThread = TimerThread()
    self.log_listener_thread: QueueListener = QueueListener(
        self.logging_queue, *get_logging_handlers(), respect_handler_level=False
    )

    # Setup reconcile, request, and event mappings
    self.running_reconciles: Dict[str, ReconcileProcess] = {}
    self.pending_reconciles: Dict[str, ReconcileRequest] = {}
    self.event_map: Dict[str, TimerEvent] = {}

    # Setup control variables
    self.process_overload = threading.Event()

    # Configure the max number of concurrent reconciles via either config
    # or number of cpus
    if config.python_watch_manager.max_concurrent_reconciles:
        self.max_concurrent_reconciles = (
            config.python_watch_manager.max_concurrent_reconciles
        )
    else:
        self.max_concurrent_reconciles = os.cpu_count()
push_request(request)

Push request to reconcile queue

Parameters:

request (ReconcileRequest, required): the ReconcileRequest to add to the queue
Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
def push_request(self, request: ReconcileRequest):
    """Push request to reconcile queue

    Args:
        request: ReconcileRequest
            the ReconcileRequest to add to the queue
    """
    log.info(
        "Pushing request '%s' to reconcile queue",
        request,
        extra={"resource": request.resource},
    )
    self.request_queue.put(request)
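
Other threads (watches, timers, heartbeats) hand work to the reconcile thread exclusively through push_request. A hedged sketch of queueing a periodic reconcile; MyController, deploy_manager, and resource are placeholders for a user-defined Controller class, a deploy manager instance, and a ManagedObject:

reconcile_thread = ReconcileThread(deploy_manager=deploy_manager)
reconcile_thread.start_thread()

# Ask for a reconcile of an already-fetched resource. The request types seen
# in this module include kube event types plus REQUEUED/PERIODIC/DEPENDENT
request = ReconcileRequest(MyController, ReconcileRequestType.PERIODIC, resource)
reconcile_thread.push_request(request)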
run()

The reconcile thread's control flow is to first wait for either a new reconcile request or for a process to end. If it's a reconcile request, the thread checks whether one is already running for the resource and, if not, starts a new one. If a reconcile is already running, or the thread couldn't start a new one, the request gets added to the pending reconciles. There can only be one pending reconcile per resource. If the reconcile thread receives a process end event, it checks the exit code and handles the result queue.

Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
def run(self):
    """The reconcile threads control flow is to first wait for
    either a new reconcile request or for a process to end. If its a reconcile request
    the thread checks if one is already running for the resource and if not starts a
    new one. If a reconcile is already running or the thread couldn't start a new one
    the request gets added to the pending reconciles. There can only be one pending
    reconcile per resource. If the reconcile thread received a process end event it
    checks the exit code and handles the result queue.
    """
    while True:  # pylint: disable=too-many-nested-blocks
        if not self.check_preconditions():
            return

        # Wait for a change with the reconcile processes or reconcile queue.
        # Use _reader and Process.sentinel objects, so we can utilize select.select
        listen_objs = [
            *self.running_reconciles.values(),
            self.request_queue._reader,  # pylint: disable=protected-access
        ]
        log.debug3("Waiting on %s", listen_objs)
        ready_objs = multiprocessing.connection.wait(listen_objs)

        # Check preconditions both before and after waiting
        if not self.check_preconditions():
            return

        # For each object that triggered the connection
        for obj in ready_objs:
            log.debug3("Processing object %s with type %s", obj, type(obj))

            # Handle reconcile process end events
            if isinstance(obj, ReconcileProcess):
                if self._handle_process_end(obj):
                    # If process overload is set then we need to check all resources for
                    # pending reconciles, otherwise just check if the completed resource
                    # has a pending request.
                    if self.process_overload.is_set():
                        for uid in list(self.pending_reconciles.keys()):
                            if not self._handle_pending_reconcile(uid):
                                break
                    else:
                        self._handle_pending_reconcile(obj.uid())

            # Handle all of the events in the queue
            elif isinstance(obj, Connection):
                self._handle_request_queue()
start_thread()

Override start_thread to start helper threads

Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
def start_thread(self):
    """Override start_thread to start helper threads"""
    self.timer_thread.start_thread()
    self.log_listener_thread.start()
    super().start_thread()
stop_thread()

Override stop_thread to ensure reconciles finish correctly

Source code in oper8/watch_manager/python_watch_manager/threads/reconcile.py
def stop_thread(self):
    """Override stop_thread to ensure reconciles finish correctly"""
    super().stop_thread()

    if not self.is_alive() and not self.running_reconciles:
        log.debug("Reconcile Thread already stopped")
        return

    # Reawaken reconcile thread to stop
    log.debug("Pushing stop reconcile request")
    self.push_request(ReconcileRequest(None, ReconcileRequestType.STOPPED, {}))

    log.debug("Waiting for reconcile thread to finish")
    while self.is_alive():
        time.sleep(0.001)

    # Wait until all reconciles have completed
    log.info("Waiting for Running Reconciles to end")
    while self.running_reconciles:
        log.debug2("Waiting for reconciles %s to end", self.running_reconciles)
        for reconcile_process in list(self.running_reconciles.values()):
            # attempt to join process before trying the next one
            reconcile_process.process.join(JOIN_PROCESS_TIMEOUT)
            if reconcile_process.process.exitcode is not None:
                log.debug(
                    "Joined reconciles process %s with exitcode: %s for request object %s",
                    reconcile_process.process.pid,
                    reconcile_process.process.exitcode,
                    reconcile_process.request,
                )
                self.running_reconciles.pop(reconcile_process.uid())
                reconcile_process.process.close()

        # Pause for slight delay between checking processes
        sleep(SHUTDOWN_RECONCILE_POLL_TIME)

    # Close the logging queue to indicate no more logging events
    self.logging_queue.close()
timer

The TimerThread is a helper class used to run scheduled events

TimerThread

Bases: ThreadBase

The TimerThread class is a helper class to run scheduled actions. It is very similar to the threading.Timer stdlib class except that it uses one shared thread for all events instead of a thread per event.

Source code in oper8/watch_manager/python_watch_manager/threads/timer.py
class TimerThread(ThreadBase, metaclass=Singleton):
    """The TimerThread class is a helper class to run scheduled actions. This is very similar
    to threading.Timer stdlib class except that it uses one shared thread for all events
    instead of a thread per event."""

    def __init__(self, name: Optional[str] = None):
        """Initialize a priorityqueue like object and a synchronization object"""
        super().__init__(name=name or "timer_thread", daemon=True)

        # Use a heap queue instead of a queue.PriorityQueue as we're already handling
        # synchronization with the notify condition
        # https://docs.python.org/3/library/heapq.html?highlight=heap#priority-queue-implementation-notes
        self.timer_heap = []
        self.notify_condition = threading.Condition()

    def run(self):
        """The TimerThread's control loop sleeps until the next schedule
        event and executes all pending actions."""
        if not self.check_preconditions():
            return

        while True:
            # Wait until the next event or a new event is pushed
            with self.notify_condition:
                time_to_sleep = self._get_time_to_sleep()
                if time_to_sleep:
                    log.debug2(
                        "Timer waiting %ss until next scheduled event", time_to_sleep
                    )
                else:
                    log.debug2("Timer waiting until event queued")
                self.notify_condition.wait(timeout=time_to_sleep)

            if not self.check_preconditions():
                return

            # Get all the events to be executed
            event_list = self._get_all_current_events()
            for event in event_list:
                log.debug("Timer executing action for event: %s", event)
                event.action(*event.args, **event.kwargs)

    ## Class Interface ###################################################

    def stop_thread(self):
        """Override stop_thread to wake the control loop"""
        super().stop_thread()
        # Notify timer thread of shutdown
        log.debug2("Acquiring notify condition for shutdown")
        with self.notify_condition:
            log.debug("Notifying TimerThread of shutdown")
            self.notify_condition.notify_all()

    ## Public Interface ###################################################

    def put_event(
        self, time: datetime, action: Callable, *args: Any, **kwargs: Dict
    ) -> Optional[TimerEvent]:
        """Push an event to the timer

        Args:
            time: datetime
                The datetime to execute the event at
            action: Callable
                The action to execute
            *args: Any
                Args to pass to the action
            **kwargs: Dict
                Kwargs to pass to the action

        Returns:
            event: Optional[TimerEvent]
                TimerEvent describing the event and can be cancelled
        """
        # Don't allow pushing to a stopped thread
        if self.should_stop():
            return None

        # Create a timer event and push it to the heap
        event = TimerEvent(time=time, action=action, args=args, kwargs=kwargs)
        with self.notify_condition:
            heappush(self.timer_heap, event)
            self.notify_condition.notify_all()
        return event

    ## Time Functions  ###################################################

    def _get_time_to_sleep(self) -> Optional[int]:
        """Calculate the time to sleep based on the current queue

        Returns:
            time_to_wait: Optional[int]
               The time to wait if there's an object in the queue"""
        with self.notify_condition:
            obj = self._peak_next_event()
            if obj:
                time_to_sleep = (obj.time - datetime.now()).total_seconds()
                if time_to_sleep < MIN_SLEEP_TIME:
                    return MIN_SLEEP_TIME
                return time_to_sleep

            return None

    ## Queue Functions  ###################################################

    def _get_all_current_events(self) -> List[TimerEvent]:
        """Get all the current events that should execute

        Returns:
            current_events: List[TimerEvent]
                List of timer events to execute
        """
        event_list = []
        # With lock preview the next object
        with self.notify_condition:
            while len(self.timer_heap) != 0:
                obj_preview = self._peak_next_event()
                # If object exists and should've already executed then remove object from queue
                # and add it to return list
                if obj_preview and obj_preview.time < datetime.now():
                    try:
                        obj = heappop(self.timer_heap)
                        if obj.stale:
                            log.debug2("Skipping timer event %s", obj)
                            continue
                        event_list.append(obj)
                    except queue.Empty:
                        break
                else:
                    break
        return event_list

    def _peak_next_event(self) -> Optional[TimerEvent]:
        """Get the next timer event without removing it from the queue

        Returns:
            next_event: TimerEvent
                The next timer event if one exists
        """
        with self.notify_condition:
            if self.timer_heap:
                return self.timer_heap[0]
            return None
__init__(name=None)

Initialize a priorityqueue like object and a synchronization object

Source code in oper8/watch_manager/python_watch_manager/threads/timer.py
def __init__(self, name: Optional[str] = None):
    """Initialize a priorityqueue like object and a synchronization object"""
    super().__init__(name=name or "timer_thread", daemon=True)

    # Use a heap queue instead of a queue.PriorityQueue as we're already handling
    # synchronization with the notify condition
    # https://docs.python.org/3/library/heapq.html?highlight=heap#priority-queue-implementation-notes
    self.timer_heap = []
    self.notify_condition = threading.Condition()
put_event(time, action, *args, **kwargs)

Push an event to the timer

Parameters:

time (datetime, required): The datetime to execute the event at
action (Callable, required): The action to execute
*args (Any, default ()): Args to pass to the action
**kwargs (Dict, default {}): Kwargs to pass to the action

Returns:

event (Optional[TimerEvent]): TimerEvent describing the event, which can be cancelled

Source code in oper8/watch_manager/python_watch_manager/threads/timer.py
def put_event(
    self, time: datetime, action: Callable, *args: Any, **kwargs: Dict
) -> Optional[TimerEvent]:
    """Push an event to the timer

    Args:
        time: datetime
            The datetime to execute the event at
        action: Callable
            The action to execute
        *args: Any
            Args to pass to the action
        **kwargs: Dict
            Kwargs to pass to the action

    Returns:
        event: Optional[TimerEvent]
            TimerEvent describing the event and can be cancelled
    """
    # Don't allow pushing to a stopped thread
    if self.should_stop():
        return None

    # Create a timer event and push it to the heap
    event = TimerEvent(time=time, action=action, args=args, kwargs=kwargs)
    with self.notify_condition:
        heappush(self.timer_heap, event)
        self.notify_condition.notify_all()
    return event
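
Scheduling through put_event mirrors threading.Timer but shares a single thread for all events. A short sketch; the callback and the ten-second delay are illustrative:

from datetime import datetime, timedelta

timer = TimerThread()
timer.start_thread()

def remind(name):
    log.info("Timer fired for %s", name)

# Schedule the callback roughly ten seconds from now; the returned TimerEvent
# can later be invalidated with event.cancel()
event = timer.put_event(datetime.now() + timedelta(seconds=10), remind, "my-resource")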
run()

The TimerThread's control loop sleeps until the next scheduled event and executes all pending actions.

Source code in oper8/watch_manager/python_watch_manager/threads/timer.py
def run(self):
    """The TimerThread's control loop sleeps until the next schedule
    event and executes all pending actions."""
    if not self.check_preconditions():
        return

    while True:
        # Wait until the next event or a new event is pushed
        with self.notify_condition:
            time_to_sleep = self._get_time_to_sleep()
            if time_to_sleep:
                log.debug2(
                    "Timer waiting %ss until next scheduled event", time_to_sleep
                )
            else:
                log.debug2("Timer waiting until event queued")
            self.notify_condition.wait(timeout=time_to_sleep)

        if not self.check_preconditions():
            return

        # Get all the events to be executed
        event_list = self._get_all_current_events()
        for event in event_list:
            log.debug("Timer executing action for event: %s", event)
            event.action(*event.args, **event.kwargs)
stop_thread()

Override stop_thread to wake the control loop

Source code in oper8/watch_manager/python_watch_manager/threads/timer.py
def stop_thread(self):
    """Override stop_thread to wake the control loop"""
    super().stop_thread()
    # Notify timer thread of shutdown
    log.debug2("Acquiring notify condition for shutdown")
    with self.notify_condition:
        log.debug("Notifying TimerThread of shutdown")
        self.notify_condition.notify_all()
watch

The WatchThread Class is responsible for monitoring the cluster for resource events

WatchThread

Bases: ThreadBase

The WatchThread monitors the cluster for changes to a specific GroupVersionKind either cluster-wide or for a particular namespace. When it detects a change it checks the event against the registered Filters and submits a ReconcileRequest if it passes. Every resource that has at least one watch request gets a corresponding WatchedResource object whose main job is to store the current Filter status
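
In normal operation a WatchThread is created indirectly (for example via create_resource_watch when a dependent watch is requested), but constructing one directly looks roughly like the sketch below; the kind, api_version, and namespace values are placeholders:

# Placeholders: reconcile_thread, deploy_manager, and leadership_manager are
# assumed to already exist
watch_thread = WatchThread(
    reconcile_thread=reconcile_thread,
    kind="Foo",
    api_version="example.org/v1",
    namespace="default",
    deploy_manager=deploy_manager,
    leadership_manager=leadership_manager,
)
watch_thread.start_thread()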

Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
class WatchThread(ThreadBase):  # pylint: disable=too-many-instance-attributes
    """The WatchThread monitors the cluster for changes to a specific GroupVersionKind either
    cluster-wide or for a particular namespace. When it detects a change it checks the event
    against the registered Filters and submits a ReconcileRequest if it passes. Every resource
    that has at least one watch request gets a corresponding WatchedResource object whose main
    job is to store the current Filter status
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        reconcile_thread: RECONCILE_THREAD_TYPE,
        kind: str,
        api_version: str,
        namespace: Optional[str] = None,
        deploy_manager: DeployManagerBase = None,
        leadership_manager: LeadershipManagerBase = None,
    ):
        """Initialize a WatchThread by assigning instance variables and creating maps

        Args:
            reconcile_thread: ReconcileThread
                The reconcile thread to submit requests to
            kind: str
                The kind to watch
            api_version: str
                The api_version to watch
            namespace: Optional[str] = None
                The namespace to watch. If none then cluster-wide
            deploy_manager: DeployManagerBase = None
                The deploy_manager to watch events
            leadership_manager: LeadershipManagerBase = None
                The leadership manager to use for elections
        """
        # Setup initial variables
        self.reconcile_thread = reconcile_thread
        self.kind = kind
        self.api_version = api_version
        self.namespace = namespace

        name = f"watch_thread_{self.api_version}_{self.kind}"
        if self.namespace:
            name = name + f"_{self.namespace}"
        super().__init__(
            name=name,
            daemon=True,
            deploy_manager=deploy_manager,
            leadership_manager=leadership_manager,
        )

        # Setup kubernetes watch resource
        self.kubernetes_watch = watch.Watch()

        # Setup watched resources and request mappings. watched_resources
        # is used to track the current status of a resource in a cluster and also includes
        # the current filters. watch_request tracks all of the Controllers that have watched
        # a specific resource or groupings of resources
        self.watched_resources: Dict[str, WatchedResource] = {}
        self.watch_requests: Dict[str, Set[WatchRequest]] = {}

        # Lock for adding/gathering watch requests
        self.watch_request_lock = Lock()

        # Variables for tracking retries
        self.attempts_left = config.python_watch_manager.watch_retry_count
        self.retry_delay = parse_time_delta(
            config.python_watch_manager.watch_retry_delay or ""
        )

    def run(self):
        """The WatchThread's control loop continuously watches the DeployManager for any new
        events. For every event it receives, it gathers all the WatchRequests whose `watched` value
        applies. The thread then initializes a WatchedObject if one doesn't already exist and
        tests the event against each request's Filter. Finally, it submits a ReconcileRequest
        for all events that pass
        """

        # Check for leadership and shutdown at the start
        list_resource_version = 0
        while True:
            try:
                if not self.check_preconditions():
                    log.debug("Checking preconditions failed. Shutting down")
                    return

                for event in self.deploy_manager.watch_objects(
                    self.kind,
                    self.api_version,
                    namespace=self.namespace,
                    resource_version=list_resource_version,
                    watch_manager=self.kubernetes_watch,
                ):
                    # Validate leadership on each event
                    if not self.check_preconditions():
                        log.debug("Checking preconditions failed. Shutting down")
                        return

                    resource = event.resource

                    # Gather all the watch requests which apply to this event
                    watch_requests = self._gather_resource_requests(resource)
                    if not watch_requests:
                        log.debug2("Skipping resource without requested watch")
                        self._clean_event(event)
                        continue

                    # Ensure a watched object exists for every resource
                    if resource.uid not in self.watched_resources:
                        self._create_watched_resource(resource, watch_requests)

                    # Check both global and watch specific filters
                    watch_requests = self._check_filters(
                        watch_requests, resource, event.type
                    )
                    if not watch_requests:
                        log.debug2(
                            "Skipping event %s as all requests failed filters", event
                        )
                        self._clean_event(event)
                        continue

                    # Push a reconcile request for each watch requested
                    for watch_request in watch_requests:
                        log.debug(
                            "Requesting reconcile for %s",
                            resource,
                            extra={"resource": watch_request.requester.get_resource()},
                        )
                        self._request_reconcile(event, watch_request)

                    # Clean up any resources used for the event
                    self._clean_event(event)

                # Update the resource version to only get new events
                list_resource_version = self.kubernetes_watch.resource_version
            except Exception as exc:
                log.info(
                    "Exception raised when attempting to watch %s",
                    repr(exc),
                    exc_info=exc,
                )
                if self.attempts_left <= 0:
                    log.error(
                        "Unable to start watch within %d attempts",
                        config.python_watch_manager.watch_retry_count,
                    )
                    os._exit(1)

                if not self.wait_on_precondition(self.retry_delay.total_seconds()):
                    log.debug(
                        "Checking preconditions failed during retry. Shutting down"
                    )
                    return
                self.attempts_left = self.attempts_left - 1
                log.info("Restarting watch with %d attempts left", self.attempts_left)

    ## Class Interface ###################################################

    def stop_thread(self):
        """Override stop_thread to stop the kubernetes client's Watch as well"""
        super().stop_thread()
        self.kubernetes_watch.stop()

    ## Public Interface ###################################################

    def request_watch(self, watch_request: WatchRequest):
        """Add a watch request if it doesn't exist

        Args:
            watch_request: WatchRequest
                The watch_request to add
        """
        requester_id = watch_request.requester

        # Acquire the watch request lock before starting work
        with self.watch_request_lock:
            if watch_request in self.watch_requests.get(requester_id.global_id, []):
                log.debug3("Request already added")
                return

            # Create watch request for this kind/api_version. Use global id
            # as watch thread is already namespaced/global
            log.debug3("Adding action with key %s", requester_id.global_id)
            self.watch_requests.setdefault(requester_id.global_id, set()).add(
                watch_request
            )

    ## WatchRequest Functions  ###################################################

    def _gather_resource_requests(self, resource: ManagedObject) -> List[WatchRequest]:
        """Gather the list of actions that apply to this specific Kube event based on
        the ownerRefs and the resource itself.

        Args:
            resource: ManagedObject
                The resource for this event

        Returns:
            request_list: List[WatchRequest]
                The list of watch requests that apply
        """

        request_list = []

        # Acquire the watch request lock
        with self.watch_request_lock:
            # Check if the event resource can be reconciled directly like in the case of
            # Controllers
            resource_id = ResourceId.from_resource(resource)
            for request in self.watch_requests.get(resource_id.global_id, []):
                # Check if request has a specific name and if this event matches
                if request.requester.name and request.requester.name != resource.name:
                    continue

                unique_request = copy.deepcopy(request)
                if not unique_request.requester.name:
                    unique_request.requester = dataclasses.replace(
                        unique_request.requester, name=resource_id.name
                    )

                log.debug3(
                    "Gathering request for controller %s from %s",
                    unique_request.controller_type,
                    resource_id.global_id,
                )
                request_list.append(unique_request)

            # Check for any owners watching this resource
            for owner_ref in resource.metadata.get("ownerReferences", []):
                owner_id = ResourceId.from_owner_ref(
                    owner_ref, namespace=resource_id.namespace
                )

                if owner_id.global_id not in self.watch_requests:
                    log.debug3("Skipping event with owner_key: %s", owner_id.global_id)
                    continue

                for request in self.watch_requests.get(owner_id.global_id, []):
                    # If request has a specific name then ensure it matches
                    if (
                        request.requester.name
                        and request.requester.name != owner_ref.get("name")
                    ):
                        continue

                    # If request doesn't already have a name then force
                    # this resource. This allows multiple controllers with
                    # the same kind/api_version to own the same resource
                    unique_request = copy.deepcopy(request)
                    if not unique_request.requester.name:
                        unique_request.requester = dataclasses.replace(
                            unique_request.requester, name=owner_id.name
                        )

                    log.debug3(
                        "Gathering request for controller %s from %s",
                        unique_request.controller_type,
                        owner_ref,
                    )
                    request_list.append(unique_request)

        return request_list

    def _request_reconcile(self, event: KubeWatchEvent, request: WatchRequest):
        """Request a reconcile for a kube event

        Args:
            event: KubeWatchEvent
                The KubeWatchEvent that triggered the reconcile
            request: WatchRequest
                The object that's requested a reconcile
        """

        resource = event.resource
        event_type = event.type
        requester_id = request.requester

        # If the watch request is for a different object (e.g dependent watch) then
        # fetch the correct resource to reconcile
        if (
            requester_id.kind != event.resource.kind
            or requester_id.api_version != event.resource.api_version
            or (requester_id.name and requester_id.name != event.resource.name)
        ):
            success, obj = self.deploy_manager.get_object_current_state(
                kind=requester_id.kind,
                name=requester_id.name,
                namespace=event.resource.namespace,
                api_version=requester_id.api_version,
            )
            if not success or not obj:
                log.warning(
                    "Unable to fetch owner resource %s", requester_id.get_named_id()
                )
                return

            resource = ManagedObject(obj)
            event_type = ReconcileRequestType.DEPENDENT

        # Generate the request and push one for each watched action to the reconcile thread
        request = ReconcileRequest(request.controller_type, event_type, resource)
        self.reconcile_thread.push_request(request)

    ## Watched Resource Functions  ###################################################

    def _create_watched_resource(
        self,
        resource: ManagedObject,
        watch_requests: List[WatchRequest],
    ):
        """Create a WatchedResource and initialize it's filters

        Args:
            resource: ManagedObject
                The resource being watched
            watch_requests: List[WatchRequest]
                The list of requests that apply to this resource

        """
        # update the watched resources dict
        if resource.uid in self.watched_resources:
            return

        # Setup filter dict with global filters
        filter_dict = {None: FilterManager(get_configured_filter(), resource)}
        for request in watch_requests:
            filter_dict[request.requester.get_named_id()] = FilterManager(
                request.filters, resource
            )

        # Add watched resource to mapping
        self.watched_resources[resource.uid] = WatchedResource(
            gvk=ResourceId.from_resource(resource), filters=filter_dict
        )

    def _clean_event(self, event: KubeWatchEvent):
        """Call this function after processing every event to clean any leftover resources

        Args:
            event: KubeWatchEvent
                The kube event to clean up
        """
        if event.type == KubeEventType.DELETED:
            self.watched_resources.pop(event.resource.uid, None)

    ## Filter Functions  ###################################################

    def _check_filters(
        self,
        watch_requests: List[WatchRequest],
        resource: ManagedObject,
        event: KubeEventType,
    ) -> List[WatchRequest]:
        """Check a resource and event against both global and request specific filters

        Args:
            watch_requests: List[WatchRequest]
                List of watch requests whose filters should be checked
            resource: ManagedObject
                The resource being filtered
            event: KubeEventType
                The event type being filtered

        Returns:
            successful_requests: List[WatchRequest]
                The list of requests that passed the filter

        """

        if resource.uid not in self.watched_resources:
            return []

        # If the default watched resource filter fails then no need to
        # check any watch requests
        watched_resource = self.watched_resources[resource.uid]
        if not watched_resource.filters[None].update_and_test(resource, event):
            return []

        output_requests = []

        # Check the watch requests for any of their filters
        for request in watch_requests:
            requester_id = request.requester.get_named_id()

            # If this is the first time this watched resource has seen this request then
            # initialize the filters
            if requester_id not in watched_resource.filters:
                watched_resource.filters[requester_id] = FilterManager(
                    request.filters, resource
                )

            if not watched_resource.filters[requester_id].update_and_test(
                resource, event
            ):
                continue

            output_requests.append(request)

        return output_requests
__init__(reconcile_thread, kind, api_version, namespace=None, deploy_manager=None, leadership_manager=None)

Initialize a WatchThread by assigning instance variables and creating maps

Parameters:

    reconcile_thread (RECONCILE_THREAD_TYPE): The reconcile thread to submit requests to (required)
    kind (str): The kind to watch (required)
    api_version (str): The api_version to watch (required)
    namespace (Optional[str]): The namespace to watch. If none then cluster-wide (default: None)
    deploy_manager (DeployManagerBase): The deploy_manager to watch events (default: None)
    leadership_manager (LeadershipManagerBase): The leadership manager to use for elections (default: None)
Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def __init__(  # pylint: disable=too-many-arguments
    self,
    reconcile_thread: RECONCILE_THREAD_TYPE,
    kind: str,
    api_version: str,
    namespace: Optional[str] = None,
    deploy_manager: DeployManagerBase = None,
    leadership_manager: LeadershipManagerBase = None,
):
    """Initialize a WatchThread by assigning instance variables and creating maps

    Args:
        reconcile_thread: ReconcileThread
            The reconcile thread to submit requests to
        kind: str
            The kind to watch
        api_version: str
            The api_version to watch
        namespace: Optional[str] = None
            The namespace to watch. If none then cluster-wide
        deploy_manager: DeployManagerBase = None
            The deploy_manager to watch events
        leadership_manager: LeadershipManagerBase = None
            The leadership manager to use for elections
    """
    # Setup initial variables
    self.reconcile_thread = reconcile_thread
    self.kind = kind
    self.api_version = api_version
    self.namespace = namespace

    name = f"watch_thread_{self.api_version}_{self.kind}"
    if self.namespace:
        name = name + f"_{self.namespace}"
    super().__init__(
        name=name,
        daemon=True,
        deploy_manager=deploy_manager,
        leadership_manager=leadership_manager,
    )

    # Setup kubernetes watch resource
    self.kubernetes_watch = watch.Watch()

    # Setup watched resources and request mappings. watched_resources
    # is used to track the current status of a resource in a cluster and also includes
    # the current filters. watch_request tracks all of the Controllers that have watched
    # a specific resource or groupings of resources
    self.watched_resources: Dict[str, WatchedResource] = {}
    self.watch_requests: Dict[str, Set[WatchRequest]] = {}

    # Lock for adding/gathering watch requests
    self.watch_request_lock = Lock()

    # Variables for tracking retries
    self.attempts_left = config.python_watch_manager.watch_retry_count
    self.retry_delay = parse_time_delta(
        config.python_watch_manager.watch_retry_delay or ""
    )
request_watch(watch_request)

Add a watch request if it doesn't exist

Parameters:

    watch_request (WatchRequest): The watch_request to add (required)
Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def request_watch(self, watch_request: WatchRequest):
    """Add a watch request if it doesn't exist

    Args:
        watch_request: WatchRequest
            The watch_request to add
    """
    requester_id = watch_request.requester

    # Acquire the watch request lock before starting work
    with self.watch_request_lock:
        if watch_request in self.watch_requests.get(requester_id.global_id, []):
            log.debug3("Request already added")
            return

        # Create watch request for this kind/api_version. Use global id
        # as watch thread is already namespaced/global
        log.debug3("Adding action with key %s", requester_id.global_id)
        self.watch_requests.setdefault(requester_id.global_id, set()).add(
            watch_request
        )
run()

The WatchThread's control loop continuously watches the DeployManager for any new events. For every event it receives, it gathers all the WatchRequests whose watched value applies. The thread then initializes a WatchedResource if one doesn't already exist and tests the event against each request's Filter. Finally, it submits a ReconcileRequest for all events that pass.

Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def run(self):
    """The WatchThread's control loop continuously watches the DeployManager for any new
    events. For every event it gets it gathers all the WatchRequests whose `watched` value
    applies. The thread then initializes a WatchedResource if one doesn't already exist and
    tests the event against each request's Filter. Finally, it submits a ReconcileRequest
    for all events that pass
    """

    # Check for leadership and shutdown at the start
    list_resource_version = 0
    while True:
        try:
            if not self.check_preconditions():
                log.debug("Checking preconditions failed. Shutting down")
                return

            for event in self.deploy_manager.watch_objects(
                self.kind,
                self.api_version,
                namespace=self.namespace,
                resource_version=list_resource_version,
                watch_manager=self.kubernetes_watch,
            ):
                # Validate leadership on each event
                if not self.check_preconditions():
                    log.debug("Checking preconditions failed. Shutting down")
                    return

                resource = event.resource

                # Gather all the watch requests which apply to this event
                watch_requests = self._gather_resource_requests(resource)
                if not watch_requests:
                    log.debug2("Skipping resource without requested watch")
                    self._clean_event(event)
                    continue

                # Ensure a watched object exists for every resource
                if resource.uid not in self.watched_resources:
                    self._create_watched_resource(resource, watch_requests)

                # Check both global and watch specific filters
                watch_requests = self._check_filters(
                    watch_requests, resource, event.type
                )
                if not watch_requests:
                    log.debug2(
                        "Skipping event %s as all requests failed filters", event
                    )
                    self._clean_event(event)
                    continue

                # Push a reconcile request for each watch requested
                for watch_request in watch_requests:
                    log.debug(
                        "Requesting reconcile for %s",
                        resource,
                        extra={"resource": watch_request.requester.get_resource()},
                    )
                    self._request_reconcile(event, watch_request)

                # Clean up any resources used for the event
                self._clean_event(event)

            # Update the resource version to only get new events
            list_resource_version = self.kubernetes_watch.resource_version
        except Exception as exc:
            log.info(
                "Exception raised when attempting to watch %s",
                repr(exc),
                exc_info=exc,
            )
            if self.attempts_left <= 0:
                log.error(
                    "Unable to start watch within %d attempts",
                    config.python_watch_manager.watch_retry_count,
                )
                os._exit(1)

            if not self.wait_on_precondition(self.retry_delay.total_seconds()):
                log.debug(
                    "Checking preconditions failed during retry. Shutting down"
                )
                return
            self.attempts_left = self.attempts_left - 1
            log.info("Restarting watch with %d attempts left", self.attempts_left)
stop_thread()

Override stop_thread to stop the kubernetes client's Watch as well

Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def stop_thread(self):
    """Override stop_thread to stop the kubernetes client's Watch as well"""
    super().stop_thread()
    self.kubernetes_watch.stop()
create_resource_watch(watch_request, reconcile_thread, deploy_manager, leadership_manager)

Create or request a watch for a resource. This function will either append the request to an existing thread or create a new one. This function will also start the thread if any other watch threads have already been started.

Parameters:

    watch_request (WatchRequest): The watch request to submit (required)
    reconcile_thread (RECONCILE_THREAD_TYPE): The ReconcileThread to submit ReconcileRequests to (required)
    deploy_manager (DeployManagerBase): The DeployManager to use with the Thread (required)
    leadership_manager (LeadershipManagerBase): The LeadershipManager to use for election (required)

Returns:

    watch_thread (WatchThread): The watch_thread that is watching the request

Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def create_resource_watch(
    watch_request: WatchRequest,
    reconcile_thread: RECONCILE_THREAD_TYPE,
    deploy_manager: DeployManagerBase,
    leadership_manager: LeadershipManagerBase,
) -> WatchThread:
    """Create or request a watch for a resource. This function will either append the request to
    an existing thread or create a new one. This function will also start the thread if any
    other watch threads have already been started.

    Args:
        watch_request: WatchRequest
            The watch request to submit
        reconcile_thread: ReconcileThread
            The ReconcileThread to submit ReconcileRequests to
        deploy_manager: DeployManagerBase
            The DeployManager to use with the Thread
        leadership_manager: LeadershipManagerBase
            The LeadershipManager to use for election

    Returns:
        watch_thread: WatchThread
            The watch_thread that is watching the request
    """
    watch_thread = None
    watched_id = watch_request.watched

    # First check for a global watch before checking for a specific namespace watch
    if watched_id.global_id in watch_threads:
        log.debug2("Found existing global watch thread for %s", watch_request)
        watch_thread = watch_threads[watched_id.global_id]

    elif watched_id.namespace and watched_id.namespaced_id in watch_threads:
        log.debug2("Found existing namespaced watch thread for %s", watch_request)
        watch_thread = watch_threads[watched_id.namespaced_id]

    # Create a watch thread if it doesn't exist
    if not watch_thread:
        log.debug2("Creating new WatchThread for %s", watch_request)
        watch_thread = WatchThread(
            reconcile_thread,
            watched_id.kind,
            watched_id.api_version,
            watched_id.namespace,
            deploy_manager,
            leadership_manager,
        )

        watch_key = watched_id.get_id()
        watch_threads[watch_key] = watch_thread

        # Only start the watch thread if another is already watching
        for thread in watch_threads.values():
            if thread.is_alive():
                watch_thread.start_thread()
                break

    # Add action to controller
    watch_thread.request_watch(watch_request)
    return watch_thread
get_resource_watches()

Get the list of all watch_threads

Returns:

    list_of_watches (List[WatchThread]): List of watch threads

Source code in oper8/watch_manager/python_watch_manager/threads/watch.py
def get_resource_watches() -> List[WatchThread]:
    """Get the list of all watch_threads

    Returns:
        list_of_watches: List[WatchThread]
            List of watch threads
    """
    return watch_threads.values()

utils

Import all functions, constants, and classes from the utils module

common

Shared utilities for the PythonWatchManager

get_logging_handlers()

Get the current logging handlers

Source code in oper8/watch_manager/python_watch_manager/utils/common.py
def get_logging_handlers() -> List[logging.Handler]:
    """Get the current logging handlers"""
    logger = logging.getLogger()
    if not logger.handlers:
        handler = logging.StreamHandler()
        logger.addHandler(handler)

    return logger.handlers
get_operator_namespace()

Get the current namespace from a kubernetes file or config

Source code in oper8/watch_manager/python_watch_manager/utils/common.py
def get_operator_namespace() -> str:
    """Get the current namespace from a kubernetes file or config"""
    # Default to in cluster namespace file
    namespace_file = pathlib.Path(
        "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
    )
    if namespace_file.is_file():
        return namespace_file.read_text(encoding="utf-8")
    return config.python_watch_manager.lock.namespace
get_pod_name()

Get the current pod from env variables, config, or hostname

Source code in oper8/watch_manager/python_watch_manager/utils/common.py
def get_pod_name() -> str:
    """Get the current pod from env variables, config, or hostname"""

    pod_name = config.pod_name
    if not pod_name:
        log.warning("Pod name not detected, falling back to hostname")
        pod_name = platform.node().split(".")[0]

    return pod_name
obj_to_hash(obj)

Get the hash of any jsonable python object

Parameters:

    obj (Any): The object to hash (required)

Returns:

    hash (str): The hash of obj

Source code in oper8/watch_manager/python_watch_manager/utils/common.py
def obj_to_hash(obj: Any) -> str:
    """Get the hash of any jsonable python object

    Args:
        obj: Any
            The object to hash

    Returns:
        hash: str
            The hash of obj
    """
    return hash(json.dumps(obj, sort_keys=True))
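
Because the object is serialized with json.dumps(..., sort_keys=True) before hashing, equivalent mappings hash identically regardless of key order. A minimal usage sketch (note that the value comes from Python's built-in hash, so it is numeric despite the str annotation):

from oper8.watch_manager.python_watch_manager.utils.common import obj_to_hash

# Key order does not matter because the object is canonicalized before hashing
assert obj_to_hash({"replicas": 3, "image": "app:v1"}) == obj_to_hash(
    {"image": "app:v1", "replicas": 3}
)
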
parse_time_delta(time_str)

Parse a string into a timedelta. Accepts values in the following formats: 1h, 5m, 10s, etc.

Parameters:

    time_str (str): The string representation of a timedelta (required)

Returns:

    result (Optional[timedelta]): The parsed timedelta if one could be found

Source code in oper8/watch_manager/python_watch_manager/utils/common.py
def parse_time_delta(
    time_str: str,
) -> Optional[timedelta]:  # pylint: disable=inconsistent-return-statements
    """Parse a string into a timedelta. Excepts values in the
    following formats: 1h, 5m, 10s, etc

    Args:
        time_str: str
            The string representation of a timedelta

    Returns:
        result: Optional[timedelta]
            The parsed timedelta if one could be found
    """
    parts = regex.match(time_str)
    if not parts or all(part is None for part in parts.groupdict().values()):
        return None
    parts = parts.groupdict()
    time_params = {}
    for name, param in parts.items():
        if param:
            time_params[name] = float(param)
    return timedelta(**time_params)
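
A brief usage sketch, assuming the module-level regex maps suffixes such as h, m, and s onto the matching timedelta keyword groups (hours, minutes, seconds):

from datetime import timedelta

from oper8.watch_manager.python_watch_manager.utils.common import parse_time_delta

assert parse_time_delta("30s") == timedelta(seconds=30)
assert parse_time_delta("5m") == timedelta(minutes=5)
assert parse_time_delta("") is None  # unparseable strings yield None
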
constants

Useful Constants

log_handler

Log handler helper class

LogQueueHandler

Bases: QueueHandler

Log Handler class to collect messages from child processes and pass them to the root process via a multiprocess queue

Source code in oper8/watch_manager/python_watch_manager/utils/log_handler.py
class LogQueueHandler(QueueHandler):
    """
    Log Handler class to collect messages from child processes and pass
    them to the root process via a multiprocess queue
    """

    def __init__(self, queue: QUEUE_TYPE, manifest: ManagedObject = None):
        """Initialize the queue handler and instance variables

        Args:
            queue: "Queue[Any]"
                The queue to pass messages to
            manifest: ManagedObject
                The manifest of the current process. This is only used if it can't find
                the resource on the current formatter
        """
        super().__init__(queue)
        self.manifest = manifest

    def prepare(self, record: LogRecord) -> LogRecord:
        """Prep a record for pickling before sending it to the queue

        Args:
            record: LogRecord
                The record to be prepared

        Returns:
            prepared_record: LogRecord
                The prepared record ready to be pickled
        """

        # Duplicate record to preserve other handlers
        record = copy.copy(record)

        # get the currently used formatter
        formatter = self.formatter if self.formatter else Formatter()

        # Exceptions can't always be pickled, so manually process
        # the record but remove the exc_info. This retains the
        # processed exc_text but allows the parent process to reformat
        # the message
        if record.exc_info:
            record.exc_text = formatter.formatException(record.exc_info)
            record.exc_info = None

        # In case there are exceptions/unpicklable objects in the logging
        # args then manually compute the message. After computing clear the
        # message&args values to allow the parent process to reformat the
        # record
        record.msg = record.getMessage()
        record.args = []

        # Take the manifest from the current formatter and pass it back up
        resource = {}
        if hasattr(formatter, "manifest"):
            resource = formatter.manifest
        elif self.manifest:
            resource = self.manifest

        # Only copy required resource keys to the record
        resource_metadata = resource.get("metadata", {})
        record.resource = {
            "kind": resource.get("kind"),
            "apiVersion": resource.get("apiVersion"),
            "metadata": {
                "name": resource_metadata.get("name"),
                "namespace": resource_metadata.get("namespace"),
                "resourceVersion": resource_metadata.get("resourceVersion"),
            },
        }

        return record
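
A minimal sketch of the intended flow, assuming a multiprocessing queue shared between a parent and a child process (logger names and messages here are illustrative):

import logging
import multiprocessing

from oper8.watch_manager.python_watch_manager.utils.log_handler import LogQueueHandler

# Child process side: forward log records to the parent through the shared queue
log_queue = multiprocessing.Queue()
child_logger = logging.getLogger("child")
child_logger.addHandler(LogQueueHandler(log_queue))
child_logger.warning("reconcile step finished")

# Parent process side: drain the queue and re-handle records with local handlers
record = log_queue.get()
logging.getLogger().handle(record)
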
__init__(queue, manifest=None)

Initialize the queue handler and instance variables

Parameters:

    queue ("Queue[Any]"): The queue to pass messages to (required)
    manifest (ManagedObject): The manifest of the current process. This is only used if it can't find the resource on the current formatter (default: None)
Source code in oper8/watch_manager/python_watch_manager/utils/log_handler.py
def __init__(self, queue: QUEUE_TYPE, manifest: ManagedObject = None):
    """Initialize the queue handler and instance variables

    Args:
        queue: "Queue[Any]"
            The queue to pass messages to
        manifest: ManagedObject
            The manifest of the current process. This is only used if it can't find
            the resource on the current formatter
    """
    super().__init__(queue)
    self.manifest = manifest
prepare(record)

Prep a record for pickling before sending it to the queue

Parameters:

    record (LogRecord): The record to be prepared (required)

Returns:

    prepared_record (LogRecord): The prepared record ready to be pickled

Source code in oper8/watch_manager/python_watch_manager/utils/log_handler.py
def prepare(self, record: LogRecord) -> LogRecord:
    """Prep a record for pickling before sending it to the queue

    Args:
        record: LogRecord
            The record to be prepared

    Returns:
        prepared_record: LogRecord
            The prepared record ready to be pickled
    """

    # Duplicate record to preserve other handlers
    record = copy.copy(record)

    # get the currently used formatter
    formatter = self.formatter if self.formatter else Formatter()

    # Exceptions can't always be pickled, so manually process
    # the record but remove the exc_info. This retains the
    # processed exc_text but allows the parent process to reformat
    # the message
    if record.exc_info:
        record.exc_text = formatter.formatException(record.exc_info)
        record.exc_info = None

    # In case there are exceptions/unpicklable objects in the logging
    # args then manually compute the message. After computing clear the
    # message&args values to allow the parent process to reformat the
    # record
    record.msg = record.getMessage()
    record.args = []

    # Take the manifest from the current formatter and pass it back up
    resource = {}
    if hasattr(formatter, "manifest"):
        resource = formatter.manifest
    elif self.manifest:
        resource = self.manifest

    # Only copy required resource keys to the record
    resource_metadata = resource.get("metadata", {})
    record.resource = {
        "kind": resource.get("kind"),
        "apiVersion": resource.get("apiVersion"),
        "metadata": {
            "name": resource_metadata.get("name"),
            "namespace": resource_metadata.get("namespace"),
            "resourceVersion": resource_metadata.get("resourceVersion"),
        },
    }

    return record
types

Standard data types used throughout the PWM

ABCSingletonMeta

Bases: Singleton, ABCMeta

Shared metaclass for ABCMeta and Singleton

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
class ABCSingletonMeta(Singleton, abc.ABCMeta):
    """Shared metaclass for ABCMeta and Singleton"""
ClassInfo

Bases: NamedTuple

Class containing information describing a class. This is required when passing class references between processes which might have different sys paths like when using VCS

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
class ClassInfo(NamedTuple):
    """Class containing information describing a class. This is required when passing class
    references between processes which might have different sys paths like when using VCS"""

    moduleName: str
    className: str

    # Generation Utilities
    @classmethod
    def from_type(cls, class_obj: type) -> "ClassInfo":
        """Create a ClassInfo from a class object"""
        return cls(moduleName=class_obj.__module__, className=class_obj.__name__)

    @classmethod
    def from_obj(cls, obj) -> "ClassInfo":
        """Create a ClassInfo from an existing object"""
        return cls.from_type(type(obj))

    # Get the class referenced described by the info
    def to_class(self) -> type:
        """Import and return a ClassInfo's type"""
        module = importlib.import_module(self.moduleName)
        if not module:
            raise ValueError(f"Invalid ControllerInfo Module: {self.moduleName}")

        if not hasattr(module, self.className):
            raise ValueError(
                f"Invalid ControllerInfo: {self.className} not a member of {self.moduleName}"
            )

        return getattr(module, self.className)
from_obj(obj) classmethod

Create a ClassInfo from an existing object

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@classmethod
def from_obj(cls, obj) -> "ClassInfo":
    """Create a ClassInfo from an existing object"""
    return cls.from_type(type(obj))
from_type(class_obj) classmethod

Create a ClassInfo from a class object

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@classmethod
def from_type(cls, class_obj: type) -> "ClassInfo":
    """Create a ClassInfo from a class object"""
    return cls(moduleName=class_obj.__module__, className=class_obj.__name__)
to_class()

Import and return a ClassInfo's type

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def to_class(self) -> type:
    """Import and return a ClassInfo's type"""
    module = importlib.import_module(self.moduleName)
    if not module:
        raise ValueError(f"Invalid ControllerInfo Module: {self.moduleName}")

    if not hasattr(module, self.className):
        raise ValueError(
            f"Invalid ControllerInfo: {self.className} not a member of {self.moduleName}"
        )

    return getattr(module, self.className)
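
Taken together, these helpers let a class reference be described by its importable location, pickled across a process boundary, and resolved again on the other side. A small round-trip sketch:

from oper8.watch_manager.python_watch_manager.utils.types import ClassInfo

info = ClassInfo.from_type(dict)  # moduleName="builtins", className="dict"
assert info.to_class() is dict

# from_obj is shorthand for from_type(type(obj))
assert ClassInfo.from_obj({}) == info
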
ReconcileProcess dataclass

Dataclass to track a running reconcile. This includes the raw process object, the result pipe, and the request being reconciled

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass
class ReconcileProcess:
    """Dataclass to track a running reconcile. This includes the raw process
    object, the result pipe, and the request being reconciled"""

    process: multiprocessing.Process
    request: ReconcileRequest
    pipe: Connection

    def fileno(self):
        """Pass through fileno to process. Sentinel so this object can be
        directly used by multiprocessing.connection.wait"""
        return self.process.sentinel

    def uid(self):
        """Get the uid for the resource being reconciled"""
        return self.request.uid()
fileno()

Pass through fileno to the process sentinel so this object can be used directly by multiprocessing.connection.wait

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def fileno(self):
    """Pass through fileno to process. Sentinel so this object can be
    directly used by multiprocessing.connection.wait"""
    return self.process.sentinel
uid()

Get the uid for the resource being reconciled

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def uid(self):
    """Get the uid for the resource being reconciled"""
    return self.request.uid()
ReconcileRequest dataclass

Class to represent one request to the ReconcileThread. This includes important information including the current resource and Controller being reconciled.

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass
class ReconcileRequest:
    """Class to represent one request to the ReconcileThread. This includes
    important information including the current resource and Controller being
    reconciled.
    """

    controller_type: Type[CONTROLLER_TYPE]
    type: Union[ReconcileRequestType, KUBE_EVENT_TYPE_TYPE]
    resource: ManagedObject
    timestamp: datetime = datetime.now()

    def uid(self):
        """Get the uid of the resource being reconciled"""
        return self.resource.uid
uid()

Get the uid of the resource being reconciled

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def uid(self):
    """Get the uid of the resource being reconciled"""
    return self.resource.uid
ReconcileRequestType

Bases: Enum

Enum to expand the possible KubeEventTypes to include PythonWatchManager specific events

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
class ReconcileRequestType(Enum):
    """Enum to expand the possible KubeEventTypes to include PythonWatchManager
    specific events"""

    # Used for events that are a requeue of an object
    REQUEUED = "REQUEUED"

    # Used for periodic reconcile events
    PERIODIC = "PERIODIC"

    # Used for when an event is a dependent resource of a controller
    DEPENDENT = "DEPENDENT"

    # Used as a sentinel to alert threads to stop
    STOPPED = "STOPPED"
ResourceId dataclass

Class containing the information needed to identify a resource

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass(eq=True, frozen=True)
class ResourceId:
    """Class containing the information needed to identify a resource"""

    api_version: str
    kind: str
    name: str = None
    namespace: str = None

    # Id properties

    @cached_property
    def global_id(self) -> str:
        """Get the global_id for a resource in the form kind.version.group"""
        group_version = self.api_version.split("/")
        return ".".join([self.kind, *reversed(group_version)])

    @cached_property
    def namespaced_id(self) -> str:
        """Get the namespace specific id for a resource"""
        return f"{self.namespace}.{self.global_id}"

    # Helper Accessor functions
    def get_id(self) -> str:
        """Get the requisite id for a resource"""
        return self.namespaced_id if self.namespace else self.global_id

    def get_named_id(self) -> str:
        """Get a named id for a resouce"""
        return f"{self.name}.{self.get_id()}"

    def get_resource(self) -> dict:
        """Get a resource template from this id"""
        return {
            "kind": self.kind,
            "apiVersion": self.api_version,
            "metadata": {"name": self.name, "namespace": self.namespace},
        }

    # Helper Creation Functions
    @classmethod
    def from_resource(cls, resource: Union[ManagedObject, dict]) -> "ResourceId":
        """Create a resource id from an existing resource"""
        metadata = resource.get("metadata", {})
        return cls(
            api_version=resource.get("apiVersion"),
            kind=resource.get("kind"),
            namespace=metadata.get("namespace"),
            name=metadata.get("name"),
        )

    @classmethod
    def from_owner_ref(cls, owner_ref: dict, namespace: str = None) -> "ResourceId":
        """Create a resource id from an ownerRef"""
        return cls(
            api_version=owner_ref.get("apiVersion"),
            kind=owner_ref.get("kind"),
            namespace=namespace,
            name=owner_ref.get("name"),
        )

    @classmethod
    def from_controller(
        cls, controller: Type[CONTROLLER_TYPE], namespace: str = None
    ) -> "ResourceId":
        """Get a Controller's target as a resource id"""
        return cls(
            api_version=f"{controller.group}/{controller.version}",
            kind=controller.kind,
            namespace=namespace,
        )
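
A short sketch of how the different ids are derived for a namespaced resource (the manifest below is illustrative):

from oper8.watch_manager.python_watch_manager.utils.types import ResourceId

deployment = {
    "apiVersion": "apps/v1",
    "kind": "Deployment",
    "metadata": {"name": "my-app", "namespace": "prod"},
}
rid = ResourceId.from_resource(deployment)

assert rid.global_id == "Deployment.v1.apps"  # kind.version.group
assert rid.namespaced_id == "prod.Deployment.v1.apps"
assert rid.get_id() == rid.namespaced_id  # namespaced id is used when a namespace is set
assert rid.get_named_id() == "my-app.prod.Deployment.v1.apps"
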
global_id cached property

Get the global_id for a resource in the form kind.version.group

namespaced_id cached property

Get the namespace specific id for a resource

from_controller(controller, namespace=None) classmethod

Get a Controller's target as a resource id

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@classmethod
def from_controller(
    cls, controller: Type[CONTROLLER_TYPE], namespace: str = None
) -> "ResourceId":
    """Get a Controller's target as a resource id"""
    return cls(
        api_version=f"{controller.group}/{controller.version}",
        kind=controller.kind,
        namespace=namespace,
    )
from_owner_ref(owner_ref, namespace=None) classmethod

Create a resource id from an ownerRef

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@classmethod
def from_owner_ref(cls, owner_ref: dict, namespace: str = None) -> "ResourceId":
    """Create a resource id from an ownerRef"""
    return cls(
        api_version=owner_ref.get("apiVersion"),
        kind=owner_ref.get("kind"),
        namespace=namespace,
        name=owner_ref.get("name"),
    )
from_resource(resource) classmethod

Create a resource id from an existing resource

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@classmethod
def from_resource(cls, resource: Union[ManagedObject, dict]) -> "ResourceId":
    """Create a resource id from an existing resource"""
    metadata = resource.get("metadata", {})
    return cls(
        api_version=resource.get("apiVersion"),
        kind=resource.get("kind"),
        namespace=metadata.get("namespace"),
        name=metadata.get("name"),
    )
get_id()

Get the requisite id for a resource

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def get_id(self) -> str:
    """Get the requisite id for a resource"""
    return self.namespaced_id if self.namespace else self.global_id
get_named_id()

Get a named id for a resource

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def get_named_id(self) -> str:
    """Get a named id for a resouce"""
    return f"{self.name}.{self.get_id()}"
get_resource()

Get a resource template from this id

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def get_resource(self) -> dict:
    """Get a resource template from this id"""
    return {
        "kind": self.kind,
        "apiVersion": self.api_version,
        "metadata": {"name": self.name, "namespace": self.namespace},
    }
Singleton

Bases: type

MetaClass to limit a class to only one global instance. When the first instance is created it's attached to the Class and the next time someone initializes the class the original instance is returned

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
class Singleton(type):
    """MetaClass to limit a class to only one global instance. When the
    first instance is created it's attached to the Class and the next
    time someone initializes the class the original instance is returned
    """

    def __call__(cls, *args, **kwargs):
        if getattr(cls, "_disable_singleton", False):
            return type.__call__(cls, *args, **kwargs)

        # The _instance is attached to the class itself without looking upwards
        # into any parent classes
        if "_instance" not in cls.__dict__:
            cls._instance = type.__call__(cls, *args, **kwargs)
        return cls._instance
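
A minimal illustration with a made-up class (the code above also shows that setting _disable_singleton on a class opts it back out, e.g. for tests):

from oper8.watch_manager.python_watch_manager.utils.types import Singleton

class IllustrativeRegistry(metaclass=Singleton):
    def __init__(self):
        self.entries = {}

# Every construction after the first returns the cached instance
assert IllustrativeRegistry() is IllustrativeRegistry()
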
TimerEvent dataclass

Class for keeping track of an item in the timer queue. Time is the only comparable field to support the TimerThread's priority queue

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass(order=True)
class TimerEvent:
    """Class for keeping track of an item in the timer queue. Time is the
    only comparable field to support the TimerThread's priority queue"""

    time: datetime
    action: callable = field(compare=False)
    args: list = field(default_factory=list, compare=False)
    kwargs: dict = field(default_factory=dict, compare=False)
    stale: bool = field(default=False, compare=False)

    def cancel(self):
        """Cancel this event. It will not be executed when read from the
        queue"""
        self.stale = True
cancel()

Cancel this event. It will not be executed when read from the queue

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
def cancel(self):
    """Cancel this event. It will not be executed when read from the
    queue"""
    self.stale = True
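
Because only time participates in comparison, TimerEvents can be placed directly into a priority queue. A small sketch:

from datetime import datetime, timedelta
from queue import PriorityQueue

from oper8.watch_manager.python_watch_manager.utils.types import TimerEvent

events = PriorityQueue()
later = TimerEvent(time=datetime.now() + timedelta(minutes=5), action=print, args=["later"])
soon = TimerEvent(time=datetime.now(), action=print, args=["soon"])
events.put(later)
events.put(soon)

assert events.get() is soon  # ordered by time only
later.cancel()  # marked stale; consumers skip it when it is dequeued
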
WatchRequest dataclass

A class for requesting a watch of a particular object. It contains information around the watched object, who requested the watch, the controller type to be reconciled, and any filters to be applied to just this request

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass()
class WatchRequest:
    """A class for requesting a watch of a particular object. It contains information around the
    watched object, who requested the watch, the controller type to be reconciled, and any filters
    to be applied to just this request"""

    watched: ResourceId
    requester: ResourceId

    # Watch request must have either type or info
    controller_type: Type[CONTROLLER_TYPE] = None
    controller_info: ClassInfo = None

    # Don't compare filters when checking equality as we
    # assume they're the same if they have the same controller
    filters: List[Type[FILTER_TYPE]] = field(default_factory=list, compare=False)
    filters_info: List[Type[ClassInfo]] = field(default_factory=list, compare=False)

    def __hash__(self) -> int:
        return hash(
            (
                self.watched,
                self.requester,
                self.controller_type if self.controller_type else self.controller_info,
            )
        )
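
A small sketch of the deduplication behavior; the filter class below is a placeholder, not a real oper8 filter:

from oper8.watch_manager.python_watch_manager.utils.types import ResourceId, WatchRequest

class PlaceholderFilter:
    """Stand-in for a real PWM filter class"""

watched = ResourceId(api_version="v1", kind="Secret", namespace="prod")
requester = ResourceId(api_version="example.org/v1", kind="MyApp", namespace="prod", name="instance")

req_a = WatchRequest(watched=watched, requester=requester)
req_b = WatchRequest(watched=watched, requester=requester, filters=[PlaceholderFilter])

# Filters are excluded from comparison and hashing, so the two requests collapse into one
assert req_a == req_b and hash(req_a) == hash(req_b)
assert len({req_a, req_b}) == 1
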
WatchedResource dataclass

A class for tracking a resource in the cluster. Every resource that has a requested watch will have a corresponding WatchedResource

Source code in oper8/watch_manager/python_watch_manager/utils/types.py
@dataclass
class WatchedResource:
    """A class for tracking a resource in the cluster. Every resource that has a
    requested watch will have a corresponding WatchedResource"""

    gvk: ResourceId
    # Each watched resource contains a dict of filters for each
    # corresponding watch request. The key is the named_id of
    # the requester or None for default filters. This aligns with
    # the Controllers pwm_filters attribute
    filters: Dict[str, FILTER_MANAGER_TYPE] = field(default_factory=dict)

x

The oper8.x module holds common implementations of reusable patterns built on top of the abstractions in oper8. These are intended as reusable components that can be shared across many operator implementations.

One of the core principles of oper8 is that the schema for config is entirely up to the user (with the only exception being spec.version). In oper8.x, this is not the case and there are many config conventions (CRD schema and backend) that are encoded into the various utilities.

datastores

connection_base

Base class definition for all datastore connections

DatastoreConnectionBase

Bases: ABCStatic

A DatastoreConnection is an object that holds all of the critical data to connect to a specific datastore type. A DatastoreConnection for a given datastore type MUST not care what implementation backs the connection.

Source code in oper8/x/datastores/connection_base.py
class DatastoreConnectionBase(ABCStatic):
    """
    A DatastoreConnection is an object that holds all of the critical data to
    connect to a specific datastore type. A DatastoreConnection for a given
    datastore type MUST not care what implementation backs the connection.
    """

    ## Construction ############################################################

    def __init__(self, session: Session):
        """Construct with the session so that it can be saved as a member"""
        self._session = session

    @property
    def session(self) -> Session:
        return self._session

    ## Abstract Interface ######################################################

    @abc.abstractmethod
    def to_dict(self) -> dict:
        """Serialize the internal connection details to a dict object which can
        be added directly to a subsystem's CR.

        Returns:
            config_dict:  dict
                This dict will hold the keys and values that can be used to add
                to a subsystem's datastores.connections section.
        """

    @classmethod
    @abc.abstractmethod
    def from_dict(
        cls, session: Session, config_dict: dict
    ) -> "DatastoreConnectionBase":
        """Parse a config_dict from a subsystem CR to create an instance of the
        DatastoreConnection class.

        Args:
            session:  Session
                The current deploy session
            config_dict:  dict
                This dict will hold the keys and values created by to_dict and
                pulled from the subsystem CR.

        Returns:
            datastore_connection:  DatastoreConnectionBase
                The constructed instance of the connection
        """

    ## Shared Utilities ########################################################

    def _fetch_secret_data(self, secret_name: str) -> Optional[dict]:
        """Most connection implementations will need the ability to fetch secret
        data from the cluster when loading from the CR dict, so this provides a
        common implementation.

        Args:
            secret_name:  str
                The name of the secret to fetch

        Returns:
            secret_data:  Optional[dict]
                The content of the 'data' field in the secret with values base64
                decoded if the secret is found, otherwise None
        """
        success, content = self.session.get_object_current_state("Secret", secret_name)
        assert_cluster(success, f"Fetching connection secret [{secret_name}] failed")
        if content is None:
            return None
        assert "data" in content, "Got a secret without 'data'?"
        return {
            key: common.b64_secret_decode(val) for key, val in content["data"].items()
        }
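
A minimal sketch of what a concrete connection type might look like; the datastore and field names below are purely illustrative and not part of oper8.x:

from oper8.x.datastores.connection_base import DatastoreConnectionBase

class IllustrativeKVConnection(DatastoreConnectionBase):
    """Hypothetical connection type used only to show the required interface"""

    def __init__(self, session, hostname: str, port: int):
        super().__init__(session)
        self._hostname = hostname
        self._port = port

    def to_dict(self) -> dict:
        # These keys land under datastores.connections in the subsystem CR
        return {"hostname": self._hostname, "port": self._port}

    @classmethod
    def from_dict(cls, session, config_dict: dict) -> "IllustrativeKVConnection":
        return cls(session, config_dict["hostname"], config_dict["port"])
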
__init__(session)

Construct with the session so that it can be saved as a member

Source code in oper8/x/datastores/connection_base.py
def __init__(self, session: Session):
    """Construct with the session so that it can be saved as a member"""
    self._session = session
from_dict(session, config_dict) abstractmethod classmethod

Parse a config_dict from a subsystem CR to create an instance of the DatastoreConnection class.

Parameters:

    session (Session): The current deploy session (required)
    config_dict (dict): This dict will hold the keys and values created by to_dict and pulled from the subsystem CR. (required)

Returns:

    datastore_connection (DatastoreConnectionBase): The constructed instance of the connection

Source code in oper8/x/datastores/connection_base.py
@classmethod
@abc.abstractmethod
def from_dict(
    cls, session: Session, config_dict: dict
) -> "DatastoreConnectionBase":
    """Parse a config_dict from a subsystem CR to create an instance of the
    DatastoreConnection class.

    Args:
        session:  Session
            The current deploy session
        config_dict:  dict
            This dict will hold the keys and values created by to_dict and
            pulled from the subsystem CR.

    Returns:
        datastore_connection:  DatastoreConnectionBase
            The constructed instance of the connection
    """
to_dict() abstractmethod

Serialize the internal connection details to a dict object which can be added directly to a subsystem's CR.

Returns:

    config_dict (dict): This dict will hold the keys and values that can be used to add to a subsystem's datastores.connections section.

Source code in oper8/x/datastores/connection_base.py
@abc.abstractmethod
def to_dict(self) -> dict:
    """Serialize the internal connection details to a dict object which can
    be added directly to a subsystem's CR.

    Returns:
        config_dict:  dict
            This dict will hold the keys and values that can be used to add
            to a subsystem's datastores.connections section.
    """

cos

Top level imports for the cos datastore type

connection

The common Connection type for a COS instance

CosConnection

Bases: DatastoreConnectionBase

A CosConnection holds the core connection information for a named COS instance, regardless of what ICosComponent implements it. The key pieces of information are:

  • General config

    • hostname: The hostname where the instance can be reached
    • port: The port where the instance is listening
    • bucket_name: The name of the bucket within the instance
  • Auth

    • auth_secret_name: The in-cluster name for the secret holding the access_key and secret_key
    • auth_secret_access_key_field: The field within the auth secret that holds the access_key
    • auth_secret_secret_key_field: The field within the auth secret that holds the secret_key
  • TLS:

    • tls_cert: The content of the TLS cert if tls is enabled
    • tls_secret_name: The in-cluster name for the secret holding the TLS creds if tls is enabled
    • tls_secret_cert_field: The field within the tls secret that holds the cert
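
A construction sketch with made-up values. In real code session is the active deploy Session (None works below only because the base class simply stores it), and the https schema is inferred from the presence of the TLS arguments:

from oper8.x.datastores.cos.connection import CosConnection

conn = CosConnection(
    session=None,  # illustrative only; pass the current deploy session in real code
    hostname="cos.example.svc",
    port=9000,
    bucket_name="my-bucket",
    auth_secret_name="cos-auth",
    auth_secret_access_key_field="accesskey",
    auth_secret_secret_key_field="secretkey",
    tls_secret_name="cos-tls",
    tls_secret_cert_field="ca.crt",
)
assert conn.endpoint == "https://cos.example.svc:9000"
assert conn.tls_enabled
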
Source code in oper8/x/datastores/cos/connection.py
class CosConnection(DatastoreConnectionBase):
    """
    A CosConnection holds the core connection information for a named COS
    instance, regardless of what ICosComponent implements it. The key pieces of
    information are:

    * General config
        * hostname: The hostname where the instance can be reached
        * port: The port where the instance is listening
        * bucket_name: The name of the bucket within the instance

    * Auth
        * auth_secret_name: The in-cluster name for the secret holding the
            access_key and secret_key
        * auth_secret_access_key_field: The field within the auth secret that
            holds the access_key
        * auth_secret_secret_key_field: The field within the auth secret that
            holds the secret_key

    * TLS:
        * tls_cert: The content of the TLS cert if tls is enabled
        * tls_secret_name: The in-cluster name for the secret holding the TLS
            creds if tls is enabled
        * tls_secret_cert_field: The field within the tls secret that holds the
            cert
    """

    def __init__(
        self,
        session: Session,
        hostname: str,
        port: int,
        bucket_name: str,
        auth_secret_name: str,
        auth_secret_access_key_field: str,
        auth_secret_secret_key_field: str,
        tls_secret_name: Optional[str] = None,
        tls_secret_cert_field: Optional[str] = None,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        tls_cert: Optional[str] = None,
    ):
        super().__init__(session)

        # These fields must be passed in directly
        self._hostname = hostname
        self._port = port
        self._bucket_name = bucket_name
        self._auth_secret_name = auth_secret_name
        self._auth_secret_access_key_field = auth_secret_access_key_field
        self._auth_secret_secret_key_field = auth_secret_secret_key_field
        self._tls_secret_name = tls_secret_name
        self._tls_secret_cert_field = tls_secret_cert_field

        # The secret content may be populated or not, depending on whether this
        # Connection is being created by the component or a CR config. If not
        # populated now, they will be lazily populated on client request.
        self._access_key = access_key
        self._secret_key = secret_key
        self._tls_cert = tls_cert

        # Ensure that the TLS arguments are provided in a reasonable way. The
        # cert may be omitted
        tls_args = {tls_secret_name, tls_secret_cert_field}
        assert (
            tls_args == {None} or None not in tls_args
        ), "All TLS arguments must be provided if tls is enabled"
        self._tls_enabled = tls_args != {None}
        assert (
            self._tls_enabled or self._tls_cert is None
        ), "Cannot give a tls cert value when tls is disabled"

        # Schema is deduced based on the presence of the tls arguments
        self._schema = "http" if tls_secret_name is None else "https"

    ## Properties ##############################################################

    @property
    def schema(self) -> str:
        """The schema (http or https)"""
        return self._schema

    @property
    def hostname(self) -> str:
        """The hostname (without schema)"""
        return self._hostname

    @property
    def port(self) -> int:
        """The numeric port"""
        return self._port

    @property
    def endpoint(self) -> str:
        """The fully constructed endpoint for the COS instance"""
        return f"{self._schema}://{self._hostname}:{self._port}"

    @property
    def bucket_name(self) -> str:
        """The name of the bucket within the instance"""
        return self._bucket_name

    @property
    def auth_secret_name(self) -> str:
        """Secret name containing the access_key and secret_key"""
        return self._auth_secret_name

    @property
    def auth_secret_access_key_field(self) -> str:
        """Field in the auth secret containing the access_key"""
        return self._auth_secret_access_key_field

    @property
    def auth_secret_secret_key_field(self) -> str:
        """Field in the auth secret containing the secret_key"""
        return self._auth_secret_secret_key_field

    @property
    def tls_secret_name(self) -> str:
        """The name of the secret holding the tls certificate (for mounting)"""
        return self._tls_secret_name

    @property
    def tls_secret_cert_field(self) -> str:
        """The field within the tls secret that holds the CA cert"""
        return self._tls_secret_cert_field

    @property
    def tls_enabled(self) -> bool:
        return self._tls_enabled

    ## Interface ###############################################################

    _DICT_FIELDS = [
        "hostname",
        "port",
        "bucket_name",
        "auth_secret_name",
        "auth_secret_access_key_field",
        "auth_secret_secret_key_field",
        "tls_secret_name",
        "tls_secret_cert_field",
    ]

    def to_dict(self) -> dict:
        """Return the dict representation of the object for the CR"""
        return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}

    @classmethod
    def from_dict(cls, session: Session, config_dict: dict) -> "CosConnection":
        kwargs = {"session": session}
        config_dict = common.camelcase_to_snake_case(config_dict)

        uri_secret = config_dict.get("uri_secret")
        uri_hostname_field = config_dict.get("uri_secret_hostname_field")
        uri_port_field = config_dict.get("uri_secret_port_field")
        uri_bucketname_field = config_dict.get("uri_secret_bucketname_field")

        # First pull provided hostname/port secret if available and fill in
        # hostname/port fields into config_dict
        if (
            uri_secret
            and uri_hostname_field
            and uri_port_field
            and uri_bucketname_field
        ):
            # If we have provided host/port credentials, we need to extract them
            # and place these values in our config dict
            success, secret_content = session.get_object_current_state(
                "Secret", uri_secret
            )
            assert_cluster(success, f"Fetching connection secret [{uri_secret}] failed")
            assert "data" in secret_content, "Got a secret without 'data'?"
            secret_content = secret_content.get("data", {})
            assert_precondition(
                secret_content,
                f"Missing expected Secret/{uri_secret} holding [hostname] and [port]",
            )
            hostname = common.b64_secret_decode(secret_content.get(uri_hostname_field))
            port = common.b64_secret_decode(secret_content.get(uri_port_field))
            bucketname = common.b64_secret_decode(
                secret_content.get(uri_bucketname_field)
            )
            if None in [hostname, port, bucketname]:
                log.debug2(
                    "Failed to find hostname/port/bucketname in uri secret [%s]",
                    uri_secret,
                )

            config_dict["hostname"], config_dict["port"], config_dict["bucket_name"] = (
                hostname,
                int(port),
                bucketname,
            )

        for field in cls._DICT_FIELDS:
            if field not in config_dict:
                raise ValueError(f"Missing required connection element [{field}]")

            # Set the kwargs (using None in place of empty strings)
            kwargs[field] = config_dict[field] or None
        return cls(**kwargs)

    ## Client Utilities ########################################################

    def get_auth_keys(self) -> Tuple[Optional[str], Optional[str]]:
        """Get the current access_key/secret_key pair from the auth secret if
        available

        Returns:
            access_key:  str or None
                The plain-text access_key (not encoded) if available
            secret_key:  str or None
                The plain-text secret_key (not encoded) if available
        """
        if None in [self._access_key, self._secret_key]:
            secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
            log.debug4("Auth secret content: %s", secret_content)
            log.debug3(
                "Looking for [%s/%s]",
                self._auth_secret_access_key_field,
                self._auth_secret_secret_key_field,
            )
            access_key = secret_content.get(self._auth_secret_access_key_field)
            secret_key = secret_content.get(self._auth_secret_secret_key_field)
            if None in [access_key, secret_key]:
                log.debug2(
                    "Failed to find access_key/secret_key in auth secret [%s]",
                    self._auth_secret_name,
                )
                return None, None
            self._access_key = access_key
            self._secret_key = secret_key
        return self._access_key, self._secret_key

    def get_tls_cert(self) -> Optional[str]:
        """Get the current TLS certificate for a client connection if TLS is
        enabled

        If TLS is enabled, but the cert is not found, this function will raise
        an AssertionError

        Returns:
            tls_cert: str or None
                PEM encoded cert string (not base64-encoded) if found, otherwise
                None
        """
        if self._tls_enabled:
            if self._tls_cert is None:
                secret_data = self._fetch_secret_data(self._tls_secret_name)
                if secret_data is not None:
                    self._tls_cert = secret_data.get(self._tls_secret_cert_field)
            return self._tls_cert

        return None

    def get_connection_string(self) -> str:
        """Get the formatted s3 connection string to connect to the given bucket
        in the instance

        Returns:
            connection_string:  str
                The formatted connection string
        """
        access_key, secret_key = self.get_auth_keys()
        assert_precondition(
            None not in [access_key, secret_key],
            "No auth keys available for COS connection string",
        )
        return (
            "s3,endpoint={}://{}:{},accesskey={},secretkey={},bucketsuffix={}".format(
                self._schema,
                self._hostname,
                self._port,
                access_key,
                secret_key,
                self._bucket_name,
            )
        )
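
For orientation, here is a minimal sketch (hypothetical names and values, not taken from the library) of the connection stanza that from_dict consumes when the connection is provided directly in the CR under spec.datastores.cos.connection:

from oper8.x.datastores.cos.connection import CosConnection

# Hypothetical CR-provided connection config. Keys may be camelCase (they are
# converted to snake_case internally); every field in _DICT_FIELDS must be
# present, and empty strings are treated as None (e.g. use "" for the two tls
# fields to disable TLS).
cos_connection_config = {
    "hostname": "cos.example.svc",
    "port": 9000,
    "bucketName": "my-bucket",
    "authSecretName": "cos-auth-secret",
    "authSecretAccessKeyField": "accesskey",
    "authSecretSecretKeyField": "secretkey",
    "tlsSecretName": "cos-tls-secret",
    "tlsSecretCertField": "ca.crt",
}
# `session` is assumed to be the oper8 Session for the current deployment
connection = CosConnection.from_dict(session, cos_connection_config)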
auth_secret_access_key_field property

Field in the auth secret containing the access_key

auth_secret_name property

Secret name containing the access_key and secret_key

auth_secret_secret_key_field property

Field in the auth secret containing the secret_key

bucket_name property

The name of the bucket within the instance

endpoint property

The fully constructed endpoint for the COS instance

hostname property

The hostname (without schema)

port property

The numeric port

schema property

The schema (http or https)

tls_secret_cert_field property

The field within the tls secret that holds the CA cert

tls_secret_name property

The name of the secret holding the tls certificate (for mounting)

get_auth_keys()

Get the current access_key/secret_key pair from the auth secret if available

Returns:

    access_key (Optional[str]): The plain-text access_key (not encoded) if available
    secret_key (Optional[str]): The plain-text secret_key (not encoded) if available

Source code in oper8/x/datastores/cos/connection.py
def get_auth_keys(self) -> Tuple[Optional[str], Optional[str]]:
    """Get the current access_key/secret_key pair from the auth secret if
    available

    Returns:
        access_key:  str or None
            The plain-text access_key (not encoded) if available
        secret_key:  str or None
            The plain-text secret_key (not encoded) if available
    """
    if None in [self._access_key, self._secret_key]:
        secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
        log.debug4("Auth secret content: %s", secret_content)
        log.debug3(
            "Looking for [%s/%s]",
            self._auth_secret_access_key_field,
            self._auth_secret_secret_key_field,
        )
        access_key = secret_content.get(self._auth_secret_access_key_field)
        secret_key = secret_content.get(self._auth_secret_secret_key_field)
        if None in [access_key, secret_key]:
            log.debug2(
                "Failed to find access_key/secret_key in auth secret [%s]",
                self._auth_secret_name,
            )
            return None, None
        self._access_key = access_key
        self._secret_key = secret_key
    return self._access_key, self._secret_key
get_connection_string()

Get the formatted s3 connection string to connect to the given bucket in the instance

Returns:

    connection_string (str): The formatted connection string

Source code in oper8/x/datastores/cos/connection.py
def get_connection_string(self) -> str:
    """Get the formatted s3 connection string to connect to the given bucket
    in the instance

    Returns:
        connection_string:  str
            The formatted connection string
    """
    access_key, secret_key = self.get_auth_keys()
    assert_precondition(
        None not in [access_key, secret_key],
        "No auth keys available for COS connection string",
    )
    return (
        "s3,endpoint={}://{}:{},accesskey={},secretkey={},bucketsuffix={}".format(
            self._schema,
            self._hostname,
            self._port,
            access_key,
            secret_key,
            self._bucket_name,
        )
    )
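A hedged usage sketch (cos_conn is an already-constructed CosConnection; the values shown in the comments are hypothetical):

# Given an existing CosConnection instance `cos_conn`
access_key, secret_key = cos_conn.get_auth_keys()  # lazily read from the auth secret if needed
ca_cert = cos_conn.get_tls_cert()                   # None when TLS is disabled
conn_str = cos_conn.get_connection_string()
# conn_str has the shape (hypothetical values):
#   s3,endpoint=https://cos.example.svc:9000,accesskey=AKIA123,secretkey=s3cr3t,bucketsuffix=my-bucket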
get_tls_cert()

Get the current TLS certificate for a client connection if TLS is enabled

If TLS is enabled, but the cert is not found, this function will raise an AssertionError

Returns:

    tls_cert (Optional[str]): PEM encoded cert string (not base64-encoded) if found, otherwise None

Source code in oper8/x/datastores/cos/connection.py
def get_tls_cert(self) -> Optional[str]:
    """Get the current TLS certificate for a client connection if TLS is
    enabled

    If TLS is enabled, but the cert is not found, this function will raise
    an AssertionError

    Returns:
        tls_cert: str or None
            PEM encoded cert string (not base64-encoded) if found, otherwise
            None
    """
    if self._tls_enabled:
        if self._tls_cert is None:
            secret_data = self._fetch_secret_data(self._tls_secret_name)
            if secret_data is not None:
                self._tls_cert = secret_data.get(self._tls_secret_cert_field)
        return self._tls_cert

    return None
to_dict()

Return the dict representation of the object for the CR

Source code in oper8/x/datastores/cos/connection.py
def to_dict(self) -> dict:
    """Return the dict representation of the object for the CR"""
    return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}
factory

COS instance factory

CosFactory

Bases: DatastoreSingletonFactoryBase

The common factory that will manage instances of COS for each deploy

Source code in oper8/x/datastores/cos/factory.py
class CosFactory(DatastoreSingletonFactoryBase):
    """The common factory that will manage instances of COS for each deploy"""

    DATASTORE_TYPE = "cos"
    CONNECTION_TYPE = CosConnection
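
A hedged usage sketch (assuming the oper8 Session for the current deployment is in scope as `session`; the instance name "archive" is hypothetical):

from oper8.x.datastores.cos.factory import CosFactory

# Deploy-scoped singleton component; may be None when the CR supplies the
# connection details directly
cos_component = CosFactory.get_component(session)

# Shared connection objects for clients
cos_connection = CosFactory.get_connection(session)                      # top-level instance
archive_connection = CosFactory.get_connection(session, name="archive")  # named instance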
interfaces

Base class interface for a Cloud Object Store (cos) component

ICosComponentBase

Bases: Datastore

A COS chart provides access to a single running S3-compatible object store instance

Source code in oper8/x/datastores/cos/interfaces.py
@component(COMPONENT_NAME)
class ICosComponentBase(Datastore):
    """A COS chart provides access to a single running S3-compatible object
    store instance
    """

    ## Parent Interface ########################################################

    def get_connection(self) -> CosConnection:
        """Get the connection object for this instance"""
        return CosConnection(
            session=self.session,
            hostname=self._get_hostname(),
            port=self._get_port(),
            bucket_name=self._get_bucket_name(),
            auth_secret_name=self._get_auth_secret_name(),
            auth_secret_access_key_field=self._get_auth_secret_access_key_field(),
            auth_secret_secret_key_field=self._get_auth_secret_secret_key_field(),
            tls_secret_name=self._get_tls_secret_name(),
            tls_secret_cert_field=self._get_tls_secret_cert_field(),
            access_key=self._get_access_key(),
            secret_key=self._get_secret_key(),
            tls_cert=self._get_tls_cert(),
        )

    ## Abstract Interface ######################################################
    #
    # This is the interface that needs to be implemented by a child in order to
    # provide the common information that a client will use.
    ##

    ##################
    ## General Info ##
    ##################

    @abstractmethod
    def _get_hostname(self) -> str:
        """Gets the hotsname for the connection. Can be IP address as well.

        Returns:
            hostname:  str
                The hostname (without schema) for the service
        """

    @abstractmethod
    def _get_port(self) -> int:
        """Gets the port where the service is listening

        Returns:
            port:  int
                The port where the service is listening
        """

    @abstractmethod
    def _get_bucket_name(self) -> str:
        """Gets the bucket name for the connection

        Returns:
            bucket_name:  str
                The default bucket name for this instance
        """

    ###############
    ## Auth Info ##
    ###############

    @abstractmethod
    def _get_auth_secret_name(self) -> str:
        """Get the Auth secret name with any scoping applied

        Returns:
            auth_secret_name:  str
                The name of the secret containing the auth secret
        """

    @abstractmethod
    def _get_auth_secret_access_key_field(self) -> str:
        """Get the field form within the auth secret that contains the
        access_key

        Returns:
            access_key_field:  str
                The field within the auth secret that contains the access_key
        """

    @abstractmethod
    def _get_auth_secret_secret_key_field(self) -> str:
        """Get the field form within the auth secret that contains the
        secret_key

        Returns:
            secret_key_field:  str
                The field within the auth secret that contains the secret_key
        """

    @abstractmethod
    def _get_access_key(self) -> Optional[str]:
        """Get the un-encoded content of the access_key if available in-memory.
        Components which proxy an external secret don't need to fetch this
        content from the cluster.

        Returns:
            access_key:  Optional[str]
                The content of the access_key if known
        """

    @abstractmethod
    def _get_secret_key(self) -> Optional[str]:
        """Get the un-encoded content of the secret_key if available in-memory.
        Components which proxy an external secret don't need to fetch this
        content from the cluster.

        Returns:
            secret_key:  Optional[str]
                The content of the secret_key if known
        """

    ##############
    ## TLS Info ##
    ##############

    @abstractmethod
    def _get_tls_secret_name(self) -> Optional[str]:
        """Get the TLS secret name with any scoping applied if tls is enabled

        Returns:
            tls_secret_name:  Optional[str]
                If tls is enabled, returns the name of the secret, otherwise
                None
        """

    @abstractmethod
    def _get_tls_secret_cert_field(self) -> Optional[str]:
        """Get the field from within the tls secret that contains the CA
        certificate a client would need to use to connect

        Returns:
            cert_field:  Optional[str]
                The field within the tls secret where the CA certificate lives
        """

    @abstractmethod
    def _get_tls_cert(self) -> Optional[str]:
        """Get the un-encoded content of the TLS cert if TLS is enabled and
        available in-memory. Components which proxy an external secret don't
        need to fetch this content from the cluster.

        Returns:
            cert_content:  Optional[str]
                The content of the cert if tls is enabled
        """
get_connection()

Get the connection object for this instance

Source code in oper8/x/datastores/cos/interfaces.py
def get_connection(self) -> CosConnection:
    """Get the connection object for this instance"""
    return CosConnection(
        session=self.session,
        hostname=self._get_hostname(),
        port=self._get_port(),
        bucket_name=self._get_bucket_name(),
        auth_secret_name=self._get_auth_secret_name(),
        auth_secret_access_key_field=self._get_auth_secret_access_key_field(),
        auth_secret_secret_key_field=self._get_auth_secret_secret_key_field(),
        tls_secret_name=self._get_tls_secret_name(),
        tls_secret_cert_field=self._get_tls_secret_cert_field(),
        access_key=self._get_access_key(),
        secret_key=self._get_secret_key(),
        tls_cert=self._get_tls_cert(),
    )

factory_base

The DatastoreSingletonFactoryBase class defines the common functionality that all datastore type factories will use. It implements common logic for constructing named singleton instances of a given datastore type.

DatastoreSingletonFactoryBase

The DatastoreSingletonFactoryBase manages instances of all datastore types as singletons on a per-deployment basis. It provides functionality for derived classes to define a specific DATASTORE_TYPE (e.g. redis) and register implementations of that type.

The instances of each type are held as singletons scoped to the individual deployment (session.deploy_id). This is done to support multiple calls to fetch a named instance within a given deployment without reconstructing, but to allow configuration to change between deploys.

Source code in oper8/x/datastores/factory_base.py
class DatastoreSingletonFactoryBase:
    """The DatastoreSingletonFactoryBase manages instances of all datastore
    types as singletons on a per-deployment basis. It provides functionality for
    derived classes to define a specific DATASTORE_TYPE (e.g. redis) and
    register implementations of that type.

    The instances of each type are held as singletons scoped to the individual
    deployment (session.deploy_id). This is done to support multiple calls to
    fetch a named instance within a given deployment without reconstructing, but
    to allow configuration to change between deploys.
    """

    ## Private Members #########################################################

    # Singleton dict of constructors for each implementation type
    _type_constructors = {}

    # Singleton dict of named components
    _components = {}

    # Singleton dict of named connections
    _connections = {}

    # Class attribute that all individual factory types must have.
    # NOTE: This will be used as the key in the CR's datastores section
    _DATASTORE_TYPE_ATTRIBUTE_NAME = "DATASTORE_TYPE"

    # Class attribute that must be defined on an implementation to define the
    # common connection type
    _CONNECTION_TYPE_ATTRIBUTE = "CONNECTION_TYPE"

    ## Public interface ########################################################

    @classproperty
    def datastore_type(cls):
        return getattr(cls, cls._DATASTORE_TYPE_ATTRIBUTE_NAME)

    @classproperty
    def connection_type(cls):
        return getattr(cls, cls._CONNECTION_TYPE_ATTRIBUTE)

    @classmethod
    def get_component(
        cls,
        session: Session,
        name: Optional[str] = None,
        disabled: bool = False,
        config_overrides: Optional[dict] = None,
    ) -> Optional[Component]:
        """Construct an instance of the datastore type's component

        Args:
            session:  Session
                The session for the current deployment
            name:  Optional[str]
                The name of the singleton instance to get. If not provided, a
                top-level instance is used (e.g. datastores.postgres.type)
            disabled:  bool
                Whether or not the component is disabled in this deployment
            config_overrides:  Optional[dict]
                Optional runtime config values. These will overwrite any values
                pulled from the session.config

        Returns:
            instance:  Optional[Component]
                The constructed component if one is needed
        """
        return cls._get_component(
            session=session,
            name=name,
            disabled=disabled,
            config_overrides=config_overrides,
        )

    @classmethod
    def get_connection(
        cls,
        session: Session,
        name: Optional[str] = None,
        allow_from_component: bool = True,
    ) -> DatastoreConnectionBase:
        """Get the connection details for a named instance of the datastore type

        If not pre-constructed by the creation of the Component, connection
        details are pulled from the CR directly
        (spec.datastores.<datastore_type>.[<name>].connection)

        Args:
            session:  Session
                The session for the current deployment
            name:  Optional[str]
                The name of the singleton instance to get. If not provided, a
                top-level instance is used
            allow_from_component:  bool
                If True, use connection info from the component

        Returns:
            connection:  DatastoreConnectionBase
                The connection for this instance
        """
        return cls._get_connection(session, name, allow_from_component)

    @classmethod
    def register_type(cls, type_class: Datastore):
        """Register a new type constructor

        Args:
            type_class:  Datastore
                The class that will be constructed with the config for
        """
        cls._validate_class_attributes()

        assert issubclass(
            type_class, Datastore
        ), "Datastore types use component_class=Datastore"
        datastore_type_classes = cls._type_constructors.setdefault(
            cls.datastore_type, {}
        )
        if type_class.TYPE_LABEL in datastore_type_classes:
            log.warning("Got duplicate registration for %s", type_class.TYPE_LABEL)
        datastore_type_classes[type_class.TYPE_LABEL] = type_class

    ## Implementation Details ##################################################

    @classmethod
    def _validate_class_attributes(cls):
        """Since this class is always used statically, this helper makes sure
        the expected class attributes are defined correctly
        """
        assert isinstance(
            getattr(
                cls, DatastoreSingletonFactoryBase._DATASTORE_TYPE_ATTRIBUTE_NAME, None
            ),
            str,
        ), "Incorrectly configured datastore [{}]. Must define str [{}]".format(
            cls, DatastoreSingletonFactoryBase._DATASTORE_TYPE_ATTRIBUTE_NAME
        )
        connection_type = getattr(
            cls, DatastoreSingletonFactoryBase._CONNECTION_TYPE_ATTRIBUTE, None
        )
        assert isinstance(connection_type, type) and issubclass(
            connection_type, DatastoreConnectionBase
        ), (
            f"Incorrectly configured datastore [{cls}]. Must define "
            f"[{DatastoreSingletonFactoryBase._CONNECTION_TYPE_ATTRIBUTE}] as "
            "a DatastoreConnectionBase type"
        )

    @classmethod
    def _get_component(
        cls,
        session: Session,
        name: Optional[str] = None,
        disabled: bool = False,
        config_overrides: Optional[dict] = None,
        allow_instantiation: bool = True,
    ) -> Optional[Component]:
        """Implementation detail for get_component which can be called by
        get_connection and can disallow lazy creation of the singleton.
        """
        cls._validate_class_attributes()

        # First, check to see if there's a connection already available based on
        # connection details in the CR. If so, we won't create the component
        conn = cls._get_connection(session, name, allow_from_component=False)
        if conn is not None:
            log.debug(
                "Found connection for [%s] in the CR. Not constructing the component.",
                cls.datastore_type,
            )
            return None

        # Get the pre-existing instances for this datastore type (keyed by the
        # datastore subclass)
        datastore_components = cls._components.setdefault(cls.datastore_type, {})

        # Get the app config section for this instance by name
        instance_config = merge_configs(
            session.config.get(cls.datastore_type, {}),
            config_overrides or {},
        )
        log.debug4("Full config: %s", instance_config)
        if name is not None:
            instance_config = instance_config.get(name)
        assert (
            instance_config is not None
        ), f"Cannot construct unknown [{cls.datastore_type}] instance: {name}"
        assert (
            "type" in instance_config
        ), f"Missing required [type] key for [{cls.datastore_type}/{name}]"

        # Fetch the current instance/deploy_id
        instance, deploy_id = datastore_components.get(name, (None, None))

        # If the deploy_id has changed, remove any current instance
        if deploy_id != session.id:
            instance = None
            datastore_components.pop(name, None)

        # If there is not a valid instance and it's allowed, construct it
        if not instance and allow_instantiation:
            log.debug2(
                "Constructing [%s]/%s for the first time for deploy [%s]",
                cls.datastore_type,
                name,
                session.id,
            )
            type_key = instance_config.type

            # Fetch the class for this type of the datastore
            datastore_type_classes = cls._type_constructors.get(cls.datastore_type, {})
            type_class = datastore_type_classes.get(type_key)
            assert (
                type_class is not None
            ), f"Cannot construct unsupported type [{type_key}]"

            # If there is a name provided, create a wrapper component with the
            # given name
            if name is not None:
                instance_class_name = f"{type_class.name}-{name}"
                log.debug2("Wrapping %s with instance name override", type_class)

                class InstanceClass(type_class):
                    """Wrapper for {}/{} with instance naming""".format(
                        cls.datastore_type, type_key
                    )

                    name = instance_class_name

            else:
                log.debug2("No instance name wrapping needed for %s", type_class)
                InstanceClass = type_class
            log.debug("Constructing %s", type_key)
            instance = InstanceClass(
                session=session,
                config=instance_config,
                instance_name=name,
                disabled=disabled,
            )
            datastore_components[name] = (instance, session.id)

        # Return the singleton
        return instance

    @classmethod
    def _get_connection(
        cls,
        session: Session,
        name: Optional[str] = None,
        allow_from_component: bool = True,
    ) -> Optional[DatastoreConnectionBase]:
        """Implementation for get_connection that can be used by _get_component
        to fetch connections from the CR
        """
        cls._validate_class_attributes()

        # Get the pre-existing instances for this datastore type (keyed by the
        # datastore subclass)
        connection, deploy_id = cls._connections.get(cls.datastore_type, {}).get(
            name, (None, None)
        )

        # If there is no connection for this deploy already, deserialize it from
        # the CR
        if connection is None or deploy_id != session.id:
            log.debug("Constructing %s connection from config", cls.datastore_type)

            # Get the CR config for this datastore type
            ds_config = session.spec.get(constants.SPEC_DATASTORES, {}).get(
                cls.datastore_type, {}
            )
            if name is not None:
                ds_config = ds_config.get(name, {})
            ds_config = ds_config.get(constants.SPEC_DATASTORE_CONNECTION)
            log.debug3(
                "%s/%s connection config: %s", cls.datastore_type, name, ds_config
            )

            if ds_config is not None:
                # Deserialize connection from sub-cr connection specification
                connection = cls.connection_type.from_dict(session, ds_config)
                cls._connections.setdefault(cls.datastore_type, {})[name] = (
                    connection,
                    session.id,
                )
            elif allow_from_component:
                # Add the connection information for this instance
                instance = cls._get_component(session, name, allow_instantiation=False)
                assert (
                    instance is not None
                ), f"No instance or config available for {cls.datastore_type}"
                connection = instance.get_connection()
                assert isinstance(
                    connection, cls.connection_type
                ), f"Got incorrect [{cls.datastore_type}] connection type: {type(connection)}"
                cls._connections.setdefault(cls.datastore_type, {})[name] = (
                    connection,
                    session.id,
                )
            else:
                log.debug2(
                    "No connection details for %s found in CR", cls.datastore_type
                )
                return None

        # Return the connection singleton
        return cls._connections[cls.datastore_type][name][0]
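The deploy-scoped caching described above means repeated lookups within a single deployment are cheap; a short sketch using the COS factory (the oper8 Session for the deployment is assumed in scope as `session`):

from oper8.x.datastores.cos.factory import CosFactory

first = CosFactory.get_component(session)
second = CosFactory.get_component(session)
# Within the same deploy (same session.id) the cached instance is returned;
# both may be None when the CR provides the connection details directly.
assert first is second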
get_component(session, name=None, disabled=False, config_overrides=None) classmethod

Construct an instance of the datastore type's component

Parameters:

    session (Session, required): The session for the current deployment
    name (Optional[str], default None): The name of the singleton instance to get. If not provided, a top-level instance is used (e.g. datastores.postgres.type)
    disabled (bool, default False): Whether or not the component is disabled in this deployment
    config_overrides (Optional[dict], default None): Optional runtime config values. These will overwrite any values pulled from the session.config

Returns:

    instance (Optional[Component]): The constructed component if one is needed

Source code in oper8/x/datastores/factory_base.py
@classmethod
def get_component(
    cls,
    session: Session,
    name: Optional[str] = None,
    disabled: bool = False,
    config_overrides: Optional[dict] = None,
) -> Optional[Component]:
    """Construct an instance of the datastore type's component

    Args:
        session:  Session
            The session for the current deployment
        name:  Optional[str]
            The name of the singleton instance to get. If not provided, a
            top-level instance is used (e.g. datastores.postgres.type)
        disabled:  bool
            Whether or not the component is disabled in this deployment
        config_overrides:  Optional[dict]
            Optional runtime config values. These will overwrite any values
            pulled from the session.config

    Returns:
        instance:  Optional[Component]
            The constructed component if one is needed
    """
    return cls._get_component(
        session=session,
        name=name,
        disabled=disabled,
        config_overrides=config_overrides,
    )
get_connection(session, name=None, allow_from_component=True) classmethod

Get the connection details for a named instance of the datastore type

If not pre-constructed by the creation of the Component, connection details are pulled from the CR directly (spec.datastores.<datastore_type>.[<name>].connection)

Parameters:

    session (Session, required): The session for the current deployment
    name (Optional[str], default None): The name of the singleton instance to get. If not provided, a top-level instance is used
    allow_from_component (bool, default True): If True, use connection info from the component

Returns:

    connection (DatastoreConnectionBase): The connection for this instance

Source code in oper8/x/datastores/factory_base.py
@classmethod
def get_connection(
    cls,
    session: Session,
    name: Optional[str] = None,
    allow_from_component: bool = True,
) -> DatastoreConnectionBase:
    """Get the connection details for a named instance of the datastore type

    If not pre-constructed by the creation of the Component, connection
    details are pulled from the CR directly
    (spec.datastores.<datastore_type>.[<name>].connection)

    Args:
        session:  Session
            The session for the current deployment
        name:  Optional[str]
            The name of the singleton instance to get. If not provided, a
            top-level instance is used
        allow_from_component:  bool
            If True, use connection info from the component

    Returns:
        connection:  DatastoreConnectionBase
            The connection for this instance
    """
    return cls._get_connection(session, name, allow_from_component)
register_type(type_class) classmethod

Register a new type constructor

Parameters:

    type_class (Datastore, required): The class that will be constructed with the config for
Source code in oper8/x/datastores/factory_base.py
@classmethod
def register_type(cls, type_class: Datastore):
    """Register a new type constructor

    Args:
        type_class:  Datastore
            The class that will be constructed with the config for
    """
    cls._validate_class_attributes()

    assert issubclass(
        type_class, Datastore
    ), "Datastore types use component_class=Datastore"
    datastore_type_classes = cls._type_constructors.setdefault(
        cls.datastore_type, {}
    )
    if type_class.TYPE_LABEL in datastore_type_classes:
        log.warning("Got duplicate registration for %s", type_class.TYPE_LABEL)
    datastore_type_classes[type_class.TYPE_LABEL] = type_class
classproperty

@classmethod+@property CITE: https://stackoverflow.com/a/22729414

Source code in oper8/x/datastores/factory_base.py
class classproperty:
    """@classmethod+@property
    CITE: https://stackoverflow.com/a/22729414
    """

    def __init__(self, func):
        self.func = classmethod(func)

    def __get__(self, *args):
        return self.func.__get__(*args)()
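
A small sketch of how the helper behaves (ExampleFactory and its attribute are hypothetical): the decorated function is evaluated on attribute access against the class itself, so no instance is required.

class ExampleFactory:
    DATASTORE_TYPE = "example"

    @classproperty
    def datastore_type(cls):
        return cls.DATASTORE_TYPE


# Accessed on the class, not an instance
assert ExampleFactory.datastore_type == "example"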

interfaces

Base class for all Datastore component implementations

Datastore

Bases: Oper8xComponent

The Datastore base class defines the interface that any datastore must conform to. It is an oper8 Component and should be constructed via a per-type factory.

Source code in oper8/x/datastores/interfaces.py
class Datastore(Oper8xComponent):
    """
    The Datastore base class defines the interface that any datastore must
    conform to. It is an oper8 Component and should be constructed via a per-type
    factory.
    """

    _TYPE_LABEL_ATTRIBUTE = "TYPE_LABEL"

    def __init__(
        self,
        session: Session,
        config: aconfig.Config,
        instance_name: Optional[str] = None,
        disabled: bool = False,
    ):
        """This passthrough constructor enforces that all datastores have a
        class attribute TYPE_LABEL (str)
        """
        type_label = getattr(self, self._TYPE_LABEL_ATTRIBUTE, None)
        assert isinstance(
            type_label, str
        ), f"All datastores types must define {self._TYPE_LABEL_ATTRIBUTE} as a str"
        super().__init__(session=session, disabled=disabled)
        self._config = config
        self.instance_name = instance_name

    @property
    def config(self) -> aconfig.Config:
        """The config for this instance of the datastore"""
        return self._config

    @abc.abstractmethod
    def get_connection(self) -> DatastoreConnectionBase:
        """Get the connection object for this datastore instance. Each datastore
        type must manage a common abstraction for a connection which clients
        will use to connect to the datastore.
        """
config property

The config for this instance of the datastore

__init__(session, config, instance_name=None, disabled=False)

This passthrough constructor enforces that all datastores have a class attribute TYPE_LABEL (str)

Source code in oper8/x/datastores/interfaces.py
def __init__(
    self,
    session: Session,
    config: aconfig.Config,
    instance_name: Optional[str] = None,
    disabled: bool = False,
):
    """This passthrough constructor enforces that all datastores have a
    class attribute TYPE_LABEL (str)
    """
    type_label = getattr(self, self._TYPE_LABEL_ATTRIBUTE, None)
    assert isinstance(
        type_label, str
    ), f"All datastores types must define {self._TYPE_LABEL_ATTRIBUTE} as a str"
    super().__init__(session=session, disabled=disabled)
    self._config = config
    self.instance_name = instance_name
get_connection() abstractmethod

Get the connection object for this datastore instance. Each datastore type must manage a common abstraction for a connection which clients will use to connect to the datastore.

Source code in oper8/x/datastores/interfaces.py
@abc.abstractmethod
def get_connection(self) -> DatastoreConnectionBase:
    """Get the connection object for this datastore instance. Each datastore
    type must manage a common abstraction for a connection which clients
    will use to connect to the datastore.
    """

postgres

Common postgres module exposed imports

connection

The common Connection type for a postgres instance

PostgresConnection

Bases: DatastoreConnectionBase

A connection for postgres defines the client operations and utilities needed to configure a microservice to interact with a single postgres instance. The key pieces of information are:

  • General config:

    • hostname: The hostname where the database can be reached
    • port: The port the database service is listening on
  • Auth:

    • auth_secret_name: The in-cluster name for the secret holding the username and password
    • auth_secret_username_field: The field within the auth secret that holds the username
    • auth_secret_password_field: The field within the auth secret that holds the password
  • TLS:

    • tls_cert: The content of the TLS cert if tls is enabled
    • tls_secret_name: The in-cluster name for the secret holding the TLS creds if tls is enabled
    • tls_secret_cert_field: The field within the tls secret that holds the cert
Source code in oper8/x/datastores/postgres/connection.py
class PostgresConnection(DatastoreConnectionBase):
    """A connection for postgres defines the client operations and utilities
    needed to configure a microservice to interact with a single postgres
    instance. The key pieces of information are:

    * General config:
        * hostname: The hostname where the database can be reached
        * port: The port the database service is listening on

    * Auth:
        * auth_secret_name: The in-cluster name for the secret holding the
            username and password
        * auth_secret_username_field: The field within the auth secret that
            holds the username
        * auth_secret_password_field: The field within the auth secret that
            holds the password

    * TLS:
        * tls_cert: The content of the TLS cert if tls is enabled
        * tls_secret_name: The in-cluster name for the secret holding the TLS
            creds if tls is enabled
        * tls_secret_cert_field: The field within the tls secret that holds the
            cert
    """

    def __init__(
        self,
        session: Session,
        hostname: str,
        port: int,
        auth_secret_name: str,
        auth_secret_username_field: str,
        auth_secret_password_field: str,
        tls_secret_name: Optional[str] = None,
        tls_secret_cert_field: Optional[str] = None,
        auth_username: Optional[str] = None,
        auth_password: Optional[str] = None,
        tls_cert: Optional[str] = None,
    ):
        """Construct with all of the crucial information pieces"""
        super().__init__(session)

        # Save internal values
        self._hostname = hostname
        self._port = port
        self._auth_secret_name = auth_secret_name
        self._auth_secret_username_field = auth_secret_username_field
        self._auth_secret_password_field = auth_secret_password_field
        self._tls_secret_name = tls_secret_name
        self._tls_secret_cert_field = tls_secret_cert_field

        # The secret content may be populated or not, depending on whether this
        # Connection is being created by the component or a CR config. If not
        # populated now, they will be lazily populated on client request.
        self._auth_username = auth_username
        self._auth_password = auth_password
        self._tls_cert = tls_cert

        # Ensure that the TLS arguments are provided in a reasonable way. The
        # cert may be omitted
        tls_args = {tls_secret_name, tls_secret_cert_field}
        assert (
            tls_args == {None} or None not in tls_args
        ), "All TLS arguments must be provided if tls is enabled"
        self._tls_enabled = tls_args != {None}
        assert (
            self._tls_enabled or self._tls_cert is None
        ), "Cannot give a tls cert value when tls is disabled"

    ## Properties ##############################################################

    @property
    def hostname(self) -> str:
        return self._hostname

    @property
    def port(self) -> int:
        return self._port

    @property
    def auth_secret_name(self) -> str:
        return self._auth_secret_name

    @property
    def auth_secret_username_field(self) -> str:
        return self._auth_secret_username_field

    @property
    def auth_secret_password_field(self) -> str:
        return self._auth_secret_password_field

    @property
    def tls_enabled(self) -> bool:
        return self._tls_enabled

    @property
    def tls_secret_name(self) -> str:
        return self._tls_secret_name

    @property
    def tls_secret_cert_field(self) -> str:
        return self._tls_secret_cert_field

    ## Interface ###############################################################

    _DICT_FIELDS = [
        "hostname",
        "port",
        "auth_secret_name",
        "auth_secret_username_field",
        "auth_secret_password_field",
        "tls_secret_name",
        "tls_secret_cert_field",
    ]

    _PROVIDED_DICT_FIELDS = [
        "uri_secret",
        "uri_secret_hostname_field",
        "uri_secret_port_field",
    ]

    def to_dict(self) -> dict:
        """Return the dict representation of the object for the CR"""
        return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}

    @classmethod
    def from_dict(cls, session: Session, config_dict: dict) -> "PostgresConnection":
        kwargs = {"session": session}
        config_dict = common.camelcase_to_snake_case(config_dict)
        uri_secret = config_dict.get("uri_secret", {})
        uri_hostname_field = config_dict.get("uri_secret_hostname_field", {})
        uri_port_field = config_dict.get("uri_secret_port_field", {})

        # First pull provided hostname/port secret if available and fill in
        # hostname/port fields into config_dict
        if uri_secret and uri_hostname_field and uri_port_field:
            # If we have provided host/port credentials, we need to extract them
            # and place these values in our config dict
            success, secret_content = session.get_object_current_state(
                "Secret", uri_secret
            )
            assert_cluster(success, f"Fetching connection secret [{uri_secret}] failed")
            assert_precondition(
                secret_content,
                f"Missing expected Secret/{uri_secret} holding [hostname] and [port]",
            )
            assert "data" in secret_content, "Got a secret without 'data'?"
            secret_content = secret_content.get("data", {})
            hostname_raw = secret_content.get(uri_hostname_field)
            port_raw = secret_content.get(uri_port_field)
            assert_config(
                None not in [hostname_raw, port_raw],
                f"Failed to find hostname/port in uri secret [{uri_secret}]",
            )
            hostname = common.b64_secret_decode(hostname_raw)
            port = common.b64_secret_decode(port_raw)

            config_dict["hostname"], config_dict["port"] = hostname, int(port)

        for field in cls._DICT_FIELDS:
            if field not in config_dict:
                raise ValueError(f"Missing required connection element [{field}]")

            # Set the kwargs (using None in place of empty strings)
            kwargs[field] = config_dict[field] or None
        return cls(**kwargs)

    ## Client Utilities ########################################################

    def get_ssl_mode(self) -> str:
        """Get Postgres SSL mode to operate in

        Returns:
            ssl_mode: str
                "require" (tls enabled) or "disable" (tls disabled)
        """
        return "require" if self.tls_enabled else "disable"

    def get_auth_username_password(self) -> Tuple[str, str]:
        """Get the current username/password pair from the auth secret if
        available

        Returns:
            username: str or None
                The plain-text username for the instance or None if not
                available
            password: str or None
                The plain-text password for the instance or None if not
                available
        """
        # If not already known, fetch from the cluster
        if None in [self._auth_username, self._auth_password]:
            secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
            username = secret_content.get(self._auth_secret_username_field)
            password = secret_content.get(self._auth_secret_password_field)
            if None in [username, password]:
                log.debug2(
                    "Failed to find username/password in auth secret [%s]",
                    self._auth_secret_name,
                )
                return None, None
            self._auth_username = username
            self._auth_password = password
        return self._auth_username, self._auth_password

    def get_tls_secret_volume_mounts(
        self,
        mount_path: str = DEFAULT_TLS_VOLUME_MOUNT_PATH,
        volume_name: str = DEFAULT_TLS_VOLUME_NAME,
    ) -> List[dict]:
        """Get the list of volumeMounts entries needed to support TLS for a
        client. If TLS is not enabled, this will be an empty list.

        Args:
            mount_path: str
                A path where the tls entries should be mounted
            volume_name: str
                The name of the volume within the pod spec

        Returns:
            volume_mounts: List[dict]
                A list of dict entries for the volume mounts which can be used
                to extend other volume lists
        """
        if self._tls_enabled:
            return [dict(name=volume_name, mountPath=mount_path)]
        return []

    def get_tls_secret_volumes(
        self,
        cert_mount_path: Optional[str] = None,
        volume_name: str = DEFAULT_TLS_VOLUME_NAME,
    ) -> List[dict]:
        """Get the list of dict entries needed to support TLS for a
        client. If TLS is not enabled, this will be an empty list.

        Args:
            cert_mount_path: Optional[str]
                The name of the file that the ca cert should be mounted to
            volume_name: str
                The name of the volume within the pod spec

        Returns:
            volumes: List[dict]
                A list of dict Volume entries which can be used to extend other
                volume lists
        """
        if self._tls_enabled:
            cert_mount_path = cert_mount_path or self._tls_secret_cert_field
            return [
                dict(
                    name=volume_name,
                    secret=dict(
                        defaultMode=common.mount_mode(440),
                        secretName=self._tls_secret_name,
                        items=[
                            dict(key=self._tls_secret_cert_field, path=cert_mount_path)
                        ],
                    ),
                )
            ]
        return []

    def get_tls_cert(self) -> Optional[str]:
        """Get the current TLS certificate for a client connection if TLS is
        enabled

        If TLS is enabled, but the cert is not found, this function will raise
        an AssertionError

        Returns:
            tls_cert: str or None
                PEM encoded cert string (not base64-encoded) if found, otherwise
                None
        """
        if self._tls_enabled:
            if self._tls_cert is None:
                secret_data = self._fetch_secret_data(self._tls_secret_name) or {}
                self._tls_cert = secret_data.get(self._tls_secret_cert_field)
                assert_precondition(
                    self._tls_cert is not None, "Failed to find TLS cert"
                )
            return self._tls_cert

        return None
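As a hedged client-side sketch (not part of the library), the two TLS helpers shown above can splice the CA cert into a pod spec; `pg_conn` is an already-constructed PostgresConnection and the container fragment is hypothetical:

# Fragment of a pod spec wired for TLS (both helpers return [] when TLS is disabled)
container = {
    "name": "my-service",
    "volumeMounts": pg_conn.get_tls_secret_volume_mounts(),
}
pod_spec = {
    "containers": [container],
    "volumes": pg_conn.get_tls_secret_volumes(cert_mount_path="ca.crt"),
}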
__init__(session, hostname, port, auth_secret_name, auth_secret_username_field, auth_secret_password_field, tls_secret_name=None, tls_secret_cert_field=None, auth_username=None, auth_password=None, tls_cert=None)

Construct with all of the crucial information pieces

Source code in oper8/x/datastores/postgres/connection.py
def __init__(
    self,
    session: Session,
    hostname: str,
    port: int,
    auth_secret_name: str,
    auth_secret_username_field: str,
    auth_secret_password_field: str,
    tls_secret_name: Optional[str] = None,
    tls_secret_cert_field: Optional[str] = None,
    auth_username: Optional[str] = None,
    auth_password: Optional[str] = None,
    tls_cert: Optional[str] = None,
):
    """Construct with all of the crucial information pieces"""
    super().__init__(session)

    # Save internal values
    self._hostname = hostname
    self._port = port
    self._auth_secret_name = auth_secret_name
    self._auth_secret_username_field = auth_secret_username_field
    self._auth_secret_password_field = auth_secret_password_field
    self._tls_secret_name = tls_secret_name
    self._tls_secret_cert_field = tls_secret_cert_field

    # The secret content may be populated or not, depending on whether this
    # Connection is being created by the component or a CR config. If not
    # populated now, they will be lazily populated on client request.
    self._auth_username = auth_username
    self._auth_password = auth_password
    self._tls_cert = tls_cert

    # Ensure that the TLS arguments are provided in a reasonable way. The
    # cert may be omitted
    tls_args = {tls_secret_name, tls_secret_cert_field}
    assert (
        tls_args == {None} or None not in tls_args
    ), "All TLS arguments must be provided if tls is enabled"
    self._tls_enabled = tls_args != {None}
    assert (
        self._tls_enabled or self._tls_cert is None
    ), "Cannot give a tls cert value when tls is disabled"
get_auth_username_password()

Get the current username/password pair from the auth secret if available

Returns:

Name Type Description
username str

str or None The plain-text username for the instance or None if not available

password str

str or None The plain-text password for the instance or None if not available

Source code in oper8/x/datastores/postgres/connection.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def get_auth_username_password(self) -> Tuple[str, str]:
    """Get the current username/password pair from the auth secret if
    available

    Returns:
        username: str or None
            The plain-text username for the instance or None if not
            available
        password: str or None
            The plain-text password for the instance or None if not
            available
    """
    # If not already known, fetch from the cluster
    if None in [self._auth_username, self._auth_password]:
        secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
        username = secret_content.get(self._auth_secret_username_field)
        password = secret_content.get(self._auth_secret_password_field)
        if None in [username, password]:
            log.debug2(
                "Failed to find username/password in auth secret [%s]",
                self._auth_secret_name,
            )
            return None, None
        self._auth_username = username
        self._auth_password = password
    return self._auth_username, self._auth_password
get_ssl_mode()

Get Postgres SSL mode to operate in

Returns:

Name Type Description
ssl_mode str

str "require" (tls enabled) or "disable" (tls disabled)

Source code in oper8/x/datastores/postgres/connection.py
192
193
194
195
196
197
198
199
def get_ssl_mode(self) -> str:
    """Get Postgres SSL mode to operate in

    Returns:
        ssl_mode: str
            "require" (tls enabled) or "disable" (tls disabled)
    """
    return "require" if self.tls_enabled else "disable"
get_tls_cert()

Get the current TLS certificate for a client connection if TLS is enabled

If TLS is enabled, but the cert is not found, this function will raise an AssertionError

Returns:

Name Type Description
tls_cert Optional[str]

str or None PEM encoded cert string (not base64-encoded) if found, otherwise None

Source code in oper8/x/datastores/postgres/connection.py
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def get_tls_cert(self) -> Optional[str]:
    """Get the current TLS certificate for a client connection if TLS is
    enabled

    If TLS is enabled, but the cert is not found, this function will raise
    an AssertionError

    Returns:
        tls_cert: str or None
            PEM encoded cert string (not base64-encoded) if found, otherwise
            None
    """
    if self._tls_enabled:
        if self._tls_cert is None:
            secret_data = self._fetch_secret_data(self._tls_secret_name) or {}
            self._tls_cert = secret_data.get(self._tls_secret_cert_field)
            assert_precondition(
                self._tls_cert is not None, "Failed to find TLS cert"
            )
        return self._tls_cert

    return None
get_tls_secret_volume_mounts(mount_path=DEFAULT_TLS_VOLUME_MOUNT_PATH, volume_name=DEFAULT_TLS_VOLUME_NAME)

Get the list of volumeMounts entries needed to support TLS for a client. If TLS is not enabled, this will be an empty list.

Parameters:

Name Type Description Default
mount_path str

str A path where the tls entries should be mounted

DEFAULT_TLS_VOLUME_MOUNT_PATH
volume_name str

str The name of the volume within the pod spec

DEFAULT_TLS_VOLUME_NAME

Returns:

Name Type Description
volume_mounts List[dict]

List[dict] A list of dict entries for the volume mounts which can be used to extend other volume lists

Source code in oper8/x/datastores/postgres/connection.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def get_tls_secret_volume_mounts(
    self,
    mount_path: str = DEFAULT_TLS_VOLUME_MOUNT_PATH,
    volume_name: str = DEFAULT_TLS_VOLUME_NAME,
) -> List[dict]:
    """Get the list of volumeMounts entries needed to support TLS for a
    client. If TLS is not enabled, this will be an empty list.

    Args:
        mount_path: str
            A path where the tls entries should be mounted
        volume_name: str
            The name of the volume within the pod spec

    Returns:
        volume_mounts: List[dict]
            A list of dict entries for the volume mounts which can be used
            to extend other volume lists
    """
    if self._tls_enabled:
        return [dict(name=volume_name, mountPath=mount_path)]
    return []
get_tls_secret_volumes(cert_mount_path=None, volume_name=DEFAULT_TLS_VOLUME_NAME)

Get the list of dict entries needed to support TLS for a client. If TLS is not enabled, this will be an empty list.

Parameters:

Name Type Description Default
cert_mount_path Optional[str]

Optional[str] The name of the file that the ca cert should be mounted to

None
volume_name str

str The name of the volume within the pod spec

DEFAULT_TLS_VOLUME_NAME

Returns:

Name Type Description
volumes List[dict]

List[dict] A list of dict Volume entries which can be used to extend other volume lists

Source code in oper8/x/datastores/postgres/connection.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def get_tls_secret_volumes(
    self,
    cert_mount_path: Optional[str] = None,
    volume_name: str = DEFAULT_TLS_VOLUME_NAME,
) -> List[dict]:
    """Get the list of dict entries needed to support TLS for a
    client. If TLS is not enabled, this will be an empty list.

    Args:
        cert_mount_path: Optional[str]
            The name of the file that the ca cert should be mounted to
        volume_name: str
            The name of the volume within the pod spec

    Returns:
        volumes: List[dict]
            A list of dict Volume entries which can be used to extend other
            volume lists
    """
    if self._tls_enabled:
        cert_mount_path = cert_mount_path or self._tls_secret_cert_field
        return [
            dict(
                name=volume_name,
                secret=dict(
                    defaultMode=common.mount_mode(440),
                    secretName=self._tls_secret_name,
                    items=[
                        dict(key=self._tls_secret_cert_field, path=cert_mount_path)
                    ],
                ),
            )
        ]
    return []
to_dict()

Return the dict representation of the object for the CR

Source code in oper8/x/datastores/postgres/connection.py
144
145
146
def to_dict(self) -> dict:
    """Return the dict representation of the object for the CR"""
    return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}
factory

Postgres instance factory

PostgresFactory

Bases: DatastoreSingletonFactoryBase

The common factory that will manage instances of Postgres for each deploy

Source code in oper8/x/datastores/postgres/factory.py
10
11
12
13
14
class PostgresFactory(DatastoreSingletonFactoryBase):
    """The common factory that will manage instances of Postgres for each deploy"""

    DATASTORE_TYPE = "postgres"
    CONNECTION_TYPE = PostgresConnection
interfaces

Base class interface for a Postgres component

IPostgresComponent

Bases: Datastore

A postgres chart provides access to a single running Postgres cluster

Source code in oper8/x/datastores/postgres/interfaces.py
12
13
14
15
16
17
18
19
20
21
22
23
@component(COMPONENT_NAME)
class IPostgresComponent(Datastore):
    """A postgres chart provides access to a single running Postgres cluster"""

    ## Shared Utilities ########################################################

    def tls_enabled(self) -> bool:
        """Return whether TLS is enabled or not
        Returns:
            bool: True (TLS enabled), False (TLS disabled)
        """
        return self.config.get("tls", {}).get("enabled", True)
tls_enabled()

Return whether TLS is enabled or not.

Returns: bool: True (TLS enabled), False (TLS disabled)

Source code in oper8/x/datastores/postgres/interfaces.py
18
19
20
21
22
23
def tls_enabled(self) -> bool:
    """Return whether TLS is enabled or not
    Returns:
        bool: True (TLS enabled), False (TLS disabled)
    """
    return self.config.get("tls", {}).get("enabled", True)

redis

Top level imports for the Redis datastore type

connection

The common connection type for a Redis instance

RedisConnection

Bases: DatastoreConnectionBase

A RedisConnection holds the core connection information for a named Redis instance, regardless of which IRedisComponent implements it. The key pieces of information are:

  • General config

    • hostname: The hostname where the instance can be reached
    • port: The port where the instance is listening
  • Auth

    • auth_secret_name: The in-cluster name for the secret holding the username and password
    • auth_secret_username_field: The field within the auth secret that holds the username.
    • auth_secret_password_field: The field within the auth secret that holds the password
  • TLS:

    • tls_cert: The content of the TLS cert if tls is enabled
    • tls_secret_name: The in-cluster name for the secret holding the TLS creds if tls is enabled
    • tls_secret_cert_field: The field within the tls secret that holds the cert
Source code in oper8/x/datastores/redis/connection.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
class RedisConnection(DatastoreConnectionBase):
    """
    A RedisConnection holds the core connection information for a named Redis
    instance, regardless of which IRedisComponent implements it. The key pieces
    of information are:

    * General config
        * hostname: The hostname where the instance can be reached
        * port: The port where the instance is listening

    * Auth
        * auth_secret_name: The in-cluster name for the secret holding the
            username and password
        * auth_secret_username_field: The field within the auth secret that
            holds the username.
        * auth_secret_password_field: The field within the auth secret that
            holds the password

    * TLS:
        * tls_cert: The content of the TLS cert if tls is enabled
        * tls_secret_name: The in-cluster name for the secret holding the TLS
            creds if tls is enabled
        * tls_secret_cert_field: The field within the tls secret that holds the
            cert
    """

    def __init__(
        self,
        session: Session,
        hostname: str,
        port: int,
        auth_secret_name: str,
        auth_secret_password_field: str,
        auth_secret_username_field: str,
        tls_secret_name: Optional[str] = None,
        tls_secret_cert_field: Optional[str] = None,
        auth_username: Optional[str] = None,
        auth_password: Optional[str] = None,
        tls_cert: Optional[str] = None,
    ):
        super().__init__(session)

        # These fields must be passed in directly
        self._hostname = hostname
        self._port = port
        self._auth_secret_name = auth_secret_name
        self._auth_secret_username_field = auth_secret_username_field
        self._auth_secret_password_field = auth_secret_password_field
        self._tls_secret_name = tls_secret_name
        self._tls_secret_cert_field = tls_secret_cert_field

        # The secret content may be populated or not, depending on whether this
        # Connection is being created by the component or a CR config. If not
        # populated now, they will be lazily populated on client request.
        self._auth_username = auth_username
        self._auth_password = auth_password
        self._tls_cert = tls_cert

        # Ensure that the TLS arguments are provided in a reasonable way. The
        # cert may be omitted
        tls_args = {tls_secret_name, tls_secret_cert_field}
        assert (
            tls_args == {None} or None not in tls_args
        ), "All TLS arguments must be provided if tls is enabled"
        self._tls_enabled = tls_args != {None}
        assert (
            self._tls_enabled or self._tls_cert is None
        ), "Cannot give a tls cert value when tls is disabled"

        # Schema is deduced based on the presence of the tls arguments
        self._schema = "redis" if tls_secret_name is None else "rediss"

    ## Properties ##############################################################

    @property
    def schema(self) -> str:
        """The schema (redis or rediss)"""
        return self._schema

    @property
    def hostname(self) -> str:
        """The hostname (without schema)"""
        return self._hostname

    @property
    def port(self) -> int:
        """The numeric port"""
        return self._port

    @property
    def auth_secret_name(self) -> str:
        """Secret name containing the username_key and password_key"""
        return self._auth_secret_name

    @property
    def auth_secret_username_field(self) -> str:
        """Field in the auth secret containing the username"""
        return self._auth_secret_username_field

    @property
    def auth_secret_password_field(self) -> str:
        """Field in the auth secret containing the password"""
        return self._auth_secret_password_field

    @property
    def tls_secret_name(self) -> str:
        """The name of the secret holding the tls certificate (for mounting)"""
        return self._tls_secret_name

    @property
    def tls_secret_cert_field(self) -> str:
        """The field within the tls secret that holds the CA cert"""
        return self._tls_secret_cert_field

    @property
    def tls_enabled(self) -> bool:
        return self._tls_enabled

    ## Interface ###############################################################

    _DICT_FIELDS = [
        "hostname",
        "port",
        "auth_secret_name",
        "auth_secret_password_field",
        "auth_secret_username_field",
        "tls_secret_name",
        "tls_secret_cert_field",
    ]

    def to_dict(self) -> dict:
        """Return the dict representation of the object for the CR"""
        return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}

    @classmethod
    def from_dict(cls, session: Session, config_dict: dict) -> "RedisConnection":
        kwargs = {"session": session}
        config_dict = common.camelcase_to_snake_case(config_dict)
        uri_secret = config_dict.get("uri_secret")
        uri_hostname_field = config_dict.get("uri_secret_hostname_field")
        uri_port_field = config_dict.get("uri_secret_port_field")

        # First pull provided hostname/port secret if available and fill in
        # hostname/port fields into config_dict
        if uri_secret and uri_hostname_field and uri_port_field:
            # If we have provided host/port credentials, we need to extract them
            # and place these values in our config dict
            success, secret_content = session.get_object_current_state(
                "Secret", uri_secret
            )
            assert_cluster(success, f"Fetching connection secret [{uri_secret}] failed")
            assert "data" in secret_content, "Got a secret without 'data'?"
            secret_content = secret_content.get("data")
            assert_precondition(
                secret_content,
                f"Missing expected Secret/{uri_secret} holding [hostname] and [port]",
            )
            hostname = common.b64_secret_decode(secret_content.get(uri_hostname_field))
            port = common.b64_secret_decode(secret_content.get(uri_port_field))
            if None in [hostname, port]:
                log.debug2(
                    "Failed to find hostname/port in uri secret [%s]",
                    uri_secret,
                )

            try:
                port = int(port)
            except ValueError as err:
                raise ConfigError(f"Invalid non-int port: {port}") from err
            config_dict["hostname"], config_dict["port"] = hostname, port

        for field in cls._DICT_FIELDS:
            if field not in config_dict:
                raise ValueError(f"Missing required connection element [{field}]")

            # Set the kwargs (using None in place of empty strings)
            kwargs[field] = config_dict[field] or None

        return cls(**kwargs)

    ## Client Utilities ########################################################

    def get_auth_username_password(self) -> Tuple[Optional[str], Optional[str]]:
        """Get the current username_key/password_key pair from the auth secret if
        available

        Returns:
            username:  str or None
                The plain-text username (not encoded) if available
            password:  str or None
                The plain-text password (not encoded) if available
        """
        if None in [self._auth_username, self._auth_password]:
            secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
            log.debug4("Auth secret content: %s", secret_content)
            log.debug3(
                "Looking for [%s/%s]",
                self._auth_secret_username_field,
                self._auth_secret_password_field,
            )
            username = secret_content.get(self._auth_secret_username_field)
            password = secret_content.get(self._auth_secret_password_field)
            # username not required as expect username only used when using ACL
            # redis-cli also does support username in URI to be used for that
            # CITE: https://redis.io/commands/auth
            if None in [username, password]:
                log.debug2(
                    "Failed to find username/password in auth secret [%s]",
                    self._auth_secret_name,
                )
                return None, None
            self._auth_username = username
            self._auth_password = password
        return self._auth_username, self._auth_password

    def get_tls_cert(self) -> Optional[str]:
        """Get the current TLS certificate for a client connection if TLS is
        enabled

        If TLS is enabled, but the cert is not found, this function will raise
        an AssertionError

        Returns:
            tls_cert: str or None
                PEM encoded cert string (not base64-encoded) if found, otherwise
                None
        """
        if self._tls_enabled:
            if self._tls_cert is None:
                secret_data = self._fetch_secret_data(self._tls_secret_name)
                if secret_data is not None:
                    self._tls_cert = secret_data.get(self._tls_secret_cert_field)
            return self._tls_cert

        return None

    def get_connection_string(self) -> str:
        """Get the formatted Redis connection string to connect to the instance

        Returns:
            connection_string:  str
                The formatted connection string
        """
        username_key, password_key = self.get_auth_username_password()
        assert_precondition(
            None not in [username_key, password_key],
            "No auth keys available for Redis connection string",
        )
        # NOTE: username/password required and needs to change if ever need to
        # support the rediss://<host>:<port> format without username/password
        # CITE: https://redis.io/topics/rediscli
        return "{}://{}:{}@{}:{}".format(
            self._schema, username_key, password_key, self._hostname, self._port
        )
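
Because from_dict converts camelCase keys to snake_case and treats empty strings as None, a CR-provided connection block can be rehydrated directly. A sketch under those assumptions (all values illustrative; session is the active Session):

from oper8.x.datastores.redis.connection import RedisConnection

def connection_from_cr(session, cr_connection_block: dict) -> RedisConnection:
    # from_dict validates that every _DICT_FIELDS entry is present
    return RedisConnection.from_dict(session, cr_connection_block)

# Illustrative CR block; empty TLS fields mean TLS is disabled
example_block = {
    "hostname": "my-redis",
    "port": 6379,
    "authSecretName": "my-redis-auth",
    "authSecretUsernameField": "username",
    "authSecretPasswordField": "password",
    "tlsSecretName": "",
    "tlsSecretCertField": "",
}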
auth_secret_name property

Secret name containing the username_key and password_key

auth_secret_password_field property

Field in the auth secret containing the password

auth_secret_username_field property

Field in the auth secret containing the username

hostname property

The hostname (without schema)

port property

The numeric port

schema property

The schema (redis or rediss)

tls_secret_cert_field property

The field within the tls secret that holds the CA cert

tls_secret_name property

The name of the secret holding the tls certificate (for mounting)

get_auth_username_password()

Get the current username_key/password_key pair from the auth secret if available

Returns:

Name Type Description
username Optional[str]

str or None The plain-text username (not encoded) if available

password Optional[str]

str or None The plain-text password (not encoded) if available

Source code in oper8/x/datastores/redis/connection.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def get_auth_username_password(self) -> Tuple[Optional[str], Optional[str]]:
    """Get the current username_key/password_key pair from the auth secret if
    available

    Returns:
        username:  str or None
            The plain-text username (not encoded) if available
        password:  str or None
            The plain-text password (not encoded) if available
    """
    if None in [self._auth_username, self._auth_password]:
        secret_content = self._fetch_secret_data(self._auth_secret_name) or {}
        log.debug4("Auth secret content: %s", secret_content)
        log.debug3(
            "Looking for [%s/%s]",
            self._auth_secret_username_field,
            self._auth_secret_password_field,
        )
        username = secret_content.get(self._auth_secret_username_field)
        password = secret_content.get(self._auth_secret_password_field)
        # username not required as expect username only used when using ACL
        # redis-cli also does support username in URI to be used for that
        # CITE: https://redis.io/commands/auth
        if None in [username, password]:
            log.debug2(
                "Failed to find username/password in auth secret [%s]",
                self._auth_secret_name,
            )
            return None, None
        self._auth_username = username
        self._auth_password = password
    return self._auth_username, self._auth_password
get_connection_string()

Get the formatted Redis connection string to connect to the instance

Returns:

Name Type Description
connection_string str

str The formatted connection string

Source code in oper8/x/datastores/redis/connection.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def get_connection_string(self) -> str:
    """Get the formatted Redis connection string to connect to the instance

    Returns:
        connection_string:  str
            The formatted connection string
    """
    username_key, password_key = self.get_auth_username_password()
    assert_precondition(
        None not in [username_key, password_key],
        "No auth keys available for Redis connection string",
    )
    # NOTE: username/password required and needs to change if ever need to
    # support the rediss://<host>:<port> format without username/password
    # CITE: https://redis.io/topics/rediscli
    return "{}://{}:{}@{}:{}".format(
        self._schema, username_key, password_key, self._hostname, self._port
    )
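
A client-side sketch of consuming the connection string with redis-py follows; the redis package is an assumption (not part of oper8). When TLS is enabled the URI already uses the rediss:// scheme, and the PEM cert from get_tls_cert (documented below) can be written to disk and handed to the client as the CA bundle.

import tempfile

import redis  # assumed client library; not part of oper8

def make_client(redis_conn) -> "redis.Redis":
    """Build a redis-py client from the helpers documented here."""
    url = redis_conn.get_connection_string()  # redis:// or rediss:// with creds
    cert = redis_conn.get_tls_cert()          # PEM string, or None when TLS is off
    if cert is None:
        return redis.Redis.from_url(url)
    # redis-py expects the CA cert as a file path
    with tempfile.NamedTemporaryFile("w", suffix=".crt", delete=False) as ca_file:
        ca_file.write(cert)
    return redis.Redis.from_url(url, ssl_ca_certs=ca_file.name)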
get_tls_cert()

Get the current TLS certificate for a client connection if TLS is enabled

If TLS is enabled, but the cert is not found, this function will raise an AssertionError

Returns:

Name Type Description
tls_cert Optional[str]

str or None PEM encoded cert string (not base64-encoded) if found, otherwise None

Source code in oper8/x/datastores/redis/connection.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def get_tls_cert(self) -> Optional[str]:
    """Get the current TLS certificate for a client connection if TLS is
    enabled

    If TLS is enabled, but the cert is not found, this function will raise
    an AssertionError

    Returns:
        tls_cert: str or None
            PEM encoded cert string (not base64-encoded) if found, otherwise
            None
    """
    if self._tls_enabled:
        if self._tls_cert is None:
            secret_data = self._fetch_secret_data(self._tls_secret_name)
            if secret_data is not None:
                self._tls_cert = secret_data.get(self._tls_secret_cert_field)
        return self._tls_cert

    return None
to_dict()

Return the dict representation of the object for the CR

Source code in oper8/x/datastores/redis/connection.py
152
153
154
def to_dict(self) -> dict:
    """Return the dict representation of the object for the CR"""
    return {field: getattr(self, f"_{field}") for field in self._DICT_FIELDS}
factory

Redis instance factory

RedisFactory

Bases: DatastoreSingletonFactoryBase

The common factory that will manage instances of Redis

Source code in oper8/x/datastores/redis/factory.py
10
11
12
13
14
class RedisFactory(DatastoreSingletonFactoryBase):
    """The common factory that will manage instances of Redis"""

    DATASTORE_TYPE = "redis"
    CONNECTION_TYPE = RedisConnection
interfaces

Base class interface for a Redis component

IRedisComponent

Bases: Datastore

A redis chart provides access to a redis instance

Source code in oper8/x/datastores/redis/interfaces.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
@component(COMPONENT_NAME)
class IRedisComponent(Datastore):
    """A redis chart provides access to a redis instance"""

    ## Parent Interface ########################################################

    def get_connection(self) -> RedisConnection:
        """Get the connection object for this instance"""
        return RedisConnection(
            session=self.session,
            hostname=self._get_hostname(),
            port=self._get_port(),
            auth_secret_name=self._get_auth_secret_name(),
            auth_secret_username_field=self._get_auth_secret_username_field(),
            auth_secret_password_field=self._get_auth_secret_password_field(),
            tls_secret_name=self._get_tls_secret_name(),
            tls_secret_cert_field=self._get_tls_secret_cert_field(),
            auth_username=self._get_auth_username(),
            auth_password=self._get_auth_password(),
            tls_cert=self._get_tls_cert(),
        )

    ## Abstract Interface ######################################################
    #
    # This is the interface that needs to be implemented by a child in order to
    # provide the common information that a client will use.
    ##

    ##################
    ## General Info ##
    ##################

    @abstractmethod
    def _get_hostname(self) -> str:
        """Gets the hotsname for the connection. Can be IP address as well.

        Returns:
            hostname:  str
                The hostname (without schema) for the service
        """

    @abstractmethod
    def _get_port(self) -> int:
        """Gets the port where the service is listening

        Returns:
            port:  int
                The port where the service is listening
        """

    ###############
    ## Auth Info ##
    ###############

    @abstractmethod
    def _get_auth_secret_name(self) -> str:
        """Get the Auth secret name with any scoping applied

        Returns:
            auth_secret_name:  str
                The name of the secret containing the auth secret
        """

    @abstractmethod
    def _get_auth_secret_username_field(self) -> Optional[str]:
        """Get the field form within the auth secret that contains the
        username

        Returns:
            username_key_field:  str
                The field within the auth secret that contains the username
        """

    @abstractmethod
    def _get_auth_secret_password_field(self) -> str:
        """Get the field form within the auth secret that contains the
        password for the user

        Returns:
            password_key_field:  str
                The field within the auth secret that contains the password_key
        """

    @abstractmethod
    def _get_auth_username(self) -> Optional[str]:
        """Get the un-encoded content of the username if available in-memory.
        Components which proxy an external secret don't need to fetch this
        content from the cluster.

        Returns:
            username:  Optional[str]
                The content of the username if known
        """

    @abstractmethod
    def _get_auth_password(self) -> Optional[str]:
        """Get the un-encoded content of the password if available in-memory.
        Components which proxy an external secret don't need to fetch this
        content from the cluster.

        Returns:
            password:  Optional[str]
                The content of the password if known
        """

    ##############
    ## TLS Info ##
    ##############

    @abstractmethod
    def _get_tls_secret_name(self) -> Optional[str]:
        """Get the TLS secret name with any scoping applied if tls is enabled

        Returns:
            tls_secret_name:  Optional[str]
                If tls is enabled, returns the name of the secret, otherwise
                None
        """

    @abstractmethod
    def _get_tls_secret_cert_field(self) -> Optional[str]:
        """Get the field from within the tls secret that contains the CA
        certificate a client would need to use to connect

        Returns:
            cert_field:  Optional[str]
                The field within the tls secret where the CA certificate lives
        """

    @abstractmethod
    def _get_tls_cert(self) -> Optional[str]:
        """Get the un-encoded content of the TLS cert if TLS is enabled and
        available in-memory. Components which proxy an external secret don't
        need to fetch this content from the cluster.

        Returns:
            cert_content:  Optional[str]
                The content of the cert if tls is enabled
        """
get_connection()

Get the connection object for this instance

Source code in oper8/x/datastores/redis/interfaces.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def get_connection(self) -> RedisConnection:
    """Get the connection object for this instance"""
    return RedisConnection(
        session=self.session,
        hostname=self._get_hostname(),
        port=self._get_port(),
        auth_secret_name=self._get_auth_secret_name(),
        auth_secret_username_field=self._get_auth_secret_username_field(),
        auth_secret_password_field=self._get_auth_secret_password_field(),
        tls_secret_name=self._get_tls_secret_name(),
        tls_secret_cert_field=self._get_tls_secret_cert_field(),
        auth_username=self._get_auth_username(),
        auth_password=self._get_auth_password(),
        tls_cert=self._get_tls_cert(),
    )

oper8x_component

This class provides a base class with shared functionality that all concrete components can use.

Oper8xComponent

Bases: Component

The Oper8xComponent provides common config-based utilities on top of the core oper8.Component base class. It can be used as a drop-in replacement.

Source code in oper8/x/oper8x_component.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
class Oper8xComponent(Component):
    """The Oper8xComponent provides common config-based utilities on top of the
    core oper8.Component base class. It can be used as a drop-in replacement.
    """

    def __init__(self, session: Session, disabled: bool = False):
        """Construct with a member to access the session in implementations

        Args:
            session:  Session
                The session for the current deployment
            disabled:  bool
                Whether or not this component is disabled in the current
                configuration
        """
        super().__init__(session=session, disabled=disabled)
        self._session = session

    @property
    def session(self):
        return self._session

    ## Interface Overrides #####################################################

    def deploy(self, session: Session) -> bool:
        """Override the base Component's implementation of deploy to insert the
        dependency hash annotation. See NOTE in deps_annotation for explanation
        of why deploy is used instead of update_object_definition.

        Args:
            session:  Session
                The session for the current deployment

        Returns:
            success:  bool
                True on successful application of the resource to the cluster
        """
        for obj in self.managed_objects:
            obj.definition = deps_annotation.add_deps_annotation(
                self, session, obj.definition
            )
        return super().deploy(session)

    def update_object_definition(
        self,
        session: Session,
        internal_name: str,
        resource_definition: dict,
    ) -> dict:
        """For components assigned to different namespaces, ensure that the
        target namespace is set

        Args:
            session:  Session
                The session for this deploy
            internal_name:  str
                The internal name of the object to update
            resource_definition:  dict
                The dict representation of the resource to modify

        Returns:
            resource_definition:  dict
                The dict representation of the resource with any modifications
                applied
        """

        # Call the base implementation
        resource_definition = super().update_object_definition(
            session,
            internal_name,
            resource_definition,
        )

        # Inject namespace override for this component if given
        namespace_override = session.config.get(self.name, {}).get("namespace")
        if namespace_override is not None:
            log.debug2("Namespace  override for %s: %s", self, namespace_override)
            metadata = resource_definition.get("metadata")
            assert isinstance(metadata, dict), "Resource metadata is not a dict!"
            metadata["namespace"] = namespace_override

        return resource_definition

    ## Shared Utilities ########################################################

    def get_cluster_name(self, resource_name: str) -> str:
        """Get the name for a given resource with any instance scoping applied

        Args:
            resource_name:  str
                The unscoped name of a kubernetes resource

        Returns:
            resource_cluster_name:  str
                The name that the resource will use in the cluster
        """
        return common.get_resource_cluster_name(
            resource_name=resource_name,
            component=self.name,
            session=self.session,
        )

    def get_replicas(self, force: bool = False) -> Union[int, None]:
        """Get the replica count for this component based on the current
        deploy's t-shirt size and the state of the instance-size label. A
        replica count is only returned if there is not an existing replica count
        in the cluster for this deployment, the t-shirt size has changed, or
        the force flag is True.

        Args:
            force: bool
                If True, the state of the cluster will not be checked

        Returns:
            replicas:  Union[int, None]
                If replicas should be set for this deployment, the integer count
                will be returned, otherwise None is returned.
        """
        return common.get_replicas(
            session=self.session,
            component_name=self.name,
            unscoped_name=self.name,
            force=force,
        )
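
The sketch below shows how the scoping and replica helpers above might be used when rendering a Deployment dict. comp is assumed to be an Oper8xComponent inside an active reconcile; everything else is plain dict construction, and the name/image values are illustrative.

def deployment_skeleton(comp, unscoped_name: str, image: str) -> dict:
    """Render a bare Deployment dict using the scoping/replica helpers above."""
    name = comp.get_cluster_name(unscoped_name)  # instance-scoped cluster name
    replicas = comp.get_replicas()               # None => leave replicas unmanaged
    deployment = {
        "apiVersion": "apps/v1",
        "kind": "Deployment",
        "metadata": {"name": name, "labels": {"app": name}},
        "spec": {
            "selector": {"matchLabels": {"app": name}},
            "template": {
                "metadata": {"labels": {"app": name}},
                "spec": {"containers": [{"name": unscoped_name, "image": image}]},
            },
        },
    }
    if replicas is not None:
        deployment["spec"]["replicas"] = replicas
    return deployment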
__init__(session, disabled=False)

Construct with a member to access the session in implementations

Parameters:

Name Type Description Default
session Session

Session The session for the current deployment

required
disabled bool

bool Whether or not this component is disabled in the current configuration

False
Source code in oper8/x/oper8x_component.py
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(self, session: Session, disabled: bool = False):
    """Construct with a member to access the session in implementations

    Args:
        session:  Session
            The session for the current deployment
        disabled:  bool
            Whether or not this component is disabled in the current
            configuration
    """
    super().__init__(session=session, disabled=disabled)
    self._session = session
deploy(session)

Override the base Component's implementation of deploy to insert the dependency hash annotation. See NOTE in deps_annotation for explanation of why deploy is used instead of update_object_definition.

Parameters:

Name Type Description Default
session Session

Session The session for the current deployment

required

Returns:

Name Type Description
success bool

bool True on successful application of the resource to the cluster

Source code in oper8/x/oper8x_component.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def deploy(self, session: Session) -> bool:
    """Override the base Component's implementation of deploy to insert the
    dependency hash annotation. See NOTE in deps_annotation for explanation
    of why deploy is used instead of update_object_definition.

    Args:
        session:  Session
            The session for the current deployment

    Returns:
        success:  bool
            True on successful application of the resource to the cluster
    """
    for obj in self.managed_objects:
        obj.definition = deps_annotation.add_deps_annotation(
            self, session, obj.definition
        )
    return super().deploy(session)
get_cluster_name(resource_name)

Get the name for a given resource with any instance scoping applied

Parameters:

Name Type Description Default
resource_name str

str The unscoped name of a kubernetes resource

required

Returns:

Name Type Description
resource_cluster_name str

str The name that the resource will use in the cluster

Source code in oper8/x/oper8x_component.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def get_cluster_name(self, resource_name: str) -> str:
    """Get the name for a given resource with any instance scoping applied

    Args:
        resource_name:  str
            The unscoped name of a kubernetes resource

    Returns:
        resource_cluster_name:  str
            The name that the resource will use in the cluster
    """
    return common.get_resource_cluster_name(
        resource_name=resource_name,
        component=self.name,
        session=self.session,
    )
get_replicas(force=False)

Get the replica count for this component based on the current deploy's t-shirt size and the state of the instance-size label. A replica count is only returned if there is not an existing replica count in the cluster for this deployment, the t-shirt size has changed, or the force flag is True.

Parameters:

Name Type Description Default
force bool

bool If True, the state of the cluster will not be checked

False

Returns:

Name Type Description
replicas Union[int, None]

Union[int, None] If replicas should be set for this deployment, the integer count will be returned, otherwise None is returned.

Source code in oper8/x/oper8x_component.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get_replicas(self, force: bool = False) -> Union[int, None]:
    """Get the replica count for this component based on the current
    deploy's t-shirt size and the state of the instance-size label. A
    replica count is only returned if there is not an existing replica count
    in the cluster for this deployment, the t-shirt size has changed, or
    the force flag is True.

    Args:
        force: bool
            If True, the state of the cluster will not be checked

    Returns:
        replicas:  Union[int, None]
            If replicas should be set for this deployment, the integer count
            will be returned, otherwise None is returned.
    """
    return common.get_replicas(
        session=self.session,
        component_name=self.name,
        unscoped_name=self.name,
        force=force,
    )
update_object_definition(session, internal_name, resource_definition)

For components assigned to different namespaces, ensure that the target namespace is set

Parameters:

Name Type Description Default
session Session

Session The session for this deploy

required
internal_name str

str The internal name of the object to update

required
resource_definition dict

dict The dict representation of the resource to modify

required

Returns:

Name Type Description
resource_definition dict

dict The dict representation of the resource with any modifications applied

Source code in oper8/x/oper8x_component.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def update_object_definition(
    self,
    session: Session,
    internal_name: str,
    resource_definition: dict,
) -> dict:
    """For components assigned to different namespaces, ensure that the
    target namespace is set

    Args:
        session:  Session
            The session for this deploy
        internal_name:  str
            The internal name of the object to update
        resource_definition:  dict
            The dict representation of the resource to modify

    Returns:
        resource_definition:  dict
            The dict representation of the resource with any modifications
            applied
    """

    # Call the base implementation
    resource_definition = super().update_object_definition(
        session,
        internal_name,
        resource_definition,
    )

    # Inject namespace override for this component if given
    namespace_override = session.config.get(self.name, {}).get("namespace")
    if namespace_override is not None:
        log.debug2("Namespace  override for %s: %s", self, namespace_override)
        metadata = resource_definition.get("metadata")
        assert isinstance(metadata, dict), "Resource metadata is not a dict!"
        metadata["namespace"] = namespace_override

    return resource_definition

utils

Common utilities for reused components

abc_static

This module adds metaclass support for declaring an interface with @abstractmethod methods that MUST be implemented as @classmethod or @staticmethod

ABCStatic

An ABCStatic class is a child of abc.ABC which has support for enforcing methods which combine @classmethod and @abstractmethod

Source code in oper8/x/utils/abc_static.py
68
69
70
71
class ABCStatic(metaclass=ABCStaticMeta):
    """An ABCStatic class is a child of abc.ABC which has support for enforcing
    methods which combine @classmethod and @abstractmethod
    """
ABCStaticMeta

Bases: ABCMeta

The StaticABCMeta class is a metaclass that enforces implementations of base class functions marked as both @abstractmethod and @classmethod. Methods with this signature MUST be implemented with the @classmethod or @staticmethod decorator in derived classes.

Source code in oper8/x/utils/abc_static.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
class ABCStaticMeta(abc.ABCMeta):
    """The StaticABCMeta class is a metaclass that enforces implementations of
    base class functions marked as both @abstractmethod and @classmethod.
    Methods with this signature MUST be implemented with the @classmethod or
    @staticmethod decorator in derived classes.
    """

    def __init__(cls, name, bases, dct):
        # Find abstract class methods that have not been implemented at all
        attrs = {name: getattr(cls, name) for name in dir(cls)}
        cls.__abstract_class_methods__ = [
            name
            for name, attr in attrs.items()
            if inspect.ismethod(attr) and getattr(attr, "__isabstractmethod__", False)
        ]

        # For any abstract class methods that have not been implemented,
        # overwrite them to raise NotImplementedError if called
        for method_name in cls.__abstract_class_methods__:

            def not_implemented(*_, x=method_name, **__):
                raise NotImplementedError(f"Cannot invoke abstract class method {x}")

            not_implemented.__original_signature__ = inspect.signature(
                getattr(cls, method_name)
            )
            setattr(cls, method_name, not_implemented)

        # Look for abstract class methods of parents
        base_abstract_class_methods = {
            method_name: getattr(base, method_name)
            for base in bases
            for method_name in getattr(base, "__abstract_class_methods__", [])
            if method_name not in cls.__abstract_class_methods__
        }

        # If any parent abstract class methods have been implemented as instance
        # methods, raise an import-time exception
        for method_name, base_method in base_abstract_class_methods.items():
            # A local implementation is valid if it is a bound method (
            # implemented as a @classmethod) or it is a function with a
            # signature that exactly matches the signature of the base class
            # (implemented as @staticmethod).
            this_method = getattr(cls, method_name)
            is_classmethod = inspect.ismethod(this_method)
            original_signature = getattr(base_method, "__original_signature__", None)
            is_staticmethod = inspect.isfunction(this_method) and inspect.signature(
                this_method
            ) in [original_signature, inspect.signature(base_method)]
            if not (is_classmethod or is_staticmethod):
                raise NotImplementedError(
                    f"The method [{method_name}] is an @classmethod @abstractmethod. "
                    f"{cls} implements it as an instance method"
                )
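
A small illustration of the enforcement described above: an abstract classmethod declared on an ABCStatic base must be implemented as a @classmethod or @staticmethod in children, otherwise class creation itself raises NotImplementedError. The parser names below are purely illustrative.

import abc
import json

from oper8.x.utils.abc_static import ABCStatic

class ParserBase(ABCStatic):
    """Declares an abstract classmethod that children must keep class/static scoped."""

    @classmethod
    @abc.abstractmethod
    def parse(cls, raw: str) -> dict:
        """Parse a raw string into a dict"""

class JsonParser(ParserBase):
    @classmethod
    def parse(cls, raw: str) -> dict:  # OK: implemented as a classmethod
        return json.loads(raw)

# A child that implemented parse as a plain instance method would fail at
# class-creation time with NotImplementedError, per the metaclass above.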

common

Shared utilities accessible to all components

from_string_or_number(value)

Handle strings or numbers for fields that can be either string or numeric

Parameters:

Name Type Description Default
value Union[int, str, float]

Union[str, int, float] Quantity type that can be in numeric or string form (e.g. resources)

required

Returns:

Name Type Description
formatted_value Union[int, str, float]

Union[str, int, float] The value formatted as the correct type

Source code in oper8/x/utils/common.py
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def from_string_or_number(value: Union[int, str, float]) -> Union[int, str, float]:
    """Handle strings or numbers for fields that can be either string or numeric

    Args:
        value: Union[str, int, float]
            Quantity type that can be in numeric or string form (e.g. resources)

    Returns:
        formatted_value: Union[str, int, float]
            The value formatted as the correct type
    """
    # By default no conversion is needed
    formatted_value = value

    # If it's a string, try converting it to an int, then a float
    if isinstance(value, str):
        for target_type in [int, float]:
            try:
                formatted_value = target_type(value)
                break
            except ValueError:
                pass

    return formatted_value
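
Quick illustration of the conversion behavior, assuming the function is importable from oper8.x.utils.common as referenced above:

from oper8.x.utils.common import from_string_or_number

assert from_string_or_number("2") == 2            # int-like strings become ints
assert from_string_or_number("0.5") == 0.5        # float-like strings become floats
assert from_string_or_number("500Mi") == "500Mi"  # k8s quantity strings pass through
assert from_string_or_number(3) == 3              # numbers are returned unchanged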
get_deploy_labels(session, base_labels=None)

Get labels for a Deployment resource on top of the standard base labels

Source code in oper8/x/utils/common.py
182
183
184
185
186
187
def get_deploy_labels(session, base_labels=None):
    """Get labels for a Deployment resource on top of the standard base labels"""
    # Shallow copy is fine here since labels are one-level deep and only strings
    deploy_labels = copy.copy(base_labels or {})
    deploy_labels["instance-size"] = session.spec.size
    return deploy_labels
get_labels(cluster_name, session, component_name=None)

Common utility for fetching the set of metadata.labels for a given resource.

Parameters:

Name Type Description Default
cluster_name str

str The name of the resource as it will be applied to the cluster including any scoping applied by get_resource_cluster_name

required
session Session

DeploySession The session for the current deployment

required
component_name Optional[str]

str The name of the component that manages this resource. NOTE: This argument is optional for backwards compatibility, but should always be provided to ensure accurate labels!

None

Returns:

Name Type Description
labels Dict[str, str]

Dict[str, str] The full set of labels to use for the given resource

Source code in oper8/x/utils/common.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def get_labels(
    cluster_name: str,
    session: Session,
    component_name: Optional[str] = None,
) -> Dict[str, str]:
    """Common utility for fetching the set of metadata.labels for a given resource.
    Args:
        cluster_name:  str
            The name of the resource as it will be applied to the cluster
            including any scoping applied by get_resource_cluster_name
        session:  DeploySession
            The session for the current deployment
        component_name:  str
            The name of the component that manages this resource.
            NOTE: This argument is optional for backwards compatibility,
                but should always be provided to ensure accurate labels!
    Returns:
        labels:  Dict[str, str]
            The full set of labels to use for the given resource
    """
    labels = {
        "app": cluster_name,
        "app.kubernetes.io/managed-by": "Oper8",
        "app.kubernetes.io/instance": session.name,
    }
    if component_name:
        labels["component"] = component_name
    if slot_name := get_slot_name(component_name, session):
        labels["slot"] = slot_name

    # Add user-specified labels from the CR's spec.labels field
    user_labels = session.spec.labels or {}
    labels.update(user_labels)

    return labels
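
A sketch of attaching these labels to a resource's metadata; session is assumed to be the current deploy Session, and the cluster-scoped name would normally come from get_resource_cluster_name (documented below).

from oper8.x.utils import common

def labeled_metadata(session, cluster_name: str, component_name: str) -> dict:
    """Build a metadata block carrying the standard oper8 labels."""
    return {
        "name": cluster_name,
        "labels": common.get_labels(
            cluster_name=cluster_name,
            session=session,
            component_name=component_name,  # always pass this for accurate labels
        ),
    }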
get_replicas(session, component_name, unscoped_name, force=False, replicas_override=None)

Get the replica count for the given resource.

This function consolidates logic for getting replicas for all components in the application. It allows replicas to be conditionally set only when needed to avoid thrashing with HPAs.

Parameters:

Name Type Description Default
session Session

Session The current deploy session

required
component_name str

str The name of the component to get replicas for

required
unscoped_name str

str The external name of the deployment without scoping

required
force bool

bool If True, the state of the cluster will not be checked

False
replicas_override Union[int, None]

int or None An override value to use in place of the normal config-based value

None

Returns:

Name Type Description
replicas Union[int, None]

int or None If replicas should not be set for this resource, None is returned, otherwise the number of replicas is returned based on the t-shirt size for the instance.

Source code in oper8/x/utils/common.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def get_replicas(
    session: Session,
    component_name: str,
    unscoped_name: str,
    force: bool = False,
    replicas_override: Union[int, None] = None,
) -> Union[int, None]:
    """
    Get the replica count for the given resource.

    This function consolidates logic for getting replicas for all components in
    the application. It allows replicas to be conditionally set only when needed
    to avoid thrashing with HPAs.

    Args:
        session: Session
            The current deploy session
        component_name: str
            The name of the component to get replicas for
        unscoped_name: str
            The external name of the deployment without scoping
        force: bool
            If True, the state of the cluster will not be checked
        replicas_override: int or None
            An override value to use in place of the normal config-based value

    Returns:
        replicas: int or None
            If replicas should not be set for this resource, None is returned,
            otherwise the number of replicas is returned based on the t-shirt
            size for the instance.
    """

    # Fetch the current state of the deployment
    if not force:
        name = get_resource_cluster_name(
            resource_name=unscoped_name,
            component=component_name,
            session=session,
        )
        success, content = session.get_object_current_state(
            kind="Deployment",
            name=name,
            api_version="apps/v1",
        )
        assert success, f"Failed to look up state for [{name}]"

        # Check the current content to see if this is a t-shirt size change
        if content is not None:
            # Fetch the current replica count. We'll reuse this if there's no
            # reason to change
            replicas = content.get("spec", {}).get("replicas")

            # If we found replicas, check for t-shirt size change
            if replicas is None:
                log.debug("No replicas found for [%s]. Using config.".name)
            else:
                assert isinstance(replicas, int), "Replicas is not an int!"
                current_size = session.spec.size
                deployed_size = (
                    content.get("metadata", {}).get("labels", {}).get("instance-size")
                )
                if replicas == 0 and not session.spec.get("backup", {}).get(
                    "offlineQuiesce", False
                ):
                    log.debug(
                        "Found [%s] with size [%s] and offlineQuiesce off. Need "
                        "to scale up from [%s] replicas.",
                        name,
                        current_size,
                        replicas,
                    )
                elif current_size == deployed_size:
                    log.debug(
                        "Found [%s] with size [%s]. Not changing replicas from [%s].",
                        name,
                        current_size,
                        replicas,
                    )
                    return replicas
                else:
                    log.debug(
                        "Found t-shirt size change for [%s] from [%s -> %s]",
                        name,
                        deployed_size,
                        current_size,
                    )

    # Look up the replicas based on the t-shirt size
    size = session.spec.size
    replica_map = session.config.get("replicas", {}).get(size, {})
    replicas = replicas_override or replica_map.get(component_name)
    log.debug3("Replica map for [%s]: %s", size, replica_map)
    assert_config(
        replicas is not None,
        f"No replicas for [{component_name}] available for size [{size}]",
    )
    return replicas
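
For orientation, here is a minimal sketch of the session.config shape that get_replicas consults when it falls back to the t-shirt-size lookup. The component names and counts are illustrative only.

# Hypothetical "replicas" section of session.config, keyed by t-shirt size and
# then by component name
replicas_config = {
    "replicas": {
        "small": {"api-server": 1, "worker": 2},
        "medium": {"api-server": 2, "worker": 4},
    }
}

# With session.spec.size == "medium", get_replicas(session, "worker", "worker")
# would resolve to 4, unless the live Deployment already matches that size, in
# which case its current replica count is reused to avoid thrashing an HPA.
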
get_resource_cluster_name(resource_name, component, session)

Common helper function to get the name a given kubernetes resource should use when deployed to the cluster.

Parameters:

    resource_name (str, required): The raw name for the resource (e.g. sireg-secret)
    component (Union[Component, str], required): The component (or component name) that owns this resource
    session (Session, required): The session for the current reconciliation deploy

Returns:

    resource_cluster_name (str): The resource name with appropriate scoping and truncation added

Source code in oper8/x/utils/common.py
def get_resource_cluster_name(
    resource_name: str,
    component: Component,
    session: Session,
):
    """Common helper function to get the name a given kubernetes resource should
    use when deployed to the cluster.

    Args:
        resource_name:  str
            The raw name for the resource (e.g. sireg-secret)
        component:  Union[Component, str]
            The component (or component name) that owns this resource
        session:  Session
            The session for the current reconciliation deploy

    Returns:
        resource_cluster_name:  str
            The resource name with appropriate scoping and truncation added
    """
    if is_global(component, session):
        log.debug2(
            "Applying global name logic to [%s] for component [%s]",
            resource_name,
            component,
        )
        return session.get_truncated_name(resource_name)
    return session.get_scoped_name(resource_name)
get_slot_name(component, session)

Get the slot name for the given component in the current deployment

Parameters:

    component (Union[Component, str], required): The component to fetch the slot name for
    session (Session, required): The session for the current deployment

Returns:

    slot_name (str): The string name of the slot where the given component will live for this deployment. For global components, the static global slot name is returned

Source code in oper8/x/utils/common.py
def get_slot_name(component: Union[Component, str], session: Session) -> str:
    """Get the slot name for the given component in the current deployment

    Args:
        component:  Union[Component, str]
            The component to fetch the slot name for
        session:  DeploySession
            The session for the current deployment

    Returns:
        slot_name:  str
            The string name of the slot where the given component will live for
            this deployment. For global components, the static global slot name
            is returned
    """
    if not is_global(component, session):
        return session.name
    return constants.GLOBAL_SLOT
is_global(component, session)

Determine if the given component is global in this deployment

Parameters:

    component (Union[Component, str], required): The component to fetch the slot name for
    session (Session, required): The session for the current deployment

Returns:

    is_global (bool): True if the given component is global

Source code in oper8/x/utils/common.py
def is_global(component: Union[Component, str], session: Session) -> bool:
    """Determine if the given component is global in this deployment

    Args:
        component:  Union[Component, str]
            The component to fetch the slot name for
        session:  Session
            The session for the current deployment

    Returns:
        is_global:  bool
            True if the given component is global!
    """
    component_name = component if isinstance(component, str) else component.name
    return session.config.get(component_name, {}).get(constants.GLOBAL_SLOT, False)
metadata_defaults(cluster_name, session, **kwargs)

This function will create the metadata object given the external name for a resource. The external name should be created using common.get_resource_external_name. These functions are separate because the external name is often needed independently, so it will be pre-computed at the start of most components.

Parameters:

    cluster_name (str, required): The fully scoped and truncated name that the resource will use in the cluster (metadata.name)
    session (Session, required): The session for the current reconciliation deploy

Returns:

    metadata (dict): The constructed metadata dict

Source code in oper8/x/utils/common.py
def metadata_defaults(
    cluster_name: str,
    session: Session,
    **kwargs,
) -> dict:
    """This function will create the metadata object given the external name for
    a resource. The external name should be created using
    common.get_resource_external_name. These functions are separate because the
    external name is often needed independently, so it will be pre-computed at
    the start of most components.

    Args:
        cluster_name:  str
            The fully scoped and truncated name that the resource will use in
            the cluster (metadata.name)
        session:  DeploySession
            The session for the current reconciliation deploy

    Returns:
        metadata:  dict
            The constructed metadata dict
    """
    # NOTE: For the time being, there are no defaults injected here, but we will
    #   retain the abstraction function so that we can add defaulting
    #   functionality (e.g. multi-namespace deployments) without touching every
    #   file.
    return {"name": cluster_name, **kwargs}
mount_mode(octal_val)

This helper gets the decimal version of an octal representation of file permissions used for a volume mount.

Parameters:

    octal_val (int or str, required): The number as octal (e.g. 755 or "0755")

Returns:

    decimal_val (int): The decimal integer value corresponding to the given octal value, which can be used in a VolumeMount's default_mode field

Source code in oper8/x/utils/common.py
def mount_mode(octal_val):
    """This helper gets the decimal version of an octal representation of file
    permissions used for a volume mount.

    Args:
        octal_val:  int or str
            The number as octal (e.g. 755 or "0755")

    Returns:
        decimal_val:  int
            The decimal integer value corresponding to the given octal value
            which can be used in VolumeMount's default_mode field
    """
    return int(str(octal_val), 8)
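
For example, the common octal permission strings map to the decimal values expected by the volume mount's default mode field:

mount_mode("0755")  # -> 493
mount_mode(644)     # -> 420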

constants

Shared constants across the various oper8.x tools

deps_annotation

This module holds shared functionality for adding dependency annotations to all resources that need them.

A dependency annotation on a Pod encodes a unique hash of the set of data-resources that the Pod depends on. For example, if a Pod mounts a Secret and a ConfigMap, the dependency annotation will hold a unique hash of the data content of those resources. The role of the dependency annotation is to force a rollover when upstream data-resources change their content so that the new content is guaranteed to be picked up by the consuming Pod.

add_deps_annotation(component, session, resource_definition)

Add the dependency hash annotation to any pods found in the given object

Parameters:

    component (Component, required): The component that this resource belongs to
    session (Session, required): The session for this deploy
    resource_definition (dict, required): The dict representation of the resource to modify

Returns:

    resource_definition (dict): The dict representation of the resource with any modifications applied

Source code in oper8/x/utils/deps_annotation.py
@alog.logged_function(log.debug)
def add_deps_annotation(
    component: Component,
    session: Session,
    resource_definition: dict,
) -> dict:
    """Add the dependency hash annotation to any pods found in the given object

    Args:
        component:  Component
            The component that this resource belongs to
        session:  Session
            The session for this deploy
        resource_definition:  dict
            The dict representation of the resource to modify

    Returns:
        resource_definition:  dict
            The dict representation of the resource with any modifications
            applied
    """
    resource_name = "{}/{}".format(
        resource_definition.get("kind"),
        resource_definition.get("metadata", {}).get("name"),
    )

    # Look for any/all pod annotations
    pod = _find_pod(resource_definition)
    if pod is not None:
        log.debug2("Found Pod for [%s]", resource_name)
        log.debug4(pod)

        # Traverse through and look for anything that looks like a secret or
        # configmap reference
        deps_map = _find_pod_data_deps(pod)
        log.debug3("Deps Map: %s", deps_map)
        if deps_map:
            # Go through each dependency and determine if it needs to be fetched
            # or if it's part of the owning component
            deps_list = []
            for dep_kind, dep_names in deps_map.items():
                for dep_name in dep_names:
                    # Look for this object in the objects managed by this
                    # component.
                    #
                    # NOTE: This will only be the components which have been
                    #   declared earlier in the chart or have explicitly been
                    #   marked as upstreams of this object.
                    found_in_component = False
                    for obj in component.managed_objects:
                        log.debug4("Checking %s/%s", obj.kind, obj.name)
                        if obj.kind == dep_kind and obj.name == dep_name:
                            log.debug3(
                                "Found intra-chart dependency of %s: %s",
                                resource_name,
                                obj,
                            )
                            deps_list.append(obj.definition)
                            found_in_component = True
                            break

                    # If not found in the component, add it as a lookup
                    if not found_in_component:
                        log.debug3(
                            "Found extra-chart dependency of %s: %s/%s",
                            resource_name,
                            dep_kind,
                            dep_name,
                        )
                        deps_list.append((dep_kind, dep_name))

            # Add the annotation with the full list
            md = pod.setdefault("metadata", {})
            annos = md.setdefault("annotations", {})
            md["annotations"] = merge_configs(
                annos, get_deps_annotation(session, deps_list, resource_name)
            )

    log.debug4("Updated Definition of [%s]: %s", resource_name, resource_definition)
    return resource_definition
get_deps_annotation(session, dependencies, resource_name='', namespace=_SESSION_NAMESPACE)

Get a dict holding an annotation key/value pair representing the unique content hash of all given dependencies. This can be used to force pods to roll over when a dependency such as a ConfigMap or Secret changes its content. This function supports two ways of fetching dependency content:

  1. Dict representation of the object
  2. Tuple of the scoped (kind, name) for the object

Additionally, this function holds special logic for ConfigMap and Secret dependencies, but can handle arbitrary kinds. For kinds without special logic, the full dict representation is used to compute the hash.

Parameters:

    session (Session, required): The current session
    dependencies (List[Union[dict, Tuple[str, str]]], required): An ordered list of dependencies to compute the content hash from
    resource_name (str, default ''): A string name for the resource (used for logging)
    namespace (Optional[str], default _SESSION_NAMESPACE): Namespace where the dependencies live. Defaults to session.namespace

Returns:

    deps_annotation (dict): A dict representation of the key/value pair used to hold the content hash for the given set of dependencies

Source code in oper8/x/utils/deps_annotation.py
def get_deps_annotation(
    session: Session,
    dependencies: List[Union[dict, Tuple[str, str]]],
    resource_name: str = "",
    namespace: Optional[str] = _SESSION_NAMESPACE,
) -> dict:
    """Get a dict holding an annotation key/value pair representing the unique
    content hash of all given dependencies. This can be used to force pods to
    roll over when a dependency such as a ConfigMap or Secret changes its
    content. This function supports two ways of fetching dependency content:

    1. Dict representation of the object
    2. Tuple of the scoped (kind, name) for the object

    Additionally, this function holds special logic for ConfigMap and Secret
    dependencies, but can handle arbitrary kinds. For kinds without special
    logic, the full dict representation is used to compute the hash.

    Args:
        session:  Session
            The current session
        dependencies:  list(dict or str or cdk8s.ApiObject)
            An ordered list of dependencies to compute the content hash from
        resource_name:  str
            A string name for the resource (used for logging)
        namespace:  Optional[str]
            Namespace where the dependencies live. Defaults to session.namespace

    Returns:
        deps_annotation:  dict
            A dict representation of the key/value pair used to hold the content
            hash for the given set of dependencies
    """
    content_hash = hashlib.sha1()
    namespace = namespace if namespace != _SESSION_NAMESPACE else session.namespace
    for dep in dependencies:
        # Get the dict representation depending on what type this is
        if isinstance(dep, tuple):
            log.debug3("[%s] Handling tuple dependency: %s", resource_name, dep)
            assert len(dep) == 2, f"Invalid dependency tuple given: {dep}"
            kind, name = dep
            success, dep_dict = session.get_object_current_state(
                name=name,
                kind=kind,
                namespace=namespace,
            )
            assert success, f"Failed to fetch current state of {kind}/{name}"

            # There are several reasons that the upstream dependency would not
            # be found, some legitimate and some not:
            #
            # 1. The dependency is not managed by this operator and this is a
            #   dry run. This can't be solved since we don't have control over
            #   the state of the cluster in dry run.
            #
            # 2. The dependency is part of a cyclic dependency between
            #   Components. While a sign of something bad, this is ultimately
            #   something that needs to be solved by decoupling the Component
            #   dependencies.
            #
            # 3. The upstream is an undeclared chart dependency. This is an
            #   easily fixed bug in the component by adding the necessary
            #   add_dependency() calls.
            #
            # 4. The upstream is part of an undeclared component dependency.
            #   This is an easily fixed bug in the parent Application by adding
            #   the missing add_component_dependency() calls.
            #
            # Since some of these are things that should be quickly fixed, but
            # some are signs of larger systemic problems, we warn and move on.
            # For (1), these external dependencies should be present in the
            # cluster. For the rest, once the deploy completes for the coupled
            # components, the resources will show up and the next reconcile will
            # cause the hash to change to what it should be.
            if dep_dict is None:
                log.warning(
                    "Working around missing external data dependency for [%s]: %s/%s",
                    resource_name,
                    kind,
                    name,
                )
                continue
        else:
            log.debug3(
                "[%s] Handling dict dependency: %s",
                resource_name,
                dep.get("metadata", {}).get("name"),
            )
            assert isinstance(dep, dict), f"Unknown dependency type: {type(dep)}"
            dep_dict = dep

        # The hash should be unique to the name and kind
        kind = dep_dict.get("kind", "")
        name = dep_dict.get("metadata", {}).get("name", "")
        content_hash.update(kind.encode("utf-8"))
        content_hash.update(name.encode("utf-8"))

        # Compute the data hash based on any kind-specific logic
        if kind in ["Secret", "ConfigMap"]:
            log.debug2("Getting data hash for dep of kind %s", kind)
            data_dict = dep_dict.get("data", {})
        else:
            log.debug2("Getting full hash for dep of kind %s", kind)
            data_dict = dep_dict
        log.debug4("Data Dict: %s", data_dict)

        # Add to the overall hash
        content_hash.update(json.dumps(data_dict, sort_keys=True).encode("utf-8"))

    # Return the annotation dict
    final_hash = content_hash.hexdigest()
    log.debug2("[%s] Final Hash: %s", resource_name, final_hash)
    return {DEPS_ANNOTATION: final_hash}
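
The following standalone sketch mirrors how the function above hashes a dict dependency (kind, then name, then the sorted-JSON data for Secrets and ConfigMaps), so it is clear why a single changed key forces a new annotation value. The ConfigMap content is illustrative only.

import hashlib
import json

config_map = {
    "kind": "ConfigMap",
    "metadata": {"name": "app-config"},
    "data": {"LOG_LEVEL": "debug"},
}

content_hash = hashlib.sha1()
# Hash is unique to the kind and name...
content_hash.update(config_map["kind"].encode("utf-8"))
content_hash.update(config_map["metadata"]["name"].encode("utf-8"))
# ...and to the (sorted) data content, so any data change yields a new digest
content_hash.update(json.dumps(config_map["data"], sort_keys=True).encode("utf-8"))
print(content_hash.hexdigest())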

tls

Shared utilities for managing TLS keys and certs

generate_ca_cert(key, encode=True)

Generate a Certificate Authority certificate based on a private key

Parameters:

    key (RSAPrivateKey, required): The private key that will pair with this CA cert
    encode (bool, default True): Base64 encode the output pem bytes

Returns:

    ca (str): The PEM encoded string for this CA cert

Source code in oper8/x/utils/tls.py
def generate_ca_cert(key, encode=True):
    """Generate a Certificate Authority certificate based on a private key

    Args:
        key:  RSAPrivateKey
            The private key that will pair with this CA cert
        encode:  bool
            Base64 encode the output pem bytes

    Returns:
        ca:  str
            The PEM encoded string for this CA cert
    """

    # Create self-signed CA
    # The specifics of the extensions that are required for the CA were gleaned
    # from the etcd operator example found here:
    # https://github.com/openshift/etcd-ha-operator/blob/master/roles/tls_certs/templates/ca_crt_conf.j2
    log.debug("Creating CA")
    subject = get_subject()
    ca = (
        x509.CertificateBuilder()
        .subject_name(subject)
        .issuer_name(subject)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(datetime.datetime.utcnow())
        .not_valid_after(
            # Our certificate will be valid for 10000 days
            datetime.datetime.utcnow()
            + datetime.timedelta(days=10000)
        )
        .add_extension(
            # X509v3 Basic Constraints: critical
            #     CA:TRUE
            x509.BasicConstraints(ca=True, path_length=None),
            critical=True,
        )
        .add_extension(
            # X509v3 Key Usage: critical
            #     Digital Signature, Key Encipherment, Certificate Sign
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=True,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=True,
                crl_sign=False,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .sign(key, hashes.SHA256(), default_backend())
    )

    cert_pem = ca.public_bytes(Encoding.PEM)
    return (base64.b64encode(cert_pem) if encode else cert_pem).decode("utf-8")
generate_derived_key_cert_pair(ca_key, san_list, encode=True, key_cert_sign=False)

Generate a certificate for use in encrypting TLS traffic, derived from a common key

Parameters:

    ca_key (RSAPrivateKey, required): The CA private key used to sign the derived certificate
    san_list (list(str), required): List of strings to use for the Subject Alternate Name
    encode (bool, default True): Whether or not to base64 encode the output pem strings
    key_cert_sign (bool, default False): Whether or not to set the key_cert_sign usage bit in the generated certificate. This may be needed when the derived key/cert will be used as an intermediate CA or expected to act as a self-signed CA. Reference: https://ldapwiki.com/wiki/KeyUsage

Returns:

    key_pem (str): The pem-encoded key (base64 encoded if encode set)
    crt_pem (str): The pem-encoded cert (base64 encoded if encode set)

Source code in oper8/x/utils/tls.py
def generate_derived_key_cert_pair(ca_key, san_list, encode=True, key_cert_sign=False):
    """Generate a certificate for use in encrypting TLS traffic, derived from
    a common key

    Args:
        ca_key:  RSAPrivateKey
            The CA private key used to sign the derived certificate
        san_list:  list(str)
            List of strings to use for the Subject Alternate Name
        encode:  bool
            Whether or not to base64 encode the output pem strings
        key_cert_sign:  bool
            Whether or not to set the key_cert_sign usage bit in the generated certificate.
            This may be needed when the derived key/cert will be used as an intermediate CA
            or expected to act as a self-signed CA.
            Reference: https://ldapwiki.com/wiki/KeyUsage

    Returns:
        key_pem:  str
            The pem-encoded key (base64 encoded if encode set)
        crt_pem:  str
            The pem-encoded cert (base64 encoded if encode set)
    """

    # Create a new private key for the server
    key, key_pem = generate_key(encode=encode)

    # Create the server certificate as if using a CSR. The final key will be
    # signed by the CA private key, but will have the public key from the
    # server's key.
    #
    # NOTE: It is not legal to use an identical Common Name for both the CA and
    #   the derived certificate. With openssl 1.1.1k, this results in an invalid
    #   certificate that fails with "self signed certificate."
    #   CITE: https://stackoverflow.com/a/19738223
    cert = (
        x509.CertificateBuilder()
        .subject_name(get_subject(f"{DEFAULT_COMMON_NAME}.server"))
        .issuer_name(get_subject())
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
        .not_valid_before(datetime.datetime.utcnow())
        .not_valid_after(
            # Our certificate will be valid for 10000 days
            datetime.datetime.utcnow()
            + datetime.timedelta(days=10000)
        )
        .add_extension(
            x509.SubjectAlternativeName([x509.DNSName(san) for san in san_list]),
            critical=False,
        )
        .add_extension(
            # X509v3 Key Usage: critical
            #     Digital Signature, Key Encipherment
            x509.KeyUsage(
                digital_signature=True,
                content_commitment=False,
                key_encipherment=True,
                data_encipherment=False,
                key_agreement=False,
                key_cert_sign=key_cert_sign,
                crl_sign=False,
                encipher_only=False,
                decipher_only=False,
            ),
            critical=True,
        )
        .add_extension(
            # X509v3 Extended Key Usage:
            #     TLS Web Client Authentication, TLS Web Server Authentication
            x509.ExtendedKeyUsage(
                [ExtendedKeyUsageOID.CLIENT_AUTH, ExtendedKeyUsageOID.SERVER_AUTH]
            ),
            critical=False,
        )
        .sign(ca_key, hashes.SHA256(), default_backend())
    )

    crt_pem = cert.public_bytes(Encoding.PEM)
    return (key_pem, (base64.b64encode(crt_pem) if encode else crt_pem).decode("utf-8"))
generate_key(encode=True)

Generate a new RSA key for use when generating TLS components

Parameters:

    encode (bool, default True): Base64 encode the output pem bytes

Returns:

    key (RSAPrivateKey): The key object that can be used to sign certificates
    key_pem (str): The PEM encoded string for the key

Source code in oper8/x/utils/tls.py
def generate_key(encode=True):
    """Generate a new RSA key for use when generating TLS components

    Args:
        encode:  bool
            Base64 encode the output pem bytes

    Returns:
        key:  RSAPrivateKey
            The key object that can be used to sign certificates
        key_pem:  str
            The PEM encoded string for the key
    """
    key = rsa.generate_private_key(
        public_exponent=65537, key_size=2048, backend=default_backend()
    )
    key_pem = key.private_bytes(
        Encoding.PEM,
        PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    return (key, (base64.b64encode(key_pem) if encode else key_pem).decode("utf-8"))
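
A minimal sketch of chaining these helpers to build a CA and a derived server certificate. The import path follows the source location shown above, and the SAN values are illustrative.

from oper8.x.utils import tls

# 1. Generate the CA private key and self-signed CA cert (unencoded PEM strings)
ca_key, ca_key_pem = tls.generate_key(encode=False)
ca_crt_pem = tls.generate_ca_cert(ca_key, encode=False)

# 2. Derive a server key/cert pair signed by the CA, valid for the given SANs
server_key_pem, server_crt_pem = tls.generate_derived_key_cert_pair(
    ca_key=ca_key,
    san_list=["my-service", "my-service.my-namespace.svc"],
    encode=False,
)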
get_subject(common_name=DEFAULT_COMMON_NAME)

Get the subject object used when creating self-signed certificates. This will be consistent across all components, but will be tailored to the domain of the cluster.

Parameters:

    common_name (str, default DEFAULT_COMMON_NAME): The Common Name to use for this subject

Returns:

    subject (x509.Name): The full subject object to use when constructing certificates

Source code in oper8/x/utils/tls.py
def get_subject(common_name: str = DEFAULT_COMMON_NAME) -> x509.Name:
    """Get the subject object used when creating self-signed certificates. This
    will be consistent across all components, but will be tailored to the domain
    of the cluster.

    Args:
        common_name:  str
            The Common Name to use for this subject

    Returns:
        subject:  x509.Name
            The full subject object to use when constructing certificates
    """
    return x509.Name(
        [
            x509.NameAttribute(NameOID.COMMON_NAME, common_name),
        ]
    )
parse_private_key_pem(key_pem)

Parse the content of a pem-encoded private key file into an RSAPrivateKey

Parameters:

    key_pem (str, required): The pem-encoded key (not base64 encoded)

Returns:

    key (RSAPrivateKey): The parsed key object which can be used for signing certs

Source code in oper8/x/utils/tls.py
def parse_private_key_pem(key_pem):
    """Parse the content of a pem-encoded private key file into an RSAPrivateKey

    Args:
        key_pem:  str
            The pem-encoded key (not base64 encoded)

    Returns:
        key:  RSAPrivateKey
            The parsed key object which can be used for signing certs
    """
    return serialization.load_pem_private_key(key_pem.encode("utf-8"), None)
parse_public_key_pem_from_cert(cert_pem)

Extract the pem-encoded public key from a pem-encoded certificate

Parameters:

    cert_pem (str, required): The pem-encoded certificate (not base64 encoded)

Returns:

    key (str): The pem-encoded public key extracted from the certificate

Source code in oper8/x/utils/tls.py
def parse_public_key_pem_from_cert(cert_pem):
    """Extract the pem-encoded public key from a pem-encoded

    Args:
        cert_pem:  str
            The pem-encoded certificate (not base64 encoded)

    Returns:
        key:  str
            The pem-encoded public key extracted from the certificate
    """
    return (
        x509.load_pem_x509_certificate(cert_pem.encode("utf-8"))
        .public_key()
        .public_bytes(
            serialization.Encoding.PEM, serialization.PublicFormat.SubjectPublicKeyInfo
        )
        .decode("utf-8")
    )
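
A small sketch (values illustrative) showing the parse helpers as inverses of the generate helpers above: the unencoded PEM strings can be parsed back into usable objects.

from oper8.x.utils import tls

key, key_pem = tls.generate_key(encode=False)
ca_crt_pem = tls.generate_ca_cert(key, encode=False)

# Recover the RSAPrivateKey object from its PEM string
parsed_key = tls.parse_private_key_pem(key_pem)

# Extract the PEM-encoded public key from the CA certificate
public_key_pem = tls.parse_public_key_pem_from_cert(ca_crt_pem)
print(public_key_pem.splitlines()[0])  # -> "-----BEGIN PUBLIC KEY-----"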

tls_context

Common tls_context module setup

factory

This module implements a factory for TlsContext implementations

get_tls_context(session, config_overrides=None)

Get an instance of the configured implementation of the tls context

Parameters:

    session (Session, required): The current deploy session
    config_overrides (Optional[dict], default None): Optional runtime config values. These will overwrite any values pulled from the session.config

Returns:

    tls_context (ITlsContext): The constructed instance of the context

Source code in oper8/x/utils/tls_context/factory.py
def get_tls_context(
    session: Session,
    config_overrides: Optional[dict] = None,
) -> ITlsContext:
    """Get an instance of the configured implementation of the tls context

    Args:
        session:  Session
            The current deploy session
        config_overrides:  Optional[dict]
            Optional runtime config values. These will overwrite any values
            pulled from the session.config

    Returns:
        tls_context:  ITlsContext
            The constructed instance of the context
    """
    return _TlsContextSingletonFactory.get_tls_context(
        session,
        config_overrides=config_overrides,
    )
register_tls_context_type(context_class)

Register a constructor for a given context implementation type

Parameters:

    context_class (Type[ITlsContext], required): The ITlsContext child class to register

Source code in oper8/x/utils/tls_context/factory.py
def register_tls_context_type(context_class: Type[ITlsContext]):
    """Register a constructor for a given context implementation type

    Args:
        context_class:  Type[ITlsContext]
            The ITlsContext child class to register
    """
    _TlsContextSingletonFactory.register(context_class)
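
A hedged sketch of registering a custom implementation with the factory. The class name, TYPE_LABEL value, and behavior are hypothetical; only the import paths and the abstract method signatures come from the sources shown on this page.

from oper8.x.utils.tls_context.factory import register_tls_context_type
from oper8.x.utils.tls_context.interface import ITlsContext

class CertManagerTlsContext(ITlsContext):
    """Hypothetical implementation backed by an external certificate issuer"""

    # Label used to select this implementation (mirrors InternalTlsContext's
    # TYPE_LABEL = "internal"); the exact selection mechanism is an assumption
    TYPE_LABEL = "cert-manager"

    def get_server_key_cert_pair(
        self,
        server_component,
        key_name=None,
        encode=True,
        existing_key_pem=None,
        existing_cert_pem=None,
    ):
        ...  # delegate to the external issuer

    def get_client_cert(self, client_component, encode=True):
        ...  # return the issuer's CA bundle

register_tls_context_type(CertManagerTlsContext)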
interface

This module defines the interface needed to provide TLS key/cert pairs to a given microservice and fetch a client-side certificate for making calls to a microservice that serves a key/cert pair derived from this context.

ITlsContext

Bases: ABC

This interface encapsulates the management of TLS for a running instance of the operand. It encapsulates the following functions:

  • Manage a CA key/cert pair for signing derived microservice certificates
  • Create derived key/cert pairs for individual microservices
Source code in oper8/x/utils/tls_context/interface.py
class ITlsContext(abc.ABC):
    """This interface encapsulates the management of TLS for a running instance
    of the operand. It encapsulates the following functions:

    * Manage a CA key/cert pair for signing derived microservice certificates
    * Create derived key/cert pairs for individual microservices
    """

    # The string constant that will be used by derived classes to define the
    # type label string
    _TYPE_LABEL_ATTRIBUTE = "TYPE_LABEL"

    ## Construction ############################################################

    def __init__(self, session: Session, config: aconfig.Config):
        """Construct with the current session so that member functions do not
        take it as an argument

        Args:
            session:  Session
                The current deploy session
            config:  aconfig.Config
                The config for this instance
        """
        self._session = session
        self._config = config

    @property
    def session(self) -> Session:
        return self._session

    @property
    def config(self) -> aconfig.Config:
        return self._config

    ## Interface ###############################################################

    def request_server_key_cert_pair(  # noqa: B027
        self,
        server_component: Component,
        san_hostnames_list: List[str],
        san_ip_list: List[str] = None,
        key_name: str = None,
        intermediate_ca: bool = False,
    ) -> None:
        """Request creation of the PEM encoded value of the key/cert pair for a
        given server. This function has to be called before render_chart is
        called, i.e., during the parse_config / Component constructor phase.
        Implementations of this function will generate the pair (in background)
        if it has not been already requested.

        Args:
            server_component:  Component
                The Component that manages the server. This can be used to add
                a new Component if needed that will manage the resource for the
                derived content and configure dependencies.
            san_hostnames_list:  List[str]
                The list of Subject Alternate Names (hostnames only)
            san_ip_list:  List[str]
                The list of Subject Alternate Names (ip addresses only, IPv4,
                IPv6)
            key_name:  str
                Used when a component requires multiple certificates; the
                key_name distinguishes between the component's cert requests.
            intermediate_ca:  bool
                Whether or not to configure the certificate for use as an
                intermediate CA. This implies setting the key_cert_sign usage
                bit in the generated cert.
                Reference: https://ldapwiki.com/wiki/KeyUsage
        """

    @abc.abstractmethod
    def get_server_key_cert_pair(
        self,
        server_component: Component,
        key_name: str = None,
        encode: bool = True,
        existing_key_pem: str = None,
        existing_cert_pem: str = None,
    ) -> Tuple[str, str]:
        """Get the PEM encoded value of the key/cert pair for a given server.
        You must first call request_server_key_cert_pair in the render_config
        phase, and later retrieve the generated cert in render_chart.

        Args:
            server_component:  Component
                The Component that manages the server. This can be used to add
                a new Component if needed that will manage the resource for the
                derived content and configure dependencies.
            key_name:  str
                Used when a component requires multiple certificates; the
                key_name distinguishes between the component's cert requests.
            encode:  bool
                Whether or not to base64 encode the output pem strings
            existing_key_pem: str
                Optionally, you may provide the (decoded) value of an existing
                key/cert pair. The TLS context is free to check the cert/key
                and return this pair or generate a new one.
            existing_cert_pem: str
                Optionally, you may provide the (decoded) value of an existing
                key/cert pair. The TLS context is free to check the cert/key
                and return this pair or generate a new one.
        Returns:
            key_pem:  Optional[str]
                This is the pem-encoded key content (base64
                encoded if encode is set)
            cert_pem:  Optional[str]
                This is the pem-encoded cert content (base64
                encoded if encode is set)
        """

    @abc.abstractmethod
    def get_client_cert(
        self,
        client_component: Component,
        encode: bool = True,
    ) -> str:
        """Get a cert which can be used by a client to connect to a server which
        is serving using a key/cert pair signed by the shared CA.

        Args:
            client_component:  Component
                The Component that manages the client. This can be used to add
                a new Component if needed that will manage the resource for the
                derived content and configure dependencies.
            encode:  bool
                Whether or not to base64 encode the output pem strings

        Returns:
            crt_pem:  Optional[str]
                The pem-encoded cert (base64 encoded if encode set).
        """
__init__(session, config)

Construct with the current session so that member functions do not take it as an argument

Parameters:

    session (Session, required): The current deploy session
    config (aconfig.Config, required): The config for this instance

Source code in oper8/x/utils/tls_context/interface.py
def __init__(self, session: Session, config: aconfig.Config):
    """Construct with the current session so that member functions do not
    take it as an argument

    Args:
        session:  Session
            The current deploy session
        config:  aconfig.Config
            The config for this instance
    """
    self._session = session
    self._config = config
get_client_cert(client_component, encode=True) abstractmethod

Get a cert which can be used by a client to connect to a server which is serving using a key/cert pair signed by the shared CA.

Parameters:

    client_component (Component, required): The Component that manages the client. This can be used to add a new Component if needed that will manage the resource for the derived content and configure dependencies.
    encode (bool, default True): Whether or not to base64 encode the output pem strings

Returns:

    crt_pem (Optional[str]): The pem-encoded cert (base64 encoded if encode set)

Source code in oper8/x/utils/tls_context/interface.py
@abc.abstractmethod
def get_client_cert(
    self,
    client_component: Component,
    encode: bool = True,
) -> str:
    """Get a cert which can be used by a client to connect to a server which
    is serving using a key/cert pair signed by the shared CA.

    Args:
        client_component:  Component
            The Component that manages the client. This can be used to add
            a new Component if needed that will manage the resource for the
            derived content and configure dependencies.
        encode:  bool
            Whether or not to base64 encode the output pem strings

    Returns:
        crt_pem:  Optional[str]
            The pem-encoded cert (base64 encoded if encode set).
    """
get_server_key_cert_pair(server_component, key_name=None, encode=True, existing_key_pem=None, existing_cert_pem=None) abstractmethod

Get the PEM encoded value of the key/cert pair for a given server. You must first call request_server_key_cert_pair in the render_config phase, and later retrieve the generated cert in render_chart.

Parameters:

    server_component (Component, required): The Component that manages the server. This can be used to add a new Component if needed that will manage the resource for the derived content and configure dependencies.
    key_name (str, default None): Used when a component requires multiple certificates; the key_name distinguishes between the component's cert requests.
    encode (bool, default True): Whether or not to base64 encode the output pem strings
    existing_key_pem (str, default None): Optionally, an existing (decoded) key PEM. The TLS context is free to check the cert/key and return this pair or generate a new one.
    existing_cert_pem (str, default None): Optionally, an existing (decoded) cert PEM. The TLS context is free to check the cert/key and return this pair or generate a new one.

Returns:

    key_pem (Optional[str]): The pem-encoded key content (base64 encoded if encode is set)
    cert_pem (Optional[str]): The pem-encoded cert content (base64 encoded if encode is set)

Source code in oper8/x/utils/tls_context/interface.py
@abc.abstractmethod
def get_server_key_cert_pair(
    self,
    server_component: Component,
    key_name: str = None,
    encode: bool = True,
    existing_key_pem: str = None,
    existing_cert_pem: str = None,
) -> Tuple[str, str]:
    """Get the PEM encoded value of the key/cert pair for a given server.
    You must first call request_server_key_cert_pair in the render_config
    phase, and later retrieve the generated cert in render_chart.

    Args:
        server_component:  Component
            The Component that manages the server. This can be used to add
            a new Component if needed that will manage the resource for the
            derived content and configure dependencies.
        key_name:  str
            Used when a component requires multiple certificates; the key_name
            distinguishes between the component's cert requests.
        encode:  bool
            Whether or not to base64 encode the output pem strings
        existing_key_pem: str
            Optionally, you may provide the (decoded) value of an existing
            key/cert pair. The TLS context is free to check the cert/key and
            return this pair or generate a new one.
        existing_cert_pem: str
            Optionally, you may provide the (decoded) value of an existing
            key/cert pair. The TLS context is free to check the cert/key and
            return this pair or generate a new one.
    Returns:
        key_pem:  Optional[str]
            This is the pem-encoded key content (base64
            encoded if encode is set)
        cert_pem:  Optional[str]
            This is the pem-encoded cert content (base64
            encoded if encode is set)
    """
request_server_key_cert_pair(server_component, san_hostnames_list, san_ip_list=None, key_name=None, intermediate_ca=False)

Request creation of the PEM encoded value of the key/cert pair for a given server. This function has to be called before render_chart is called, i.e., during the parse_config / Component constructor phase. Implementations of this function will generate the pair (in the background) if it has not already been requested.

Parameters:

    server_component (Component, required): The Component that manages the server. This can be used to add a new Component if needed that will manage the resource for the derived content and configure dependencies.
    san_hostnames_list (List[str], required): The list of Subject Alternate Names (hostnames only)
    san_ip_list (List[str], default None): The list of Subject Alternate Names (IP addresses only, IPv4 or IPv6)
    key_name (str, default None): Used when a component requires multiple certificates; the key_name distinguishes between the component's cert requests.
    intermediate_ca (bool, default False): Whether or not to configure the certificate for use as an intermediate CA. This implies setting the key_cert_sign usage bit in the generated cert. Reference: https://ldapwiki.com/wiki/KeyUsage

Source code in oper8/x/utils/tls_context/interface.py
def request_server_key_cert_pair(  # noqa: B027
    self,
    server_component: Component,
    san_hostnames_list: List[str],
    san_ip_list: List[str] = None,
    key_name: str = None,
    intermediate_ca: bool = False,
) -> None:
    """Request creation of the PEM encoded value of the key/cert pair for a
    given server. This function has to be called before render_chart is
    called, i.e., during the parse_config / Component constructor phase.
    Implementations of this function will generate the pair (in background)
    if it has not been already requested.

    Args:
        server_component:  Component
            The Component that manages the server. This can be used to add
            a new Component if needed that will manage the resource for the
            derived content and configure dependencies.
        san_hostnames_list:  List[str]
            The list of Subject Alternate Names (hostnames only)
        san_ip_list:  List[str]
            The list of Subject Alternate Names (ip addresses only, IPv4,
            IPv6)
        key_name:  str
            Used when a component requires multiple certificates; the key_name
            distinguishes between the component's cert requests.
        intermediate_ca:  bool
            Whether or not to configure the certificate for use as an
            intermediate CA. This implies setting the key_cert_sign usage
            bit in the generated cert.
            Reference: https://ldapwiki.com/wiki/KeyUsage
    """
internal

This implementation of the ITlsContext uses internal code to manage the tls context

InternalCaComponent

Bases: Oper8xComponent

This Component will manage a single secret containing a CA key/cert pair

Source code in oper8/x/utils/tls_context/internal.py
class InternalCaComponent(Oper8xComponent):
    """This Component will manage a single secret containing a CA key/cert pair"""

    CA_SECRET_NAME = "infra-tls-ca"
    CA_KEY_FILENAME = "key.ca.pem"
    CA_CRT_FILENAME = "crt.ca.pem"

    name = "internal-tls"

    ## Component Interface #####################################################

    def __init__(
        self,
        session: Session,
        *args,
        labels: Optional[dict] = None,
        **kwargs,
    ):
        """Construct the parent Component and set up internal data holders"""
        super().__init__(*args, session=session, **kwargs)
        self._ca_key_pem = None
        self._ca_crt_pem = None

        # Pull labels from config or use defaults
        self._labels = labels

    def build_chart(self, *args, **kwargs):
        """Implement delayed chart construction in build_chart"""

        # Make sure the data values are populated
        self._initialize_data()

        # Get the labels to use for the secret
        secret_cluster_name = self._get_secret_name()
        labels = self._labels
        if labels is None:
            labels = common.get_labels(
                cluster_name=secret_cluster_name,
                session=self.session,
                component_name=self.CA_SECRET_NAME,
            )
        log.debug("Creating internal CA secret: %s", secret_cluster_name)
        self.add_resource(
            name=self.CA_SECRET_NAME,
            obj=dict(
                kind="Secret",
                apiVersion="v1",
                metadata=common.metadata_defaults(
                    session=self.session,
                    cluster_name=secret_cluster_name,
                    labels=labels,
                ),
                data={
                    self.CA_KEY_FILENAME: common.b64_secret(self._ca_key_pem),
                    self.CA_CRT_FILENAME: common.b64_secret(self._ca_crt_pem),
                },
            ),
        )

    ## Public Utilities ########################################################

    def get_ca_key_cert(self) -> Tuple[str, str]:
        """Get the pem-encoded CA key cert pair

        Returns:
            ca_key_pem:  str
                The pem-encoded (not base64 encoded) secret key
            ca_crt_pem:  str
                The pem-encoded (not base64 encoded) secret cert
        """
        self._initialize_data()
        return self._ca_key_pem, self._ca_crt_pem

    ## Implementation Details ##################################################

    def _get_secret_name(self) -> str:
        """Get the CA secret name with any scoping applied"""
        return self.get_cluster_name(self.CA_SECRET_NAME)

    def _initialize_data(self):
        """Initialize the data if needed"""

        # If this is the first time, actually do the init
        if None in [self._ca_crt_pem, self._ca_key_pem]:
            secret_cluster_name = self._get_secret_name()
            log.debug2("Cluster TLS Secret Name: %s", secret_cluster_name)
            success, content = self.session.get_object_current_state(
                kind="Secret",
                name=secret_cluster_name,
            )
            assert_cluster(
                success, f"Failed to check cluster for [{secret_cluster_name}]"
            )
            if content is not None:
                # Extract the pem strings
                key_pem = content.get("data", {}).get(self.CA_KEY_FILENAME)
                crt_pem = content.get("data", {}).get(self.CA_CRT_FILENAME)
                if None in [key_pem, crt_pem]:
                    log.warning(
                        "Found CA secret [%s] but content is invalid!",
                        secret_cluster_name,
                    )
                    self._generate()
                else:
                    log.debug("Found valid CA secret content.")
                    self._ca_key_pem = common.b64_secret_decode(key_pem)
                    self._ca_crt_pem = common.b64_secret_decode(crt_pem)
            else:
                log.debug2("No existing CA secret found. Generating.")
                self._generate()

    def _generate(self):
        """Generate a new CA"""
        key, self._ca_key_pem = tls.generate_key(encode=False)
        self._ca_crt_pem = tls.generate_ca_cert(key, encode=False)
__init__(session, *args, labels=None, **kwargs)

Construct the parent Component and set up internal data holders

Source code in oper8/x/utils/tls_context/internal.py
def __init__(
    self,
    session: Session,
    *args,
    labels: Optional[dict] = None,
    **kwargs,
):
    """Construct the parent Component and set up internal data holders"""
    super().__init__(*args, session=session, **kwargs)
    self._ca_key_pem = None
    self._ca_crt_pem = None

    # Pull labels from config or use defaults
    self._labels = labels
build_chart(*args, **kwargs)

Implement delayed chart construction in build_chart

Source code in oper8/x/utils/tls_context/internal.py
def build_chart(self, *args, **kwargs):
    """Implement delayed chart construction in build_chart"""

    # Make sure the data values are populated
    self._initialize_data()

    # Get the labels to use for the secret
    secret_cluster_name = self._get_secret_name()
    labels = self._labels
    if labels is None:
        labels = common.get_labels(
            cluster_name=secret_cluster_name,
            session=self.session,
            component_name=self.CA_SECRET_NAME,
        )
    log.debug("Creating internal CA secret: %s", secret_cluster_name)
    self.add_resource(
        name=self.CA_SECRET_NAME,
        obj=dict(
            kind="Secret",
            apiVersion="v1",
            metadata=common.metadata_defaults(
                session=self.session,
                cluster_name=secret_cluster_name,
                labels=labels,
            ),
            data={
                self.CA_KEY_FILENAME: common.b64_secret(self._ca_key_pem),
                self.CA_CRT_FILENAME: common.b64_secret(self._ca_crt_pem),
            },
        ),
    )
get_ca_key_cert()

Get the pem-encoded CA key cert pair

Returns:

    ca_key_pem (str): The pem-encoded (not base64 encoded) secret key
    ca_crt_pem (str): The pem-encoded (not base64 encoded) secret cert

Source code in oper8/x/utils/tls_context/internal.py
def get_ca_key_cert(self) -> Tuple[str, str]:
    """Get the pem-encoded CA key cert pair

    Returns:
        ca_key_pem:  str
            The pem-encoded (not base64 encoded) secret key
        ca_crt_pem:  str
            The pem-encoded (not base64 encoded) secret cert
    """
    self._initialize_data()
    return self._ca_key_pem, self._ca_crt_pem
InternalTlsContext

Bases: ITlsContext

Source code in oper8/x/utils/tls_context/internal.py
class InternalTlsContext(ITlsContext):
    __doc__ = __doc__

    TYPE_LABEL = "internal"

    def __init__(self, session: Session, *args, **kwargs):
        """At construct time, this instance will add a Component to the session
        which will manage the CA secret
        """
        super().__init__(session, *args, **kwargs)

        # Add the CA Component if it is not already present in the session.
        # There is a pretty nasty condition here when running in standalone mode
        # where the factory may attempt to recreate the singleton instance after
        # a subsystem has overwritten it, so we need to check and see if there
        # is a matching component in the session already
        pre_existing_component = [
            comp
            for comp in session.get_components()
            if comp.name == InternalCaComponent.name
        ]
        if pre_existing_component:
            self._component = pre_existing_component[0]
        else:
            self._component = InternalCaComponent(
                session=session, labels=self.config.labels
            )

        # Keep track of pairs for each server so that they are only generated
        # once
        self._server_pairs = {}

    ## Interface ###############################################################
    def request_server_key_cert_pair(
        self,
        server_component: Component,
        san_hostnames_list: List[str],
        san_ip_list: List[str] = None,
        key_name: str = None,
        intermediate_ca: bool = False,
    ) -> None:
        """Request creation of the PEM encoded value of the key/cert pair for a
        given server. This function must be called before render_chart is
        called, i.e., during the parse_config / Component constructor phase.
        Implementations of this function will generate the pair (in the
        background) if it has not already been requested.

        Args:
            server_component:  Component
                The Component that manages the server. This can be used to add
                a new Component if needed that will manage the resource for the
                derived content and configure dependencies.
            san_hostnames_list:  List[str]
                The list of Subject Alternate Name hostnames
            san_ip_list:  List[str]
                The list of Subject Alternate Name IP addresses
            key_name:  str
                In case component requires multiple certificates. The key_name
                is used to distinguish between component cert requests.
            intermediate_ca:  bool
                Whether or not to configure the certificate for use as an
                intermediate CA. This implies setting the key_cert_sign usage
                bit in the generated cert.
                Reference: https://ldapwiki.com/wiki/KeyUsage
        """
        cache_key = server_component.name + (
            "-" + key_name if key_name is not None else ""
        )

        if cache_key in self._server_pairs:
            log.warning(
                "Certificate server key/cert pair for %s has been already "
                "requested. Ignoring this request.",
                cache_key,
            )
            return

        log.debug("Generating server key/cert pair for %s", cache_key)

        # Mark the server component as dependent on the internal component. This
        # is not strictly necessary since values are consumed by value, but it
        # makes sequential sense.
        self.session.add_component_dependency(server_component, self._component)

        # Get the CA's private key
        ca_key = tls.parse_private_key_pem(self._component.get_ca_key_cert()[0])

        # Generate the derived pair
        san_list = (san_hostnames_list or []) + (san_ip_list or [])
        self._server_pairs[cache_key] = tls.generate_derived_key_cert_pair(
            ca_key=ca_key,
            san_list=san_list,
            encode=False,
            key_cert_sign=intermediate_ca,
        )

    def get_server_key_cert_pair(
        self,
        server_component: Component,
        key_name: str = None,
        encode: bool = True,
        existing_key_pem: str = None,
        existing_cert_pem: str = None,
    ) -> Tuple[str, str]:
        """This function derives a server key/cert pair from the CA key/cert
        managed by the internal component.

        Args:
            server_component:  Component
                The Component that manages the server. This can be used to add
                a new Component if needed that will manage the resource for the
                derived content and configure dependencies.
            key_name:  str
                In case component requires multiple certificates. The key_name
                is used to distinguish between component cert requests.
            encode:  bool
                Whether or not to base64 encode the output pem strings
            existing_key_pem: str
                If both existing key/cert are specified, then they are returned
                immediately without any checks
            existing_cert_pem: str
                If both existing key/cert are specified, then they are returned
                immediately without any checks
        Returns:
            key_pem:  str
                This is the pem-encoded key content (base64
                encoded if encode is set)
            cert_pem:  str
                This is the pem-encoded cert content (base64
                encoded if encode is set)
        """
        log.debug2("Getting server key/cert pair for %s", server_component)
        cache_key = server_component.name + (
            "-" + key_name if key_name is not None else ""
        )

        assert (
            cache_key in self._server_pairs
        ), f"Trying to obtain certificate {key_name} which was not previouly requested"
        if existing_key_pem is not None and existing_cert_pem is not None:
            key_pem = existing_key_pem
            cert_pem = existing_cert_pem
        else:
            # Return the stored pair for this server
            (key_pem, cert_pem) = self._server_pairs[cache_key]
        if encode:
            return (common.b64_secret(key_pem), common.b64_secret(cert_pem))
        return (key_pem, cert_pem)

    def get_client_cert(
        self,
        client_component: Component,
        encode: bool = True,
    ) -> str:
        """Get the CA's public cert

        Args:
            client_component:  Component
                The Component that manages the client. This implementation does
                not need the component.
            encode:  bool
                Whether or not to base64 encode the output pem strings

        Returns:
            crt_pem:  Optional[str]
               The pem-encoded cert (base64 encoded if encode set)
        """
        log.debug2("Getting client cert for %s", client_component)
        _, ca_crt = self._component.get_ca_key_cert()
        if encode:
            return common.b64_secret(ca_crt)
        return ca_crt
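
A hedged end-to-end sketch of using the class directly: the import path mirrors the file path shown above, while wire_up_tls, server_component, and the SAN hostnames are purely illustrative names that do not come from the source.

from oper8.x.utils.tls_context.internal import InternalTlsContext

def wire_up_tls(session, server_component):
    """Request a server pair in the constructor phase, then read it back later"""
    tls_ctx = InternalTlsContext(session)

    # parse_config / Component constructor phase: request generation of the pair
    tls_ctx.request_server_key_cert_pair(
        server_component=server_component,
        san_hostnames_list=["my-server", "my-server.my-namespace.svc"],
    )

    # Render phase: fetch the generated pair (base64 encoded by default)
    key_b64, crt_b64 = tls_ctx.get_server_key_cert_pair(server_component)
    return key_b64, crt_b64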
__init__(session, *args, **kwargs)

At construct time, this instance will add a Component to the session which will manage the CA secret

Source code in oper8/x/utils/tls_context/internal.py
def __init__(self, session: Session, *args, **kwargs):
    """At construct time, this instance will add a Component to the session
    which will manage the CA secret
    """
    super().__init__(session, *args, **kwargs)

    # Add the CA Component if it is not already present in the session.
    # There is a pretty nasty condition here when running in standalone mode
    # where the factory may attempt to recreate the singleton instance after
    # a subsystem has overwritten it, so we need to check and see if there
    # is a matching component in the session already
    pre_existing_component = [
        comp
        for comp in session.get_components()
        if comp.name == InternalCaComponent.name
    ]
    if pre_existing_component:
        self._component = pre_existing_component[0]
    else:
        self._component = InternalCaComponent(
            session=session, labels=self.config.labels
        )

    # Keep track of pairs for each server so that they are only generated
    # once
    self._server_pairs = {}
get_client_cert(client_component, encode=True)

Get the CA's public cert

Parameters:

client_component (Component, required)
    The Component that manages the client. This implementation does not need the component.
encode (bool, default: True)
    Whether or not to base64 encode the output pem strings

Returns:

crt_pem (Optional[str])
    The pem-encoded cert (base64 encoded if encode is set)

Source code in oper8/x/utils/tls_context/internal.py
def get_client_cert(
    self,
    client_component: Component,
    encode: bool = True,
) -> str:
    """Get the CA's public cert

    Args:
        client_component:  Component
            The Component that manages the client. This implementation does
            not need the component.
        encode:  bool
            Whether or not to base64 encode the output pem strings

    Returns:
        crt_pem:  Optional[str]
           The pem-encoded cert (base64 encoded if encode set)
    """
    log.debug2("Getting client cert for %s", client_component)
    _, ca_crt = self._component.get_ca_key_cert()
    if encode:
        return common.b64_secret(ca_crt)
    return ca_crt
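
As a sketch of the typical client-side use, the CA cert returned here can be dropped straight into a Secret's data section since it is already base64 encoded when encode=True. Here tls_ctx is assumed to be an InternalTlsContext instance, self a client Component with the same add_resource helper used by InternalCaComponent above, and the resource name "client-ca-trust" and data key "ca.crt" are illustrative:

ca_crt_b64 = tls_ctx.get_client_cert(self, encode=True)
self.add_resource(
    name="client-ca-trust",
    obj=dict(
        kind="Secret",
        apiVersion="v1",
        metadata=dict(name="client-ca-trust"),
        data={"ca.crt": ca_crt_b64},
    ),
)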
get_server_key_cert_pair(server_component, key_name=None, encode=True, existing_key_pem=None, existing_cert_pem=None)

This function derives a server key/cert pair from the CA key/cert managed by the internal component.

Parameters:

server_component (Component, required)
    The Component that manages the server. This can be used to add a new Component if needed that will manage the resource for the derived content and configure dependencies.
key_name (str, default: None)
    In case component requires multiple certificates. The key_name is used to distinguish between component cert requests.
encode (bool, default: True)
    Whether or not to base64 encode the output pem strings
existing_key_pem (str, default: None)
    If both existing key/cert are specified, then they are returned immediately without any checks
existing_cert_pem (str, default: None)
    If both existing key/cert are specified, then they are returned immediately without any checks

Returns:

key_pem (str)
    This is the pem-encoded key content (base64 encoded if encode is set)
cert_pem (str)
    This is the pem-encoded cert content (base64 encoded if encode is set)

Source code in oper8/x/utils/tls_context/internal.py
def get_server_key_cert_pair(
    self,
    server_component: Component,
    key_name: str = None,
    encode: bool = True,
    existing_key_pem: str = None,
    existing_cert_pem: str = None,
) -> Tuple[str, str]:
    """This function derives a server key/cert pair from the CA key/cert
    managed by the internal component.

    Args:
        server_component:  Component
            The Component that manages the server. This can be used to add
            a new Component if needed that will manage the resource for the
            derived content and configure dependencies.
        key_name:  str
            In case component requires multiple certificates. The key_name
            is used to distinguish between component cert requests.
        encode:  bool
            Whether or not to base64 encode the output pem strings
        existing_key_pem: str
            If both existing key/cert are specified, then they are returned
            immediately without any checks
        existing_cert_pem: str
            If both existing key/cert are specified, then they are returned
            immediately without any checks
    Returns:
        key_pem:  str
            This is the pem-encoded key content (base64
            encoded if encode is set)
        cert_pem:  str
            This is the pem-encoded cert content (base64
            encoded if encode is set)
    """
    log.debug2("Getting server key/cert pair for %s", server_component)
    cache_key = server_component.name + (
        "-" + key_name if key_name is not None else ""
    )

    assert (
        cache_key in self._server_pairs
    ), f"Trying to obtain certificate {key_name} which was not previouly requested"
    if existing_key_pem is not None and existing_cert_pem is not None:
        key_pem = existing_key_pem
        cert_pem = existing_cert_pem
    else:
        # Return the stored pair for this server
        (key_pem, cert_pem) = self._server_pairs[cache_key]
    if encode:
        return (common.b64_secret(key_pem), common.b64_secret(cert_pem))
    return (key_pem, cert_pem)
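
A sketch of the "bring your own certificate" path: per the listing above, the pair must still have been requested first (the assertion runs before the existing-PEM shortcut), but when both existing PEMs are supplied they are returned as-is, base64 encoded here because encode defaults to True. The user_supplied_* variables are hypothetical:

key_b64, crt_b64 = tls_ctx.get_server_key_cert_pair(
    server_component,
    existing_key_pem=user_supplied_key_pem,
    existing_cert_pem=user_supplied_cert_pem,
)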
request_server_key_cert_pair(server_component, san_hostnames_list, san_ip_list=None, key_name=None, intermediate_ca=False)

Request creation of the PEM encoded value of the key/cert pair for a given server. This function must be called before render_chart is called, i.e., during the parse_config / Component constructor phase. Implementations of this function will generate the pair (in the background) if it has not already been requested.

Parameters:

server_component (Component, required)
    The Component that manages the server. This can be used to add a new Component if needed that will manage the resource for the derived content and configure dependencies.
san_hostnames_list (List[str], required)
    The list of Subject Alternate Name hostnames
san_ip_list (List[str], default: None)
    The list of Subject Alternate Name IP addresses
key_name (str, default: None)
    In case component requires multiple certificates. The key_name is used to distinguish between component cert requests.
intermediate_ca (bool, default: False)
    Whether or not to configure the certificate for use as an intermediate CA. This implies setting the key_cert_sign usage bit in the generated cert. Reference: https://ldapwiki.com/wiki/KeyUsage

Source code in oper8/x/utils/tls_context/internal.py
def request_server_key_cert_pair(
    self,
    server_component: Component,
    san_hostnames_list: List[str],
    san_ip_list: List[str] = None,
    key_name: str = None,
    intermediate_ca: bool = False,
) -> None:
    """Request creation of the PEM encoded value of the key/cert pair for a
    given server. This function must be called before render_chart is
    called, i.e., during the parse_config / Component constructor phase.
    Implementations of this function will generate the pair (in the
    background) if it has not already been requested.

    Args:
        server_component:  Component
            The Component that manages the server. This can be used to add
            a new Component if needed that will manage the resource for the
            derived content and configure dependencies.
        san_hostnames_list:  List[str]
            The list of Subject Alternate Name hostnames
        san_ip_list:  List[str]
            The list of Subject Alternate Name IP addresses
        key_name:  str
            In case component requires multiple certificates. The key_name
            is used to distinguish between component cert requests.
        intermediate_ca:  bool
            Whether or not to configure the certificate for use as an
            intermediate CA. This implies setting the key_cert_sign usage
            bit in the generated cert.
            Reference: https://ldapwiki.com/wiki/KeyUsage
    """
    cache_key = server_component.name + (
        "-" + key_name if key_name is not None else ""
    )

    if cache_key in self._server_pairs:
        log.warning(
            "Certificate server key/cert pair for %s has been already "
            "requested. Ignoring this request.",
            cache_key,
        )
        return

    log.debug("Generating server key/cert pair for %s", cache_key)

    # Mark the server component as dependent on the internal component. This
    # is not strictly necessary since values are consumed by value, but it
    # makes sequential sense.
    self.session.add_component_dependency(server_component, self._component)

    # Get the CA's private key
    ca_key = tls.parse_private_key_pem(self._component.get_ca_key_cert()[0])

    # Generate the derived pair
    san_list = (san_hostnames_list or []) + (san_ip_list or [])
    self._server_pairs[cache_key] = tls.generate_derived_key_cert_pair(
        ca_key=ca_key,
        san_list=san_list,
        encode=False,
        key_cert_sign=intermediate_ca,
    )
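
A sketch of requesting multiple certificates for one component: the cache key is the component name plus "-<key_name>", so each named request is generated independently and a repeated request for the same key is logged and ignored. Here tls_ctx is an InternalTlsContext instance and the key_name values "grpc" and "http" are illustrative:

tls_ctx.request_server_key_cert_pair(
    server_component=server_component,
    san_hostnames_list=["my-server"],
    key_name="grpc",
)
tls_ctx.request_server_key_cert_pair(
    server_component=server_component,
    san_hostnames_list=["my-server"],
    key_name="http",
)

# Each pair is later fetched with the matching key_name
grpc_key_b64, grpc_crt_b64 = tls_ctx.get_server_key_cert_pair(
    server_component, key_name="grpc"
)
http_key_b64, http_crt_b64 = tls_ctx.get_server_key_cert_pair(
    server_component, key_name="http"
)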
public

This file holds functions that should be used outside of this module by components, subsystems, and applications that need access to the TLS context functionality.

get_client_cert(session, *args, **kwargs)

Get the CA's public cert

Parameters:

session (Session, required)
    The current deploy session

Passthrough Args:
    See ITlsContext.get_client_cert

Returns:

crt_pem (Optional[str])
    The pem-encoded cert (base64 encoded if encode is set)

Source code in oper8/x/utils/tls_context/public.py
def get_client_cert(
    session: Session,
    *args,
    **kwargs,
) -> str:
    """Get the CA's public cert

    Args:
        session:  Session
            The current deploy session

    Passthrough Args:
        See ITlsContext.get_client_cert

    Returns:
        crt_pem:  Optional[str]
                           The pem-encoded cert (base64 encoded if encode set),
    """
    return get_tls_context(session).get_client_cert(*args, **kwargs)
get_server_key_cert_pair(session, *args, **kwargs)

Get the previously requested PEM encoded value of the key/cert pair for a given server. Implementations will retrieve the pair if it has already been requested and fetch its content if it is available. If the content is not available, an assertion is triggered.

Parameters:

session (Session, required)
    The current deploy session

Passthrough Args:
    See ITlsContext.get_server_key_cert_pair

Returns:

key_pem (str)
    This is the pem-encoded key content (base64 encoded if encode is set)
cert_pem (str)
    This is the pem-encoded cert content (base64 encoded if encode is set)

Source code in oper8/x/utils/tls_context/public.py
def get_server_key_cert_pair(
    session: Session,
    *args,
    **kwargs,
) -> Tuple[str, str]:
    """Get the previously requested PEM encoded value of the key/cert pair for a
    given server. Implementations will retrieve the pair if it has already been
    requested and fetch its content if it is available. If the content is not
    available, an assertion is triggered.

    Args:
        session:  Session
            The current deploy session

    Passthrough Args:
        See ITlsContext.get_server_key_cert_pair

    Returns:
        key_pem:  str
            This is the pem-encoded key content (base64 encoded if
            encode is set)
        cert_pem:  str
            This is the pem-encoded cert content (base64 encoded
            if encode is set)
    """
    return get_tls_context(session).get_server_key_cert_pair(*args, **kwargs)
request_server_key_cert_pair(session, *args, **kwargs)

Request creation of the PEM encoded value of the key/cert pair for a given server. This function must be called before render_chart is called, i.e., during the parse_config / Component constructor phase. Implementations of this function will generate the pair (in the background) if it has not already been requested.

Parameters:

session (Session, required)
    The current deploy session

Passthrough Args:
    See ITlsContext.request_server_key_cert_pair

Source code in oper8/x/utils/tls_context/public.py
def request_server_key_cert_pair(
    session: Session,
    *args,
    **kwargs,
) -> None:
    """Request creation of the PEM encoded value of the key/cert pair for a
    given server. This function must be called before render_chart is called,
    i.e., during the parse_config / Component constructor phase. Implementations
    of this function will generate the pair (in the background) if it has not
    already been requested.

    Args:
        session:  Session
            The current deploy session

    Passthrough Args:
        See ITlsContext.request_server_key_cert_pair
    """
    return get_tls_context(session).request_server_key_cert_pair(*args, **kwargs)
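
Putting the module-level helpers together, a hedged sketch of the expected call flow: each helper looks up the shared ITlsContext for the session via get_tls_context and forwards the remaining arguments, so the arguments after session match the ITlsContext methods documented above. The import alias, component variables, and hostname are illustrative.

from oper8.x.utils.tls_context import public as tls_public

# Constructor / parse_config phase of the server component
tls_public.request_server_key_cert_pair(
    session,
    server_component,
    san_hostnames_list=["my-server"],
)

# Render phase: the server pulls its key/cert pair, clients pull the CA cert
key_b64, crt_b64 = tls_public.get_server_key_cert_pair(session, server_component)
ca_b64 = tls_public.get_client_cert(session, client_component)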