From 43373b5003607bf56402a6f34cd7990374d7fa69 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 9 Apr 2026 13:52:46 -0500 Subject: [PATCH 1/4] RegexSearchPlugin --- nodescraper/base/regexanalyzer.py | 32 ++++- nodescraper/cli/dynamicparserbuilder.py | 14 +- nodescraper/cli/helper.py | 3 +- nodescraper/plugins/regex_search/__init__.py | 28 ++++ .../plugins/regex_search/analyzer_args.py | 50 +++++++ .../regex_search/regex_search_analyzer.py | 106 +++++++++++++++ .../plugins/regex_search/regex_search_data.py | 107 +++++++++++++++ .../regex_search/regex_search_plugin.py | 47 +++++++ .../regex_search_multi_logs/app_error.log | 3 + .../regex_search_multi_logs/no_errors.log | 2 + .../regex_search_multi_logs/storage_warn.log | 3 + .../fixtures/regex_search_sample.log | 4 + test/unit/framework/test_cli.py | 19 +++ .../unit/plugin/test_regex_search_analyzer.py | 128 ++++++++++++++++++ 14 files changed, 536 insertions(+), 10 deletions(-) create mode 100644 nodescraper/plugins/regex_search/__init__.py create mode 100644 nodescraper/plugins/regex_search/analyzer_args.py create mode 100644 nodescraper/plugins/regex_search/regex_search_analyzer.py create mode 100644 nodescraper/plugins/regex_search/regex_search_data.py create mode 100644 nodescraper/plugins/regex_search/regex_search_plugin.py create mode 100644 test/functional/fixtures/regex_search_multi_logs/app_error.log create mode 100644 test/functional/fixtures/regex_search_multi_logs/no_errors.log create mode 100644 test/functional/fixtures/regex_search_multi_logs/storage_warn.log create mode 100644 test/functional/fixtures/regex_search_sample.log create mode 100644 test/unit/plugin/test_regex_search_analyzer.py diff --git a/nodescraper/base/regexanalyzer.py b/nodescraper/base/regexanalyzer.py index 4103c99d..a53267fa 100644 --- a/nodescraper/base/regexanalyzer.py +++ b/nodescraper/base/regexanalyzer.py @@ -35,6 +35,24 @@ from nodescraper.models.event import Event +def _coerce_event_priority_from_dict(value: 
Union[str, int, EventPriority]) -> EventPriority: + """Turn a string name, integer level, or already-coerced value into the canonical priority member. + + Args: + value: Member name (case-insensitive), numeric level, or same-type value passthrough. + + Returns: + Matching priority member for the configured level. + """ + if isinstance(value, EventPriority): + return value + if isinstance(value, int): + return EventPriority(value) + if isinstance(value, str): + return EventPriority[value.upper()] + raise TypeError(f"Invalid event_priority: {value!r}") + + class ErrorRegex(BaseModel): regex: re.Pattern message: str @@ -135,13 +153,13 @@ def _convert_and_extend_error_regex( if isinstance(item, ErrorRegex): converted_regex.append(item) elif isinstance(item, dict): - # Convert dict to ErrorRegex - item["regex"] = re.compile(item["regex"]) - if "event_category" in item: - item["event_category"] = EventCategory(item["event_category"]) - if "event_priority" in item: - item["event_priority"] = EventPriority(item["event_priority"]) - converted_regex.append(ErrorRegex(**item)) + d = dict(item) + d["regex"] = re.compile(d["regex"]) + if "event_category" in d: + d["event_category"] = EventCategory(d["event_category"]) + if "event_priority" in d: + d["event_priority"] = _coerce_event_priority_from_dict(d["event_priority"]) + converted_regex.append(ErrorRegex(**d)) return converted_regex + list(base_regex) diff --git a/nodescraper/cli/dynamicparserbuilder.py b/nodescraper/cli/dynamicparserbuilder.py index fc6259db..54bd0d2e 100644 --- a/nodescraper/cli/dynamicparserbuilder.py +++ b/nodescraper/cli/dynamicparserbuilder.py @@ -167,12 +167,22 @@ def add_argument( if list in type_class_map: type_class = type_class_map[list] + inner = type_class.inner_type + if inner is dict or get_origin(inner) is dict: + elt_type = dict_arg + metavar = META_VAR_MAP[dict] + elif inner is not None: + elt_type = inner + metavar = META_VAR_MAP.get(inner, "STRING") + else: + elt_type = str + metavar = 
"STRING" self.parser.add_argument( f"--{arg_name}", nargs="*", - type=type_class.inner_type if type_class.inner_type else str, + type=elt_type, required=required, - metavar=META_VAR_MAP.get(type_class.inner_type, "STRING"), + metavar=metavar, **add_kw, ) elif bool in type_class_map: diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index 41e30ede..8d0def9e 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -398,7 +398,8 @@ def process_args( else: cur_plugin = None for arg in plugin_args: - if not arg.startswith("-") and "," in arg: + # Only split on commas before a plugin context is set (e.g. "P1,P2"). + if not arg.startswith("-") and "," in arg and cur_plugin is None: for potential_plugin in arg.split(","): potential_plugin = potential_plugin.strip() if potential_plugin in plugin_names: diff --git a/nodescraper/plugins/regex_search/__init__.py b/nodescraper/plugins/regex_search/__init__.py new file mode 100644 index 00000000..708b6b04 --- /dev/null +++ b/nodescraper/plugins/regex_search/__init__.py @@ -0,0 +1,28 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from .regex_search_plugin import RegexSearchPlugin + +__all__ = ["RegexSearchPlugin"] diff --git a/nodescraper/plugins/regex_search/analyzer_args.py b/nodescraper/plugins/regex_search/analyzer_args.py new file mode 100644 index 00000000..b30acb7e --- /dev/null +++ b/nodescraper/plugins/regex_search/analyzer_args.py @@ -0,0 +1,50 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from typing import Any, Optional + +from pydantic import Field + +from nodescraper.models import AnalyzerArgs + + +class RegexSearchAnalyzerArgs(AnalyzerArgs): + """Arguments for RegexSearchAnalyzer (dict items match Dmesg-style error_regex).""" + + error_regex: Optional[list[dict[str, Any]]] = Field( + default=None, + description=( + "Regex patterns to search for; each dict may include regex (str), message, " + "event_category, event_priority (same as Dmesg analyzer error_regex). " + ), + ) + interval_to_collapse_event: int = Field( + default=60, + description="Seconds within which repeated events are collapsed into one.", + ) + num_timestamps: int = Field( + default=3, + description="Number of timestamps to include per event in output.", + ) diff --git a/nodescraper/plugins/regex_search/regex_search_analyzer.py b/nodescraper/plugins/regex_search/regex_search_analyzer.py new file mode 100644 index 00000000..6bd79aff --- /dev/null +++ b/nodescraper/plugins/regex_search/regex_search_analyzer.py @@ -0,0 +1,106 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +import os +from typing import Optional, Union + +from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer, RegexEvent +from nodescraper.enums import ExecutionStatus +from nodescraper.models import TaskResult + +from .analyzer_args import RegexSearchAnalyzerArgs +from .regex_search_data import RegexSearchData + + +class RegexSearchAnalyzer(RegexAnalyzer[RegexSearchData, RegexSearchAnalyzerArgs]): + """Run user-provided regexes against text loaded from --data (file or directory).""" + + DATA_MODEL = RegexSearchData + + ERROR_REGEX: list[ErrorRegex] = [] + + def _build_regex_event( + self, regex_obj: ErrorRegex, match: Union[str, list[str]], source: str + ) -> RegexEvent: + """Augment the default event text with a file path when the origin is a concrete path. 
+ + Args: + regex_obj: Metadata for the rule that produced the match. + match: Substring or grouped capture text from the pattern. + source: Origin label, or an absolute path when matching per file. + + Returns: + Match record with an extended description when a path-like source is present. + """ + event = super()._build_regex_event(regex_obj, match, source) + if source and source != "regex_search": + event.description = f"{regex_obj.message} [file: {source}]" + return event + + def analyze_data( + self, + data: RegexSearchData, + args: Optional[RegexSearchAnalyzerArgs] = None, + ) -> TaskResult: + """Scan loaded inputs with the given patterns, or mark the task not run if inputs are incomplete. + + Args: + data: Aggregated and per-file text loaded from the user data path. + args: Optional pattern list and timing knobs; omitted or empty patterns skip work. + + Returns: + Work outcome with match events, or a not-run status when patterns are absent. + """ + if args is None or not args.error_regex: + self.result.status = ExecutionStatus.NOT_RAN + self.result.message = ( + "No error_regex patterns provided; nothing to analyze" + if args is not None + else "No analysis_args provided; nothing to analyze" + ) + return self.result + + final_regex = self._convert_and_extend_error_regex(args.error_regex, []) + + if data.files: + for rel_path in sorted(data.files.keys()): + file_content = data.files[rel_path] + abs_source = os.path.normpath(os.path.join(data.data_root, rel_path)) + self.result.events += self.check_all_regexes( + content=file_content, + source=abs_source, + error_regex=final_regex, + num_timestamps=args.num_timestamps, + interval_to_collapse_event=args.interval_to_collapse_event, + ) + else: + self.result.events += self.check_all_regexes( + content=data.content, + source=data.data_root or "regex_search", + error_regex=final_regex, + num_timestamps=args.num_timestamps, + interval_to_collapse_event=args.interval_to_collapse_event, + ) + return self.result diff 
--git a/nodescraper/plugins/regex_search/regex_search_data.py b/nodescraper/plugins/regex_search/regex_search_data.py new file mode 100644 index 00000000..a12b2841 --- /dev/null +++ b/nodescraper/plugins/regex_search/regex_search_data.py @@ -0,0 +1,107 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +import os +from pathlib import Path +from typing import Union + +from pydantic import Field + +from nodescraper.models import DataModel +from nodescraper.utils import get_unique_filename + + +class RegexSearchData(DataModel): + """Loaded file or directory contents passed to the analyzer (via --data).""" + + content: str + data_root: str = "" + files: dict[str, str] = Field(default_factory=dict) + + def log_model(self, log_path: str) -> None: + """Persist the aggregated text payload as one log file under the given base path. + + Args: + log_path: Directory where the log file should be written. + + Returns: + None. + """ + log_name = os.path.join(log_path, get_unique_filename(log_path, "regex_search_source.log")) + with open(log_name, "w", encoding="utf-8") as log_file: + log_file.write(self.content) + + @classmethod + def import_model(cls, model_input: Union[dict, str]) -> "RegexSearchData": + """Import datamodel. + + Args: + model_input: Keyed fields for direct validation, or a path string to load from disk. + + Returns: + Instance with content, root path, and per-file bodies filled in. + """ + if isinstance(model_input, dict): + return cls.model_validate(model_input) + if isinstance(model_input, str): + return cls._from_filesystem_path(model_input) + raise ValueError("Invalid input for regex search data") + + @classmethod + def _from_filesystem_path(cls, path: str) -> "RegexSearchData": + """Read one file or every file under a directory into a merged view plus a path-to-text map. + + Args: + path: Absolute or resolvable path to a file or directory. + + Returns: + Instance built from the read text and discovered relative paths. 
+ + """ + path = os.path.abspath(path) + if not os.path.exists(path): + raise FileNotFoundError(f"Path not found: {path}") + if os.path.isfile(path): + text = Path(path).read_text(encoding="utf-8", errors="replace") + rel = os.path.basename(path) + data_root = os.path.dirname(path) or os.path.abspath(os.path.curdir) + return cls(content=text, data_root=data_root, files={rel: text}) + if os.path.isdir(path): + files: dict[str, str] = {} + parts: list[str] = [] + for root, _dirs, filenames in os.walk(path): + for name in sorted(filenames): + fp = os.path.join(root, name) + if not os.path.isfile(fp): + continue + rel = os.path.relpath(fp, path) + try: + text = Path(fp).read_text(encoding="utf-8", errors="replace") + except OSError: + continue + files[rel] = text + parts.append(f"===== {rel} =====\n{text}") + return cls(content="\n".join(parts), data_root=path, files=files) + raise ValueError(f"Unsupported path type: {path}") diff --git a/nodescraper/plugins/regex_search/regex_search_plugin.py b/nodescraper/plugins/regex_search/regex_search_plugin.py new file mode 100644 index 00000000..808e2a3e --- /dev/null +++ b/nodescraper/plugins/regex_search/regex_search_plugin.py @@ -0,0 +1,47 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams +from nodescraper.interfaces import DataPlugin +from nodescraper.models import CollectorArgs + +from .analyzer_args import RegexSearchAnalyzerArgs +from .regex_search_analyzer import RegexSearchAnalyzer +from .regex_search_data import RegexSearchData + + +class RegexSearchPlugin( + DataPlugin[ + InBandConnectionManager, + SSHConnectionParams, + RegexSearchData, + CollectorArgs, + RegexSearchAnalyzerArgs, + ] +): + """Analyzer-only plugin: search user regexes against a file or directory (--data).""" + + DATA_MODEL = RegexSearchData + ANALYZER = RegexSearchAnalyzer diff --git a/test/functional/fixtures/regex_search_multi_logs/app_error.log b/test/functional/fixtures/regex_search_multi_logs/app_error.log new file mode 100644 index 00000000..391fb32c --- /dev/null +++ b/test/functional/fixtures/regex_search_multi_logs/app_error.log @@ -0,0 +1,3 @@ +startup complete +ERROR: dependency timeout connecting to backend +shutdown clean diff --git a/test/functional/fixtures/regex_search_multi_logs/no_errors.log b/test/functional/fixtures/regex_search_multi_logs/no_errors.log new file mode 100644 index 00000000..854d9322 --- /dev/null +++ b/test/functional/fixtures/regex_search_multi_logs/no_errors.log @@ -0,0 +1,2 @@ +2026-04-09 service healthy +all checks passed diff --git 
a/test/functional/fixtures/regex_search_multi_logs/storage_warn.log b/test/functional/fixtures/regex_search_multi_logs/storage_warn.log new file mode 100644 index 00000000..bc11a75a --- /dev/null +++ b/test/functional/fixtures/regex_search_multi_logs/storage_warn.log @@ -0,0 +1,3 @@ +mount ok +WARNING: disk full on /data within 5% +idle diff --git a/test/functional/fixtures/regex_search_sample.log b/test/functional/fixtures/regex_search_sample.log new file mode 100644 index 00000000..7564228b --- /dev/null +++ b/test/functional/fixtures/regex_search_sample.log @@ -0,0 +1,4 @@ +2026-04-09T10:00:00Z service started OK +2026-04-09T10:01:00Z ERROR: connection reset by peer +2026-04-09T10:02:00Z routine check passed +2026-04-09T10:03:00Z WARNING: disk full on /var diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py index 014befa8..8df56e95 100644 --- a/test/unit/framework/test_cli.py +++ b/test/unit/framework/test_cli.py @@ -148,6 +148,25 @@ def test_system_info_builder(): [], ), ), + ( + [ + "run-plugins", + "RegexSearchPlugin", + "--error-regex", + '{"regex":"a","message":"b","event_category":"UNKNOWN"}', + ], + ["RegexSearchPlugin"], + ( + ["run-plugins"], + { + "RegexSearchPlugin": [ + "--error-regex", + '{"regex":"a","message":"b","event_category":"UNKNOWN"}', + ], + }, + [], + ), + ), ], ) def test_process_args(raw_arg_input, plugin_names, exp_output): diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py new file mode 100644 index 00000000..3b2b52e4 --- /dev/null +++ b/test/unit/plugin/test_regex_search_analyzer.py @@ -0,0 +1,128 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +import os +import tempfile + +from nodescraper.enums.executionstatus import ExecutionStatus +from nodescraper.plugins.regex_search.analyzer_args import RegexSearchAnalyzerArgs +from nodescraper.plugins.regex_search.regex_search_analyzer import RegexSearchAnalyzer +from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData +from nodescraper.plugins.regex_search.regex_search_plugin import RegexSearchPlugin + + +def test_regex_search_data_from_file(): + with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f: + f.write("alpha\nbeta ERROR gamma\n") + path = f.name + try: + data = RegexSearchData.import_model(path) + assert "ERROR" in data.content + assert os.path.basename(path) in data.files + assert data.data_root == os.path.dirname(path) + finally: + os.unlink(path) + + +def test_regex_search_data_from_directory(): + with tempfile.TemporaryDirectory() as tmp: + with open(f"{tmp}/a.txt", "w", encoding="utf-8") as f: + f.write("one") + with open(f"{tmp}/b.txt", "w", encoding="utf-8") as f: + f.write("two") + data = RegexSearchData.import_model(tmp) + assert data.data_root == os.path.abspath(tmp) + assert set(data.files.keys()) == {"a.txt", "b.txt"} + assert data.files["a.txt"] == "one" + assert data.files["b.txt"] == "two" + assert "===== a.txt =====" in data.content + assert "===== b.txt =====" in data.content + + +def test_regex_search_analyzer_match(system_info): + data = RegexSearchData(content="line1\nFATAL: boom\nline3") + analyzer = RegexSearchAnalyzer(system_info=system_info) + args = RegexSearchAnalyzerArgs( + error_regex=[{"regex": r"FATAL:.*", "message": "fatal seen"}], + ) + result = analyzer.analyze_data(data, args) + assert result.status == ExecutionStatus.OK + assert len(result.events) == 1 + assert result.events[0].description == "fatal seen" + + +def test_regex_search_analyzer_missing_args(system_info): + data 
= RegexSearchData(content="x") + analyzer = RegexSearchAnalyzer(system_info=system_info) + result = analyzer.analyze_data(data, None) + assert result.status == ExecutionStatus.NOT_RAN + + result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None)) + assert result.status == ExecutionStatus.NOT_RAN + + result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[])) + assert result.status == ExecutionStatus.NOT_RAN + + +def test_regex_search_plugin_analyzer_only(system_info, logger): + with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f: + f.write("match_me_here\n") + path = f.name + try: + plugin = RegexSearchPlugin(system_info=system_info, logger=logger) + out = plugin.run( + collection=False, + analysis=True, + data=path, + analysis_args={ + "error_regex": [{"regex": r"match_me_here", "message": "found"}], + }, + ) + assert out.status == ExecutionStatus.OK + assert out.result_data.analysis_result.status == ExecutionStatus.OK + assert len(out.result_data.analysis_result.events) == 1 + desc = out.result_data.analysis_result.events[0].description + assert "found" in desc + assert "[file:" in desc + assert path.replace("\\", "/") in desc.replace("\\", "/") + finally: + os.unlink(path) + + +def test_regex_search_multi_file_event_paths(system_info): + with tempfile.TemporaryDirectory() as tmp: + open(os.path.join(tmp, "clean.log"), "w", encoding="utf-8").write("ok\n") + open(os.path.join(tmp, "bad.log"), "w", encoding="utf-8").write("ERROR: boom\n") + data = RegexSearchData.import_model(tmp) + analyzer = RegexSearchAnalyzer(system_info=system_info) + args = RegexSearchAnalyzerArgs( + error_regex=[{"regex": r"ERROR[: ].*", "message": "err line"}], + ) + result = analyzer.analyze_data(data, args) + assert result.status == ExecutionStatus.ERROR + assert len(result.events) == 1 + assert "err line" in result.events[0].description + assert "[file:" in result.events[0].description + assert "bad.log" in 
result.events[0].description From 12a396ede8819b342f471a6f7471b4ca3796223c Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 9 Apr 2026 14:12:58 -0500 Subject: [PATCH 2/4] improved wording --- .../regex_search/regex_search_analyzer.py | 6 +--- .../regex_search/regex_search_plugin.py | 31 ++++++++++++++++++- .../unit/plugin/test_regex_search_analyzer.py | 4 +++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/nodescraper/plugins/regex_search/regex_search_analyzer.py b/nodescraper/plugins/regex_search/regex_search_analyzer.py index 6bd79aff..0b4384f4 100644 --- a/nodescraper/plugins/regex_search/regex_search_analyzer.py +++ b/nodescraper/plugins/regex_search/regex_search_analyzer.py @@ -75,11 +75,7 @@ def analyze_data( """ if args is None or not args.error_regex: self.result.status = ExecutionStatus.NOT_RAN - self.result.message = ( - "No error_regex patterns provided; nothing to analyze" - if args is not None - else "No analysis_args provided; nothing to analyze" - ) + self.result.message = "Analysis args need to be provided for the analyzer to run" return self.result final_regex = self._convert_and_extend_error_regex(args.error_regex, []) diff --git a/nodescraper/plugins/regex_search/regex_search_plugin.py b/nodescraper/plugins/regex_search/regex_search_plugin.py index 808e2a3e..36d650c6 100644 --- a/nodescraper/plugins/regex_search/regex_search_plugin.py +++ b/nodescraper/plugins/regex_search/regex_search_plugin.py @@ -23,9 +23,12 @@ # SOFTWARE. 
# ############################################################################### +from typing import Optional, Union + from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams +from nodescraper.enums import EventPriority from nodescraper.interfaces import DataPlugin -from nodescraper.models import CollectorArgs +from nodescraper.models import CollectorArgs, TaskResult from .analyzer_args import RegexSearchAnalyzerArgs from .regex_search_analyzer import RegexSearchAnalyzer @@ -45,3 +48,29 @@ class RegexSearchPlugin( DATA_MODEL = RegexSearchData ANALYZER = RegexSearchAnalyzer + + def analyze( + self, + max_event_priority_level: Optional[Union[EventPriority, str]] = EventPriority.CRITICAL, + analysis_args: Optional[Union[RegexSearchAnalyzerArgs, dict]] = None, + data: Optional[Union[str, dict, RegexSearchData]] = None, + ) -> TaskResult: + if analysis_args is None: + missing_error_regex = True + elif isinstance(analysis_args, RegexSearchAnalyzerArgs): + missing_error_regex = not bool(analysis_args.error_regex) + elif isinstance(analysis_args, dict): + er = analysis_args.get("error_regex") + missing_error_regex = er is None or er == [] + else: + missing_error_regex = True + if missing_error_regex: + self.logger.warning( + "RegexSearchPlugin: analysis args need to be provided for the analyzer to run " + "(e.g. --error-regex for each pattern)." 
+ ) + return super().analyze( + max_event_priority_level=max_event_priority_level, + analysis_args=analysis_args, + data=data, + ) diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py index 3b2b52e4..e2c86225 100644 --- a/test/unit/plugin/test_regex_search_analyzer.py +++ b/test/unit/plugin/test_regex_search_analyzer.py @@ -76,14 +76,18 @@ def test_regex_search_analyzer_match(system_info): def test_regex_search_analyzer_missing_args(system_info): data = RegexSearchData(content="x") analyzer = RegexSearchAnalyzer(system_info=system_info) + expected = "Analysis args need to be provided for the analyzer to run" result = analyzer.analyze_data(data, None) assert result.status == ExecutionStatus.NOT_RAN + assert result.message == expected result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None)) assert result.status == ExecutionStatus.NOT_RAN + assert result.message == expected result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[])) assert result.status == ExecutionStatus.NOT_RAN + assert result.message == expected def test_regex_search_plugin_analyzer_only(system_info, logger): From cc5422ac5407c75d7147426fdf69578b176d7835 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 9 Apr 2026 15:18:46 -0500 Subject: [PATCH 3/4] utest update --- .../unit/plugin/test_regex_search_analyzer.py | 72 +++++++++++++++++-- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py index e2c86225..ac018ee1 100644 --- a/test/unit/plugin/test_regex_search_analyzer.py +++ b/test/unit/plugin/test_regex_search_analyzer.py @@ -23,6 +23,7 @@ # SOFTWARE. 
# ############################################################################### +import logging import os import tempfile @@ -32,6 +33,8 @@ from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData from nodescraper.plugins.regex_search.regex_search_plugin import RegexSearchPlugin +EXPECTED_MISSING_ANALYSIS_MSG = "Analysis args need to be provided for the analyzer to run" + def test_regex_search_data_from_file(): with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f: @@ -68,7 +71,9 @@ def test_regex_search_analyzer_match(system_info): error_regex=[{"regex": r"FATAL:.*", "message": "fatal seen"}], ) result = analyzer.analyze_data(data, args) - assert result.status == ExecutionStatus.OK + assert result.status == ExecutionStatus.ERROR + assert "task detected errors" in result.message + assert "fatal seen" in result.message assert len(result.events) == 1 assert result.events[0].description == "fatal seen" @@ -76,18 +81,69 @@ def test_regex_search_analyzer_match(system_info): def test_regex_search_analyzer_missing_args(system_info): data = RegexSearchData(content="x") analyzer = RegexSearchAnalyzer(system_info=system_info) - expected = "Analysis args need to be provided for the analyzer to run" result = analyzer.analyze_data(data, None) assert result.status == ExecutionStatus.NOT_RAN - assert result.message == expected + assert result.message == EXPECTED_MISSING_ANALYSIS_MSG result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None)) assert result.status == ExecutionStatus.NOT_RAN - assert result.message == expected + assert result.message == EXPECTED_MISSING_ANALYSIS_MSG result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[])) assert result.status == ExecutionStatus.NOT_RAN - assert result.message == expected + assert result.message == EXPECTED_MISSING_ANALYSIS_MSG + + +def test_regex_search_plugin_missing_error_regex_not_ran_and_warning( + system_info, logger, 
caplog, tmp_path +): + log_file = tmp_path / "sample.log" + log_file.write_text("line\n", encoding="utf-8") + plugin = RegexSearchPlugin(system_info=system_info, logger=logger) + with caplog.at_level(logging.WARNING, logger=logger.name): + out = plugin.run( + collection=False, + analysis=True, + data=str(log_file), + analysis_args=None, + ) + assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN + assert out.result_data.analysis_result.message == EXPECTED_MISSING_ANALYSIS_MSG + assert any( + "analysis args need to be provided" in r.getMessage().lower() for r in caplog.records + ) + + +def test_regex_search_plugin_empty_analysis_args_dict_not_ran(system_info, logger, tmp_path): + log_file = tmp_path / "sample.log" + log_file.write_text("line\n", encoding="utf-8") + plugin = RegexSearchPlugin(system_info=system_info, logger=logger) + out = plugin.run( + collection=False, + analysis=True, + data=str(log_file), + analysis_args={}, + ) + assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN + assert out.result_data.analysis_result.message == EXPECTED_MISSING_ANALYSIS_MSG + + +def test_regex_search_plugin_no_data_warns_and_data_message(system_info, logger, caplog): + plugin = RegexSearchPlugin(system_info=system_info, logger=logger) + with caplog.at_level(logging.WARNING, logger=logger.name): + out = plugin.run( + collection=False, + analysis=True, + data=None, + analysis_args=None, + ) + assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN + assert "No data available to analyze for RegexSearchPlugin" in ( + out.result_data.analysis_result.message + ) + assert any( + "analysis args need to be provided" in r.getMessage().lower() for r in caplog.records + ) def test_regex_search_plugin_analyzer_only(system_info, logger): @@ -104,8 +160,10 @@ def test_regex_search_plugin_analyzer_only(system_info, logger): "error_regex": [{"regex": r"match_me_here", "message": "found"}], }, ) - assert out.status == ExecutionStatus.OK 
- assert out.result_data.analysis_result.status == ExecutionStatus.OK + assert out.status == ExecutionStatus.ERROR + assert "Analysis error:" in out.message + assert "found" in out.message + assert out.result_data.analysis_result.status == ExecutionStatus.ERROR assert len(out.result_data.analysis_result.events) == 1 desc = out.result_data.analysis_result.events[0].description assert "found" in desc From 51026bc9ffff2ef97be5b5a1263d16f798e2f282 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 9 Apr 2026 15:38:18 -0500 Subject: [PATCH 4/4] fix to show description for --data --- nodescraper/cli/dynamicparserbuilder.py | 37 +++++++++++++++++++++---- nodescraper/interfaces/dataplugin.py | 9 +++++- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/nodescraper/cli/dynamicparserbuilder.py b/nodescraper/cli/dynamicparserbuilder.py index 54bd0d2e..8c0c9c68 100644 --- a/nodescraper/cli/dynamicparserbuilder.py +++ b/nodescraper/cli/dynamicparserbuilder.py @@ -43,15 +43,40 @@ from nodescraper.typeutils import TypeUtils +def _help_from_annotated(anno: object) -> str: + """Pull CLI help from ``Annotated[T, metadata...]`` (string or ``Field(description=...)``).""" + if anno is None or get_origin(anno) is not Annotated: + return "" + for meta in get_args(anno)[1:]: + if isinstance(meta, str): + return meta + desc = getattr(meta, "description", None) + if isinstance(desc, str) and desc.strip(): + return desc + return "" + + def _get_run_arg_help(plugin_class: Type[PluginInterface], arg: str) -> str: """Get help text for a run() parameter from typing.Annotated metadata on the parameter.""" try: - hints = get_type_hints(plugin_class.run, include_extras=True) - anno = hints.get(arg) - if anno is not None and get_origin(anno) is Annotated: - args = get_args(anno) - if len(args) >= 2 and isinstance(args[1], str): - return args[1] + run_obj = None + for cls in plugin_class.__mro__: + if "run" in cls.__dict__: + run_obj = cls.__dict__["run"] + break + if run_obj is 
None: + run_obj = plugin_class.run + run_fn = run_obj + if isinstance(run_obj, staticmethod): + run_fn = run_obj.__func__ + elif isinstance(run_obj, classmethod): + run_fn = run_obj.__func__ + raw = getattr(run_fn, "__annotations__", {}).get(arg) + text = _help_from_annotated(raw) + if text: + return text + hints = get_type_hints(run_fn, include_extras=True) + return _help_from_annotated(hints.get(arg)) except Exception: pass return "" diff --git a/nodescraper/interfaces/dataplugin.py b/nodescraper/interfaces/dataplugin.py index ca7e7daf..ed632fb4 100644 --- a/nodescraper/interfaces/dataplugin.py +++ b/nodescraper/interfaces/dataplugin.py @@ -29,6 +29,8 @@ from pathlib import Path from typing import Annotated, Any, Generic, Optional, Type, Union +from pydantic import Field + from nodescraper.enums import EventPriority, ExecutionStatus, SystemInteractionLevel from nodescraper.generictypes import TAnalyzeArg, TCollectArg, TDataModel from nodescraper.interfaces.dataanalyzertask import DataAnalyzer @@ -313,7 +315,12 @@ def run( preserve_connection: bool = False, data: Annotated[ Optional[Union[str, dict, TDataModel]], - "Path to pre-collected data (file or directory). Load this data instead of collecting; use with --collection False to run only the analyzer.", + Field( + description=( + "Path to pre-collected data" + "; use with --collection False to run the analyzer only." + ), + ), ] = None, collection_args: Optional[Union[TCollectArg, dict]] = None, analysis_args: Optional[Union[TAnalyzeArg, dict]] = None,