From 43373b5003607bf56402a6f34cd7990374d7fa69 Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alex.bara@amd.com>
Date: Thu, 9 Apr 2026 13:52:46 -0500
Subject: [PATCH 1/4] RegexSearchPlugin

---
 nodescraper/base/regexanalyzer.py             |  32 ++++-
 nodescraper/cli/dynamicparserbuilder.py       |  14 +-
 nodescraper/cli/helper.py                     |   3 +-
 nodescraper/plugins/regex_search/__init__.py  |  28 ++++
 .../plugins/regex_search/analyzer_args.py     |  50 +++++++
 .../regex_search/regex_search_analyzer.py     | 106 +++++++++++++++
 .../plugins/regex_search/regex_search_data.py | 107 +++++++++++++++
 .../regex_search/regex_search_plugin.py       |  47 +++++++
 .../regex_search_multi_logs/app_error.log     |   3 +
 .../regex_search_multi_logs/no_errors.log     |   2 +
 .../regex_search_multi_logs/storage_warn.log  |   3 +
 .../fixtures/regex_search_sample.log          |   4 +
 test/unit/framework/test_cli.py               |  19 +++
 .../unit/plugin/test_regex_search_analyzer.py | 128 ++++++++++++++++++
 14 files changed, 536 insertions(+), 10 deletions(-)
 create mode 100644 nodescraper/plugins/regex_search/__init__.py
 create mode 100644 nodescraper/plugins/regex_search/analyzer_args.py
 create mode 100644 nodescraper/plugins/regex_search/regex_search_analyzer.py
 create mode 100644 nodescraper/plugins/regex_search/regex_search_data.py
 create mode 100644 nodescraper/plugins/regex_search/regex_search_plugin.py
 create mode 100644 test/functional/fixtures/regex_search_multi_logs/app_error.log
 create mode 100644 test/functional/fixtures/regex_search_multi_logs/no_errors.log
 create mode 100644 test/functional/fixtures/regex_search_multi_logs/storage_warn.log
 create mode 100644 test/functional/fixtures/regex_search_sample.log
 create mode 100644 test/unit/plugin/test_regex_search_analyzer.py

diff --git a/nodescraper/base/regexanalyzer.py b/nodescraper/base/regexanalyzer.py
index 4103c99d..a53267fa 100644
--- a/nodescraper/base/regexanalyzer.py
+++ b/nodescraper/base/regexanalyzer.py
@@ -35,6 +35,24 @@
 from nodescraper.models.event import Event
 
 
+def _coerce_event_priority_from_dict(value: Union[str, int, EventPriority]) -> EventPriority:
+    """Turn a string name, integer level, or already-coerced value into the canonical priority member.
+
+    Args:
+        value: Member name (case-insensitive), numeric level, or same-type value passthrough.
+
+    Returns:
+        Matching priority member for the configured level.
+    """
+    if isinstance(value, EventPriority):
+        return value
+    if isinstance(value, int):
+        return EventPriority(value)
+    if isinstance(value, str):
+        return EventPriority[value.upper()]
+    raise TypeError(f"Invalid event_priority: {value!r}")
+
+
 class ErrorRegex(BaseModel):
     regex: re.Pattern
     message: str
@@ -135,13 +153,13 @@ def _convert_and_extend_error_regex(
             if isinstance(item, ErrorRegex):
                 converted_regex.append(item)
             elif isinstance(item, dict):
-                # Convert dict to ErrorRegex
-                item["regex"] = re.compile(item["regex"])
-                if "event_category" in item:
-                    item["event_category"] = EventCategory(item["event_category"])
-                if "event_priority" in item:
-                    item["event_priority"] = EventPriority(item["event_priority"])
-                converted_regex.append(ErrorRegex(**item))
+                d = dict(item)
+                d["regex"] = re.compile(d["regex"])
+                if "event_category" in d:
+                    d["event_category"] = EventCategory(d["event_category"])
+                if "event_priority" in d:
+                    d["event_priority"] = _coerce_event_priority_from_dict(d["event_priority"])
+                converted_regex.append(ErrorRegex(**d))
 
         return converted_regex + list(base_regex)
 
diff --git a/nodescraper/cli/dynamicparserbuilder.py b/nodescraper/cli/dynamicparserbuilder.py
index fc6259db..54bd0d2e 100644
--- a/nodescraper/cli/dynamicparserbuilder.py
+++ b/nodescraper/cli/dynamicparserbuilder.py
@@ -167,12 +167,22 @@ def add_argument(
 
         if list in type_class_map:
             type_class = type_class_map[list]
+            inner = type_class.inner_type
+            if inner is dict or get_origin(inner) is dict:
+                elt_type = dict_arg
+                metavar = META_VAR_MAP[dict]
+            elif inner is not None:
+                elt_type = inner
+                metavar = META_VAR_MAP.get(inner, "STRING")
+            else:
+                elt_type = str
+                metavar = "STRING"
             self.parser.add_argument(
                 f"--{arg_name}",
                 nargs="*",
-                type=type_class.inner_type if type_class.inner_type else str,
+                type=elt_type,
                 required=required,
-                metavar=META_VAR_MAP.get(type_class.inner_type, "STRING"),
+                metavar=metavar,
                 **add_kw,
             )
         elif bool in type_class_map:
diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py
index 41e30ede..8d0def9e 100644
--- a/nodescraper/cli/helper.py
+++ b/nodescraper/cli/helper.py
@@ -398,7 +398,8 @@ def process_args(
         else:
             cur_plugin = None
             for arg in plugin_args:
-                if not arg.startswith("-") and "," in arg:
+                # Only split on commas before a plugin context is set (e.g. "P1,P2").
+                if not arg.startswith("-") and "," in arg and cur_plugin is None:
                     for potential_plugin in arg.split(","):
                         potential_plugin = potential_plugin.strip()
                         if potential_plugin in plugin_names:
diff --git a/nodescraper/plugins/regex_search/__init__.py b/nodescraper/plugins/regex_search/__init__.py
new file mode 100644
index 00000000..708b6b04
--- /dev/null
+++ b/nodescraper/plugins/regex_search/__init__.py
@@ -0,0 +1,28 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+from .regex_search_plugin import RegexSearchPlugin
+
+__all__ = ["RegexSearchPlugin"]
diff --git a/nodescraper/plugins/regex_search/analyzer_args.py b/nodescraper/plugins/regex_search/analyzer_args.py
new file mode 100644
index 00000000..b30acb7e
--- /dev/null
+++ b/nodescraper/plugins/regex_search/analyzer_args.py
@@ -0,0 +1,50 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+from typing import Any, Optional
+
+from pydantic import Field
+
+from nodescraper.models import AnalyzerArgs
+
+
+class RegexSearchAnalyzerArgs(AnalyzerArgs):
+    """Arguments for RegexSearchAnalyzer (dict items match Dmesg-style error_regex)."""
+
+    error_regex: Optional[list[dict[str, Any]]] = Field(
+        default=None,
+        description=(
+            "Regex patterns to search for; each dict may include regex (str), message, "
+            "event_category, event_priority (same as Dmesg analyzer error_regex). "
+        ),
+    )
+    interval_to_collapse_event: int = Field(
+        default=60,
+        description="Seconds within which repeated events are collapsed into one.",
+    )
+    num_timestamps: int = Field(
+        default=3,
+        description="Number of timestamps to include per event in output.",
+    )
diff --git a/nodescraper/plugins/regex_search/regex_search_analyzer.py b/nodescraper/plugins/regex_search/regex_search_analyzer.py
new file mode 100644
index 00000000..6bd79aff
--- /dev/null
+++ b/nodescraper/plugins/regex_search/regex_search_analyzer.py
@@ -0,0 +1,106 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+import os
+from typing import Optional, Union
+
+from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer, RegexEvent
+from nodescraper.enums import ExecutionStatus
+from nodescraper.models import TaskResult
+
+from .analyzer_args import RegexSearchAnalyzerArgs
+from .regex_search_data import RegexSearchData
+
+
+class RegexSearchAnalyzer(RegexAnalyzer[RegexSearchData, RegexSearchAnalyzerArgs]):
+    """Run user-provided regexes against text loaded from --data (file or directory)."""
+
+    DATA_MODEL = RegexSearchData
+
+    ERROR_REGEX: list[ErrorRegex] = []
+
+    def _build_regex_event(
+        self, regex_obj: ErrorRegex, match: Union[str, list[str]], source: str
+    ) -> RegexEvent:
+        """Augment the default event text with a file path when the origin is a concrete path.
+
+        Args:
+            regex_obj: Metadata for the rule that produced the match.
+            match: Substring or grouped capture text from the pattern.
+            source: Origin label, or an absolute path when matching per file.
+
+        Returns:
+            Match record with an extended description when a path-like source is present.
+        """
+        event = super()._build_regex_event(regex_obj, match, source)
+        if source and source != "regex_search":
+            event.description = f"{regex_obj.message} [file: {source}]"
+        return event
+
+    def analyze_data(
+        self,
+        data: RegexSearchData,
+        args: Optional[RegexSearchAnalyzerArgs] = None,
+    ) -> TaskResult:
+        """Scan loaded inputs with the given patterns, or mark the task not run if inputs are incomplete.
+
+        Args:
+            data: Aggregated and per-file text loaded from the user data path.
+            args: Optional pattern list and timing knobs; omitted or empty patterns skip work.
+
+        Returns:
+            Work outcome with match events, or a not-run status when patterns are absent.
+        """
+        if args is None or not args.error_regex:
+            self.result.status = ExecutionStatus.NOT_RAN
+            self.result.message = (
+                "No error_regex patterns provided; nothing to analyze"
+                if args is not None
+                else "No analysis_args provided; nothing to analyze"
+            )
+            return self.result
+
+        final_regex = self._convert_and_extend_error_regex(args.error_regex, [])
+
+        if data.files:
+            for rel_path in sorted(data.files.keys()):
+                file_content = data.files[rel_path]
+                abs_source = os.path.normpath(os.path.join(data.data_root, rel_path))
+                self.result.events += self.check_all_regexes(
+                    content=file_content,
+                    source=abs_source,
+                    error_regex=final_regex,
+                    num_timestamps=args.num_timestamps,
+                    interval_to_collapse_event=args.interval_to_collapse_event,
+                )
+        else:
+            self.result.events += self.check_all_regexes(
+                content=data.content,
+                source=data.data_root or "regex_search",
+                error_regex=final_regex,
+                num_timestamps=args.num_timestamps,
+                interval_to_collapse_event=args.interval_to_collapse_event,
+            )
+        return self.result
diff --git a/nodescraper/plugins/regex_search/regex_search_data.py b/nodescraper/plugins/regex_search/regex_search_data.py
new file mode 100644
index 00000000..a12b2841
--- /dev/null
+++ b/nodescraper/plugins/regex_search/regex_search_data.py
@@ -0,0 +1,107 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+import os
+from pathlib import Path
+from typing import Union
+
+from pydantic import Field
+
+from nodescraper.models import DataModel
+from nodescraper.utils import get_unique_filename
+
+
+class RegexSearchData(DataModel):
+    """Loaded file or directory contents passed to the analyzer (via --data)."""
+
+    content: str
+    data_root: str = ""
+    files: dict[str, str] = Field(default_factory=dict)
+
+    def log_model(self, log_path: str) -> None:
+        """Persist the aggregated text payload as one log file under the given base path.
+
+        Args:
+            log_path: Directory where the log file should be written.
+
+        Returns:
+            None.
+        """
+        log_name = os.path.join(log_path, get_unique_filename(log_path, "regex_search_source.log"))
+        with open(log_name, "w", encoding="utf-8") as log_file:
+            log_file.write(self.content)
+
+    @classmethod
+    def import_model(cls, model_input: Union[dict, str]) -> "RegexSearchData":
+        """Build an instance from a dict of fields or a filesystem path string.
+
+        Args:
+            model_input: Keyed fields for direct validation, or a path string to load from disk.
+
+        Returns:
+            Instance with content, root path, and per-file bodies filled in.
+        """
+        if isinstance(model_input, dict):
+            return cls.model_validate(model_input)
+        if isinstance(model_input, str):
+            return cls._from_filesystem_path(model_input)
+        raise ValueError("Invalid input for regex search data")
+
+    @classmethod
+    def _from_filesystem_path(cls, path: str) -> "RegexSearchData":
+        """Read one file or every file under a directory into a merged view plus a path-to-text map.
+
+        Args:
+            path: Absolute or resolvable path to a file or directory.
+
+        Returns:
+            Instance built from the read text and discovered relative paths.
+
+        """
+        path = os.path.abspath(path)
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Path not found: {path}")
+        if os.path.isfile(path):
+            text = Path(path).read_text(encoding="utf-8", errors="replace")
+            rel = os.path.basename(path)
+            data_root = os.path.dirname(path) or os.path.abspath(os.path.curdir)
+            return cls(content=text, data_root=data_root, files={rel: text})
+        if os.path.isdir(path):
+            files: dict[str, str] = {}
+            parts: list[str] = []
+            for root, _dirs, filenames in os.walk(path):
+                for name in sorted(filenames):
+                    fp = os.path.join(root, name)
+                    if not os.path.isfile(fp):
+                        continue
+                    rel = os.path.relpath(fp, path)
+                    try:
+                        text = Path(fp).read_text(encoding="utf-8", errors="replace")
+                    except OSError:
+                        continue
+                    files[rel] = text
+                    parts.append(f"===== {rel} =====\n{text}")
+            return cls(content="\n".join(parts), data_root=path, files=files)
+        raise ValueError(f"Unsupported path type: {path}")
diff --git a/nodescraper/plugins/regex_search/regex_search_plugin.py b/nodescraper/plugins/regex_search/regex_search_plugin.py
new file mode 100644
index 00000000..808e2a3e
--- /dev/null
+++ b/nodescraper/plugins/regex_search/regex_search_plugin.py
@@ -0,0 +1,47 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams
+from nodescraper.interfaces import DataPlugin
+from nodescraper.models import CollectorArgs
+
+from .analyzer_args import RegexSearchAnalyzerArgs
+from .regex_search_analyzer import RegexSearchAnalyzer
+from .regex_search_data import RegexSearchData
+
+
+class RegexSearchPlugin(
+    DataPlugin[
+        InBandConnectionManager,
+        SSHConnectionParams,
+        RegexSearchData,
+        CollectorArgs,
+        RegexSearchAnalyzerArgs,
+    ]
+):
+    """Analyzer-only plugin: search user regexes against a file or directory (--data)."""
+
+    DATA_MODEL = RegexSearchData
+    ANALYZER = RegexSearchAnalyzer
diff --git a/test/functional/fixtures/regex_search_multi_logs/app_error.log b/test/functional/fixtures/regex_search_multi_logs/app_error.log
new file mode 100644
index 00000000..391fb32c
--- /dev/null
+++ b/test/functional/fixtures/regex_search_multi_logs/app_error.log
@@ -0,0 +1,3 @@
+startup complete
+ERROR: dependency timeout connecting to backend
+shutdown clean
diff --git a/test/functional/fixtures/regex_search_multi_logs/no_errors.log b/test/functional/fixtures/regex_search_multi_logs/no_errors.log
new file mode 100644
index 00000000..854d9322
--- /dev/null
+++ b/test/functional/fixtures/regex_search_multi_logs/no_errors.log
@@ -0,0 +1,2 @@
+2026-04-09 service healthy
+all checks passed
diff --git a/test/functional/fixtures/regex_search_multi_logs/storage_warn.log b/test/functional/fixtures/regex_search_multi_logs/storage_warn.log
new file mode 100644
index 00000000..bc11a75a
--- /dev/null
+++ b/test/functional/fixtures/regex_search_multi_logs/storage_warn.log
@@ -0,0 +1,3 @@
+mount ok
+WARNING: disk full on /data within 5%
+idle
diff --git a/test/functional/fixtures/regex_search_sample.log b/test/functional/fixtures/regex_search_sample.log
new file mode 100644
index 00000000..7564228b
--- /dev/null
+++ b/test/functional/fixtures/regex_search_sample.log
@@ -0,0 +1,4 @@
+2026-04-09T10:00:00Z service started OK
+2026-04-09T10:01:00Z ERROR: connection reset by peer
+2026-04-09T10:02:00Z routine check passed
+2026-04-09T10:03:00Z WARNING: disk full on /var
diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py
index 014befa8..8df56e95 100644
--- a/test/unit/framework/test_cli.py
+++ b/test/unit/framework/test_cli.py
@@ -148,6 +148,25 @@ def test_system_info_builder():
                 [],
             ),
         ),
+        (
+            [
+                "run-plugins",
+                "RegexSearchPlugin",
+                "--error-regex",
+                '{"regex":"a","message":"b","event_category":"UNKNOWN"}',
+            ],
+            ["RegexSearchPlugin"],
+            (
+                ["run-plugins"],
+                {
+                    "RegexSearchPlugin": [
+                        "--error-regex",
+                        '{"regex":"a","message":"b","event_category":"UNKNOWN"}',
+                    ],
+                },
+                [],
+            ),
+        ),
     ],
 )
 def test_process_args(raw_arg_input, plugin_names, exp_output):
diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py
new file mode 100644
index 00000000..3b2b52e4
--- /dev/null
+++ b/test/unit/plugin/test_regex_search_analyzer.py
@@ -0,0 +1,128 @@
+###############################################################################
+#
+# MIT License
+#
+# Copyright (c) 2026 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+###############################################################################
+import os
+import tempfile
+
+from nodescraper.enums.executionstatus import ExecutionStatus
+from nodescraper.plugins.regex_search.analyzer_args import RegexSearchAnalyzerArgs
+from nodescraper.plugins.regex_search.regex_search_analyzer import RegexSearchAnalyzer
+from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData
+from nodescraper.plugins.regex_search.regex_search_plugin import RegexSearchPlugin
+
+
+def test_regex_search_data_from_file():
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f:
+        f.write("alpha\nbeta ERROR gamma\n")
+        path = f.name
+    try:
+        data = RegexSearchData.import_model(path)
+        assert "ERROR" in data.content
+        assert os.path.basename(path) in data.files
+        assert data.data_root == os.path.dirname(path)
+    finally:
+        os.unlink(path)
+
+
+def test_regex_search_data_from_directory():
+    with tempfile.TemporaryDirectory() as tmp:
+        with open(f"{tmp}/a.txt", "w", encoding="utf-8") as f:
+            f.write("one")
+        with open(f"{tmp}/b.txt", "w", encoding="utf-8") as f:
+            f.write("two")
+        data = RegexSearchData.import_model(tmp)
+        assert data.data_root == os.path.abspath(tmp)
+        assert set(data.files.keys()) == {"a.txt", "b.txt"}
+        assert data.files["a.txt"] == "one"
+        assert data.files["b.txt"] == "two"
+        assert "===== a.txt =====" in data.content
+        assert "===== b.txt =====" in data.content
+
+
+def test_regex_search_analyzer_match(system_info):
+    data = RegexSearchData(content="line1\nFATAL: boom\nline3")
+    analyzer = RegexSearchAnalyzer(system_info=system_info)
+    args = RegexSearchAnalyzerArgs(
+        error_regex=[{"regex": r"FATAL:.*", "message": "fatal seen"}],
+    )
+    result = analyzer.analyze_data(data, args)
+    assert result.status == ExecutionStatus.OK
+    assert len(result.events) == 1
+    assert result.events[0].description == "fatal seen"
+
+
+def test_regex_search_analyzer_missing_args(system_info):
+    data = RegexSearchData(content="x")
+    analyzer = RegexSearchAnalyzer(system_info=system_info)
+    result = analyzer.analyze_data(data, None)
+    assert result.status == ExecutionStatus.NOT_RAN
+
+    result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None))
+    assert result.status == ExecutionStatus.NOT_RAN
+
+    result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[]))
+    assert result.status == ExecutionStatus.NOT_RAN
+
+
+def test_regex_search_plugin_analyzer_only(system_info, logger):
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f:
+        f.write("match_me_here\n")
+        path = f.name
+    try:
+        plugin = RegexSearchPlugin(system_info=system_info, logger=logger)
+        out = plugin.run(
+            collection=False,
+            analysis=True,
+            data=path,
+            analysis_args={
+                "error_regex": [{"regex": r"match_me_here", "message": "found"}],
+            },
+        )
+        assert out.status == ExecutionStatus.OK
+        assert out.result_data.analysis_result.status == ExecutionStatus.OK
+        assert len(out.result_data.analysis_result.events) == 1
+        desc = out.result_data.analysis_result.events[0].description
+        assert "found" in desc
+        assert "[file:" in desc
+        assert path.replace("\\", "/") in desc.replace("\\", "/")
+    finally:
+        os.unlink(path)
+
+
+def test_regex_search_multi_file_event_paths(system_info):
+    with tempfile.TemporaryDirectory() as tmp:
+        open(os.path.join(tmp, "clean.log"), "w", encoding="utf-8").write("ok\n")
+        open(os.path.join(tmp, "bad.log"), "w", encoding="utf-8").write("ERROR: boom\n")
+        data = RegexSearchData.import_model(tmp)
+        analyzer = RegexSearchAnalyzer(system_info=system_info)
+        args = RegexSearchAnalyzerArgs(
+            error_regex=[{"regex": r"ERROR[: ].*", "message": "err line"}],
+        )
+        result = analyzer.analyze_data(data, args)
+        assert result.status == ExecutionStatus.ERROR
+        assert len(result.events) == 1
+        assert "err line" in result.events[0].description
+        assert "[file:" in result.events[0].description
+        assert "bad.log" in result.events[0].description

From 12a396ede8819b342f471a6f7471b4ca3796223c Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alex.bara@amd.com>
Date: Thu, 9 Apr 2026 14:12:58 -0500
Subject: [PATCH 2/4] Reword missing-args message; warn in RegexSearchPlugin.analyze

---
 .../regex_search/regex_search_analyzer.py     |  6 +---
 .../regex_search/regex_search_plugin.py       | 31 ++++++++++++++++++-
 .../unit/plugin/test_regex_search_analyzer.py |  4 +++
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/nodescraper/plugins/regex_search/regex_search_analyzer.py b/nodescraper/plugins/regex_search/regex_search_analyzer.py
index 6bd79aff..0b4384f4 100644
--- a/nodescraper/plugins/regex_search/regex_search_analyzer.py
+++ b/nodescraper/plugins/regex_search/regex_search_analyzer.py
@@ -75,11 +75,7 @@ def analyze_data(
         """
         if args is None or not args.error_regex:
             self.result.status = ExecutionStatus.NOT_RAN
-            self.result.message = (
-                "No error_regex patterns provided; nothing to analyze"
-                if args is not None
-                else "No analysis_args provided; nothing to analyze"
-            )
+            self.result.message = "Analysis args need to be provided for the analyzer to run"
             return self.result
 
         final_regex = self._convert_and_extend_error_regex(args.error_regex, [])
diff --git a/nodescraper/plugins/regex_search/regex_search_plugin.py b/nodescraper/plugins/regex_search/regex_search_plugin.py
index 808e2a3e..36d650c6 100644
--- a/nodescraper/plugins/regex_search/regex_search_plugin.py
+++ b/nodescraper/plugins/regex_search/regex_search_plugin.py
@@ -23,9 +23,12 @@
 # SOFTWARE.
 #
 ###############################################################################
+from typing import Optional, Union
+
 from nodescraper.connection.inband import InBandConnectionManager, SSHConnectionParams
+from nodescraper.enums import EventPriority
 from nodescraper.interfaces import DataPlugin
-from nodescraper.models import CollectorArgs
+from nodescraper.models import CollectorArgs, TaskResult
 
 from .analyzer_args import RegexSearchAnalyzerArgs
 from .regex_search_analyzer import RegexSearchAnalyzer
@@ -45,3 +48,29 @@ class RegexSearchPlugin(
 
     DATA_MODEL = RegexSearchData
     ANALYZER = RegexSearchAnalyzer
+
+    def analyze(
+        self,
+        max_event_priority_level: Optional[Union[EventPriority, str]] = EventPriority.CRITICAL,
+        analysis_args: Optional[Union[RegexSearchAnalyzerArgs, dict]] = None,
+        data: Optional[Union[str, dict, RegexSearchData]] = None,
+    ) -> TaskResult:
+        if analysis_args is None:
+            missing_error_regex = True
+        elif isinstance(analysis_args, RegexSearchAnalyzerArgs):
+            missing_error_regex = not bool(analysis_args.error_regex)
+        elif isinstance(analysis_args, dict):
+            er = analysis_args.get("error_regex")
+            missing_error_regex = er is None or er == []
+        else:
+            missing_error_regex = True
+        if missing_error_regex:
+            self.logger.warning(
+                "RegexSearchPlugin: analysis args need to be provided for the analyzer to run "
+                "(e.g. --error-regex for each pattern)."
+            )
+        return super().analyze(
+            max_event_priority_level=max_event_priority_level,
+            analysis_args=analysis_args,
+            data=data,
+        )
diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py
index 3b2b52e4..e2c86225 100644
--- a/test/unit/plugin/test_regex_search_analyzer.py
+++ b/test/unit/plugin/test_regex_search_analyzer.py
@@ -76,14 +76,18 @@ def test_regex_search_analyzer_match(system_info):
 def test_regex_search_analyzer_missing_args(system_info):
     data = RegexSearchData(content="x")
     analyzer = RegexSearchAnalyzer(system_info=system_info)
+    expected = "Analysis args need to be provided for the analyzer to run"
     result = analyzer.analyze_data(data, None)
     assert result.status == ExecutionStatus.NOT_RAN
+    assert result.message == expected
 
     result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None))
     assert result.status == ExecutionStatus.NOT_RAN
+    assert result.message == expected
 
     result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[]))
     assert result.status == ExecutionStatus.NOT_RAN
+    assert result.message == expected
 
 
 def test_regex_search_plugin_analyzer_only(system_info, logger):

From cc5422ac5407c75d7147426fdf69578b176d7835 Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alex.bara@amd.com>
Date: Thu, 9 Apr 2026 15:18:46 -0500
Subject: [PATCH 3/4] unit test update

---
 .../unit/plugin/test_regex_search_analyzer.py | 72 +++++++++++++++++--
 1 file changed, 65 insertions(+), 7 deletions(-)

diff --git a/test/unit/plugin/test_regex_search_analyzer.py b/test/unit/plugin/test_regex_search_analyzer.py
index e2c86225..ac018ee1 100644
--- a/test/unit/plugin/test_regex_search_analyzer.py
+++ b/test/unit/plugin/test_regex_search_analyzer.py
@@ -23,6 +23,7 @@
 # SOFTWARE.
 #
 ###############################################################################
+import logging
 import os
 import tempfile
 
@@ -32,6 +33,8 @@
 from nodescraper.plugins.regex_search.regex_search_data import RegexSearchData
 from nodescraper.plugins.regex_search.regex_search_plugin import RegexSearchPlugin
 
+EXPECTED_MISSING_ANALYSIS_MSG = "Analysis args need to be provided for the analyzer to run"
+
 
 def test_regex_search_data_from_file():
     with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False, encoding="utf-8") as f:
@@ -68,7 +71,9 @@ def test_regex_search_analyzer_match(system_info):
         error_regex=[{"regex": r"FATAL:.*", "message": "fatal seen"}],
     )
     result = analyzer.analyze_data(data, args)
-    assert result.status == ExecutionStatus.OK
+    assert result.status == ExecutionStatus.ERROR
+    assert "task detected errors" in result.message
+    assert "fatal seen" in result.message
     assert len(result.events) == 1
     assert result.events[0].description == "fatal seen"
 
@@ -76,18 +81,69 @@ def test_regex_search_analyzer_match(system_info):
 def test_regex_search_analyzer_missing_args(system_info):
     data = RegexSearchData(content="x")
     analyzer = RegexSearchAnalyzer(system_info=system_info)
-    expected = "Analysis args need to be provided for the analyzer to run"
     result = analyzer.analyze_data(data, None)
     assert result.status == ExecutionStatus.NOT_RAN
-    assert result.message == expected
+    assert result.message == EXPECTED_MISSING_ANALYSIS_MSG
 
     result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=None))
     assert result.status == ExecutionStatus.NOT_RAN
-    assert result.message == expected
+    assert result.message == EXPECTED_MISSING_ANALYSIS_MSG
 
     result = analyzer.analyze_data(data, RegexSearchAnalyzerArgs(error_regex=[]))
     assert result.status == ExecutionStatus.NOT_RAN
-    assert result.message == expected
+    assert result.message == EXPECTED_MISSING_ANALYSIS_MSG
+
+
+def test_regex_search_plugin_missing_error_regex_not_ran_and_warning(
+    system_info, logger, caplog, tmp_path
+):
+    log_file = tmp_path / "sample.log"
+    log_file.write_text("line\n", encoding="utf-8")
+    plugin = RegexSearchPlugin(system_info=system_info, logger=logger)
+    with caplog.at_level(logging.WARNING, logger=logger.name):
+        out = plugin.run(
+            collection=False,
+            analysis=True,
+            data=str(log_file),
+            analysis_args=None,
+        )
+    assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN
+    assert out.result_data.analysis_result.message == EXPECTED_MISSING_ANALYSIS_MSG
+    assert any(
+        "analysis args need to be provided" in r.getMessage().lower() for r in caplog.records
+    )
+
+
+def test_regex_search_plugin_empty_analysis_args_dict_not_ran(system_info, logger, tmp_path):
+    log_file = tmp_path / "sample.log"
+    log_file.write_text("line\n", encoding="utf-8")
+    plugin = RegexSearchPlugin(system_info=system_info, logger=logger)
+    out = plugin.run(
+        collection=False,
+        analysis=True,
+        data=str(log_file),
+        analysis_args={},
+    )
+    assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN
+    assert out.result_data.analysis_result.message == EXPECTED_MISSING_ANALYSIS_MSG
+
+
+def test_regex_search_plugin_no_data_warns_and_data_message(system_info, logger, caplog):
+    plugin = RegexSearchPlugin(system_info=system_info, logger=logger)
+    with caplog.at_level(logging.WARNING, logger=logger.name):
+        out = plugin.run(
+            collection=False,
+            analysis=True,
+            data=None,
+            analysis_args=None,
+        )
+    assert out.result_data.analysis_result.status == ExecutionStatus.NOT_RAN
+    assert "No data available to analyze for RegexSearchPlugin" in (
+        out.result_data.analysis_result.message
+    )
+    assert any(
+        "analysis args need to be provided" in r.getMessage().lower() for r in caplog.records
+    )
 
 
 def test_regex_search_plugin_analyzer_only(system_info, logger):
@@ -104,8 +160,10 @@ def test_regex_search_plugin_analyzer_only(system_info, logger):
                 "error_regex": [{"regex": r"match_me_here", "message": "found"}],
             },
         )
-        assert out.status == ExecutionStatus.OK
-        assert out.result_data.analysis_result.status == ExecutionStatus.OK
+        assert out.status == ExecutionStatus.ERROR
+        assert "Analysis error:" in out.message
+        assert "found" in out.message
+        assert out.result_data.analysis_result.status == ExecutionStatus.ERROR
         assert len(out.result_data.analysis_result.events) == 1
         desc = out.result_data.analysis_result.events[0].description
         assert "found" in desc

From 51026bc9ffff2ef97be5b5a1263d16f798e2f282 Mon Sep 17 00:00:00 2001
From: Alexandra Bara <alex.bara@amd.com>
Date: Thu, 9 Apr 2026 15:38:18 -0500
Subject: [PATCH 4/4] fix to show description for --data

---
 nodescraper/cli/dynamicparserbuilder.py | 37 +++++++++++++++++++++----
 nodescraper/interfaces/dataplugin.py    |  9 +++++-
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/nodescraper/cli/dynamicparserbuilder.py b/nodescraper/cli/dynamicparserbuilder.py
index 54bd0d2e..8c0c9c68 100644
--- a/nodescraper/cli/dynamicparserbuilder.py
+++ b/nodescraper/cli/dynamicparserbuilder.py
@@ -43,15 +43,40 @@
 from nodescraper.typeutils import TypeUtils
 
 
+def _help_from_annotated(anno: object) -> str:
+    """Pull CLI help from ``Annotated[T, metadata...]`` (string or ``Field(description=...)``)."""
+    if anno is None or get_origin(anno) is not Annotated:
+        return ""
+    for meta in get_args(anno)[1:]:
+        if isinstance(meta, str):
+            return meta
+        desc = getattr(meta, "description", None)
+        if isinstance(desc, str) and desc.strip():
+            return desc
+    return ""
+
+
 def _get_run_arg_help(plugin_class: Type[PluginInterface], arg: str) -> str:
     """Get help text for a run() parameter from typing.Annotated metadata on the parameter."""
     try:
-        hints = get_type_hints(plugin_class.run, include_extras=True)
-        anno = hints.get(arg)
-        if anno is not None and get_origin(anno) is Annotated:
-            args = get_args(anno)
-            if len(args) >= 2 and isinstance(args[1], str):
-                return args[1]
+        run_obj = None
+        for cls in plugin_class.__mro__:
+            if "run" in cls.__dict__:
+                run_obj = cls.__dict__["run"]
+                break
+        if run_obj is None:
+            run_obj = plugin_class.run
+        run_fn = run_obj
+        if isinstance(run_obj, staticmethod):
+            run_fn = run_obj.__func__
+        elif isinstance(run_obj, classmethod):
+            run_fn = run_obj.__func__
+        raw = getattr(run_fn, "__annotations__", {}).get(arg)
+        text = _help_from_annotated(raw)
+        if text:
+            return text
+        hints = get_type_hints(run_fn, include_extras=True)
+        return _help_from_annotated(hints.get(arg))
     except Exception:
         pass
     return ""
diff --git a/nodescraper/interfaces/dataplugin.py b/nodescraper/interfaces/dataplugin.py
index ca7e7daf..ed632fb4 100644
--- a/nodescraper/interfaces/dataplugin.py
+++ b/nodescraper/interfaces/dataplugin.py
@@ -29,6 +29,8 @@
 from pathlib import Path
 from typing import Annotated, Any, Generic, Optional, Type, Union
 
+from pydantic import Field
+
 from nodescraper.enums import EventPriority, ExecutionStatus, SystemInteractionLevel
 from nodescraper.generictypes import TAnalyzeArg, TCollectArg, TDataModel
 from nodescraper.interfaces.dataanalyzertask import DataAnalyzer
@@ -313,7 +315,12 @@ def run(
         preserve_connection: bool = False,
         data: Annotated[
             Optional[Union[str, dict, TDataModel]],
-            "Path to pre-collected data (file or directory). Load this data instead of collecting; use with --collection False to run only the analyzer.",
+            Field(
+                description=(
+                    "Path to pre-collected data"
+                    "; use with --collection False to run the analyzer only."
+                ),
+            ),
         ] = None,
         collection_args: Optional[Union[TCollectArg, dict]] = None,
         analysis_args: Optional[Union[TAnalyzeArg, dict]] = None,