Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yara improvements #1212

Merged
merged 16 commits into from
Dec 27, 2024
2 changes: 1 addition & 1 deletion core/database_arango.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def tag(
if not isinstance(tags, (list, set, tuple)):
raise ValueError("Tags must be of type list, set or tuple.")

tags = [t.strip() for t in tags if t.strip()]
tags = list({t.strip() for t in tags if t.strip()})
if strict:
self.clear_tags()

Expand Down
132 changes: 124 additions & 8 deletions core/schemas/indicators/yara.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import ClassVar, Literal
from typing import Any, ClassVar, Literal

import plyara
import plyara.exceptions
import plyara.utils
import yara
from pydantic import BaseModel, PrivateAttr, field_validator
from pydantic import BaseModel, PrivateAttr, model_validator

from core.schemas import indicator

Expand Down Expand Up @@ -111,16 +114,52 @@ class Yara(indicator.Indicator):

_type_filter: ClassVar[str] = "yara"
_compiled_pattern: yara.Match | None = PrivateAttr(None)

name: str = "" # gets overridden during validation
type: Literal["yara"] = "yara"
dependencies: list[str] = []
private: bool = False

@field_validator("pattern")
@model_validator(mode="before")
@classmethod
def validate_yara(cls, value) -> str:
def validate_yara(cls, data: Any):
rule = data.get("pattern")
if not rule:
raise ValueError("Yara rule body is required.")
try:
yara.compile(source=value, externals=ALLOWED_EXTERNALS)
except yara.SyntaxError as error:
raise ValueError(f"Invalid Yara rule: {error}")
return value
rules = plyara.Plyara().parse_string(rule)
except plyara.exceptions.ParseTypeError as error:
raise ValueError(str(error)) from error
if len(rules) > 1:
raise ValueError("Only one Yara rule is allowed in the rule body.")
if not rules:
raise ValueError("No valid Yara rules found in the rule body.")
parsed_rule = rules[0]
rule_deps = set(plyara.utils.detect_dependencies(parsed_rule))
data["dependencies"] = rule_deps - ALLOWED_EXTERNALS.keys()
data["name"] = parsed_rule["rule_name"]
data["private"] = "private" in parsed_rule.get("scopes", [])

return data

def save(self):
    """Save the rule, then bring its outbound "depends" links up to date.

    After the parent ``save()`` returns, every outbound "depends" link
    (one hop) whose target name is not listed in ``dependencies`` is
    deleted, and ``link_to`` is called for each listed dependency.

    Returns:
        The object returned by the parent ``save()``.

    Raises:
        ValueError: If a name in ``dependencies`` matches no Yara rule
            returned by ``Yara.find``.
    """
    # Everything below operates on the object handed back by the parent save.
    saved = super().save()
    vertices, paths, _ = saved.neighbors(
        link_types=["depends"], direction="outbound", max_hops=1
    )

    # Remove links pointing at rules that are not listed as dependencies.
    for link in (hop for path in paths for hop in path):
        if vertices[link.target].name in saved.dependencies:
            continue
        link.delete()

    # Link to every listed dependency; an unresolvable name is an error.
    for dep_name in saved.dependencies:
        target = Yara.find(name=dep_name)
        if not target:
            raise ValueError(f"Rule depends on unknown dependency '{dep_name}'")
        saved.link_to(target, "depends", "Depends on")

    return saved

@property
def compiled_pattern(self):
Expand All @@ -134,3 +173,80 @@ def match(self, value: str | bytes) -> YaraMatch | None:
if result:
return YaraMatch(matches=yaramatch.matches)
return None

@classmethod
def import_bulk_rules(cls, bulk_rule_text: str, tags: list[str] | None = None):
    """Import every rule contained in a multi-rule Yara text.

    The whole text is compiled once with ``yara`` to reject invalid input,
    then split into individual rules with ``plyara``. Each rule is rebuilt
    as standalone text, saved as its own indicator, and tagged with the
    caller-supplied tags plus the rule's own tags.

    Args:
        bulk_rule_text: The text containing the bulk rules.
        tags: A list of tags to apply to the imported rules.

    Raises:
        ValueError: If the text does not compile as Yara.
    """
    # Local import: logging is not among this module's visible imports.
    import logging

    # Copy so the caller's list is never shared or mutated.
    base_tags = list(tags) if tags else []

    try:
        yara.compile(source=bulk_rule_text, externals=ALLOWED_EXTERNALS)
    except yara.SyntaxError as error:
        raise ValueError(str(error)) from error

    for rule in plyara.Plyara().parse_string(bulk_rule_text):
        logging.info("Processing %s", rule["rule_name"])
        yara_object = cls(
            name=rule["rule_name"],
            pattern=plyara.utils.rebuild_yara_rule(rule),
            diamond=indicator.DiamondModel.capability,
            location=rule.get("scan_context", "N/A"),
        ).save()

        # Tags may be missing, empty, a list, or a comma-separated string.
        rule_tags = rule.get("tags") or []
        if isinstance(rule_tags, str):
            rule_tags = rule_tags.split(",")

        all_tags = base_tags + list(rule_tags)
        if all_tags:
            yara_object.tag(all_tags)

def rule_with_dependencies(
    self, resolved: set[str] | None = None, seen: set[str] | None = None
) -> str:
    """Return this rule's text preceded by the rules it depends on.

    Dependencies are detected from the parsed pattern, looked up by name,
    and expanded recursively so that each rule's text appears once, after
    the rules it depends on.

    Args:
        resolved: Names of rules whose text has already been emitted;
            shared across the recursion so no rule is emitted twice.
        seen: Names of rules on the current recursion path; used to
            detect circular dependencies.

    Returns:
        A string containing the original rule text with dependencies added.

    Raises:
        ValueError: On a circular dependency, or when a dependency name
            matches no rule returned by ``Yara.find``.
    """
    if resolved is None:
        resolved = set()
    if seen is None:
        seen = set()

    if self.name in seen:
        raise ValueError(f"Circular dependency detected: {self.name}")

    seen.add(self.name)
    chunks = []
    try:
        parsed_rule = plyara.Plyara().parse_string(self.pattern)[0]
        # External variables are not rules: skip them, as validation does
        # when it fills in `dependencies`.
        dependencies = [
            name
            for name in plyara.utils.detect_dependencies(parsed_rule)
            if name not in ALLOWED_EXTERNALS
        ]

        for dependency in dependencies:
            dep_rule = Yara.find(name=dependency)
            if not dep_rule:
                raise ValueError(
                    f"Rule depends on unknown dependency '{dependency}'"
                )
            if dep_rule.name not in resolved:
                chunks.append(dep_rule.rule_with_dependencies(resolved, seen))

        if self.name not in resolved:
            chunks.append(self.pattern + "\n\n")
            resolved.add(self.name)
    finally:
        # Always leave the recursion path clean, even when an error propagates.
        seen.discard(self.name)

    return "".join(chunks)
32 changes: 16 additions & 16 deletions plugins/feeds/public/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,22 @@ def run(self):
tempdir, "artifacts-main", "artifacts", "data"
)

data_files_glob = glob.glob(os.path.join(artifacts_datadir, "*.yaml"))
artifacts_dict = {}
for file in data_files_glob:
result = validator_object.CheckFile(file)
if not result:
logging.error("Failed to validate %s, skipping", file)
continue
logging.info("Processing %s", file)
with open(file, "r") as f:
yaml_string = f.read()

forensic_indicators = indicator.ForensicArtifact.from_yaml_string(
yaml_string, update_parents=False
)
for fi in forensic_indicators:
artifacts_dict[fi.name] = fi
data_files_glob = glob.glob(os.path.join(artifacts_datadir, "*.yaml"))
artifacts_dict = {}
for file in data_files_glob:
result = validator_object.CheckFile(file)
if not result:
logging.error("Failed to validate %s, skipping", file)
continue
logging.info("Processing %s", file)
with open(file, "r") as f:
yaml_string = f.read()

forensic_indicators = indicator.ForensicArtifact.from_yaml_string(
yaml_string, update_parents=False
)
for fi in forensic_indicators:
artifacts_dict[fi.name] = fi

for artifact in artifacts_dict.values():
artifact.update_parents(artifacts_dict)
Expand Down
36 changes: 7 additions & 29 deletions plugins/feeds/public/signaturebase.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
import glob
import json
import logging
import os
import tempfile
from datetime import timedelta
from io import BytesIO
from zipfile import ZipFile

import yara

from core import taskmanager
from core.schemas import entity, indicator, task

ALLOWED_EXTERNALS = {
"filename": "",
"filepath": "",
"extension": "",
"filetype": "",
"owner": "",
}
from core.schemas import indicator, task


class Neo23x0SignatureBase(task.FeedTask):
Expand All @@ -41,24 +30,13 @@ def run(self):
ZipFile(BytesIO(response.content)).extractall(path=tempdir)
rules_path = os.path.join(tempdir, "signature-base-master", "yara")

for file in glob.glob(f"{rules_path}/*.yar"):
with open(file, "r") as f:
rule = f.read()

try:
yara.compile(source=rule, externals=ALLOWED_EXTERNALS)
except Exception as e:
logging.warning(f"Error compiling rule {file}: {e}")
raise

yara_object = indicator.Yara(
name=f"Neo23x0: {os.path.basename(file)}",
pattern=rule,
diamond=indicator.DiamondModel.capability,
location="filesystem",
).save()
for file in glob.glob(f"{rules_path}/*.yar"):
with open(file, "r") as f:
rule = f.read()

yara_object.tag(["Neo23x0", "signature-base"])
indicator.Yara.import_bulk_rules(
rule, tags=["Neo23x0", "signature-base"]
)


taskmanager.TaskManager.register_task(Neo23x0SignatureBase)
45 changes: 45 additions & 0 deletions plugins/feeds/public/yaraforge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import glob
import json
import logging
import os
import tempfile
from datetime import timedelta
from io import BytesIO
from zipfile import ZipFile

from core import taskmanager
from core.schemas import indicator, task

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class YaraForge(task.FeedTask):
    """Feed task that imports the YaraForge "core" Yara rule package."""

    _defaults = {
        "name": "YaraForge",
        "frequency": timedelta(days=1),
        "type": "feed",
        "description": "Collection of community Yara rules: https://yarahq.github.io/",
    }

    _SOURCE_ZIP = "https://github.com/YARAHQ/yara-forge/releases/latest/download/yara-forge-rules-core.zip"

    def run(self):
        """Download the rule archive and bulk-import the rules it contains.

        The archive is extracted into a temporary directory, the core rule
        file is read, and its content is handed to
        ``indicator.Yara.import_bulk_rules`` with the "yara-forge-core" tag.
        Returns early when the request yields no response.
        """
        response = self._make_request(self._SOURCE_ZIP, no_cache=True)
        if not response:
            # Use the module-level logger rather than the root logger.
            logger.info("No response: skipping %s update", self.name)
            return

        with tempfile.TemporaryDirectory() as tempdir:
            with ZipFile(BytesIO(response.content)) as archive:
                archive.extractall(path=tempdir)

            rules_path = os.path.join(
                tempdir, "packages", "core", "yara-rules-core.yar"
            )
            # Explicit encoding so the read does not depend on the platform default.
            with open(rules_path, "r", encoding="utf-8") as f:
                rules = f.read()

            indicator.Yara.import_bulk_rules(rules, tags=["yara-forge-core"])


taskmanager.TaskManager.register_task(YaraForge)
29 changes: 27 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ idstools = "^0.6.5"
aenum = "^3.1.15"
boto3 = { version = "^1.35.22", optional = true }
tqdm = "^4.67.1"
plyara = "2.2.1" # pinned until https://github.com/plyara/plyara/issues/143 is addressed

[tool.poetry.group.dev.dependencies]
pylint = "^2.16.1"
Expand Down
8 changes: 4 additions & 4 deletions tests/apiv2/indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def test_bad_regex(self):

def test_bad_yara(self):
indicator_dict = {
"name": "badYara",
"type": "yara",
"pattern": "rule test {",
"location": "filesystem",
Expand All @@ -174,11 +173,12 @@ def test_bad_yara(self):
)
self.assertEqual(response.status_code, 422)
data = response.json()
self.assertIn("Value error, Invalid Yara rule", data["detail"][0]["msg"])
self.assertIn(
"No valid Yara rules found in the rule body", data["detail"][0]["msg"]
)

def test_new_yara(self):
indicator_dict = {
"name": "yara",
"type": "yara",
"pattern": 'rule test { strings: $a = "test" condition: $a }',
"location": "filesystem",
Expand All @@ -190,5 +190,5 @@ def test_new_yara(self):
)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(data["name"], "yara")
self.assertEqual(data["name"], "test")
self.assertEqual(data["type"], "yara")
Loading
Loading