dev to lightfuzz sync
liquidsec committed Jan 10, 2025
2 parents 85bd5ec + d383ec9 commit ac4f329
Showing 48 changed files with 2,137 additions and 109,829 deletions.
2 changes: 1 addition & 1 deletion bbot/cli.py
@@ -247,7 +247,7 @@ async def akeyboard_listen():
log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR")
log_to_stderr(traceback.format_exc(), level="TRACE")

asyncio.create_task(akeyboard_listen())
keyboard_listen_task = asyncio.create_task(akeyboard_listen()) # noqa F841

await scan.async_start_without_generator()
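
Note: the assignment above is deliberate. The event loop keeps only weak references to tasks, so a task created with asyncio.create_task() and never stored can be garbage-collected before it finishes; binding it to a name (and silencing the unused-variable warning with # noqa F841) keeps the listener alive. A minimal standalone sketch of the pattern (the coroutine and timings below are stand-ins, not BBOT code):

import asyncio

async def background_listener():
    # stand-in for something like akeyboard_listen()
    while True:
        await asyncio.sleep(1)

async def main():
    # keep a strong reference so the task isn't garbage-collected mid-run
    listener_task = asyncio.create_task(background_listener())  # noqa: F841
    await asyncio.sleep(3)  # stand-in for the rest of the scan

asyncio.run(main())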

116 changes: 76 additions & 40 deletions bbot/core/event/base.py
Expand Up @@ -516,22 +516,25 @@ def scope_distance(self, scope_distance):
new_scope_distance = min(self.scope_distance, scope_distance)
if self._scope_distance != new_scope_distance:
# remove old scope distance tags
for t in list(self.tags):
if t.startswith("distance-"):
self.remove_tag(t)
if self.host:
if scope_distance == 0:
self.add_tag("in-scope")
self.remove_tag("affiliate")
else:
self.remove_tag("in-scope")
self.add_tag(f"distance-{new_scope_distance}")
self._scope_distance = new_scope_distance
self.refresh_scope_tags()
# apply recursively to parent events
parent_scope_distance = getattr(self.parent, "scope_distance", None)
if parent_scope_distance is not None and self.parent is not self:
self.parent.scope_distance = new_scope_distance + 1

def refresh_scope_tags(self):
for t in list(self.tags):
if t.startswith("distance-"):
self.remove_tag(t)
if self.host:
if self.scope_distance == 0:
self.add_tag("in-scope")
self.remove_tag("affiliate")
else:
self.remove_tag("in-scope")
self.add_tag(f"distance-{self.scope_distance}")

@property
def scope_description(self):
"""
@@ -588,7 +591,7 @@ def parent(self, parent):
if t in ("spider-danger", "spider-max"):
self.add_tag(t)
elif not self._dummy:
log.warning(f"Tried to set invalid parent on {self}: (got: {parent})")
log.warning(f"Tried to set invalid parent on {self}: (got: {repr(parent)} ({type(parent)}))")

@property
def children(self):
@@ -1060,6 +1063,9 @@ def sanitize_data(self, data):
blob = None
try:
self._data_path = Path(data["path"])
# prepend the scan's home dir if the path is relative
if not self._data_path.is_absolute():
self._data_path = self.scan.home / self._data_path
if self._data_path.is_file():
self.add_tag("file")
if file_blobs:
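
Relative FILESYSTEM paths are now anchored under the scan's home directory before the is_file() check. The idea in isolation (the paths below are hypothetical):

from pathlib import Path

scan_home = Path("/tmp/.bbot/scans/demo_scan")  # hypothetical scan home dir
data_path = Path("output/loot.txt")             # relative path supplied by a module
if not data_path.is_absolute():
    data_path = scan_home / data_path           # anchor it under the scan's home
print(data_path)  # /tmp/.bbot/scans/demo_scan/output/loot.txt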
@@ -1244,11 +1250,25 @@ def sanitize_data(self, data):
return data

def add_tag(self, tag):
host_same_as_parent = self.parent and self.host == self.parent.host
if tag == "spider-danger" and host_same_as_parent and "spider-danger" not in self.tags:
# increment the web spider distance
if self.type == "URL_UNVERIFIED":
self.web_spider_distance += 1
self_url = getattr(self, "parsed_url", "")
self_host = getattr(self, "host", "")
# autoincrement web spider distance if the "spider-danger" tag is added
if tag == "spider-danger" and "spider-danger" not in self.tags and self_url and self_host:
parent_hosts_and_urls = set()
for p in self.get_parents():
# URL_UNVERIFIED events don't count because they haven't been visited yet
if p.type == "URL_UNVERIFIED":
continue
url = getattr(p, "parsed_url", "")
parent_hosts_and_urls.add((p.host, url))
# if there's a URL anywhere in our parent chain that's different from ours but shares our host, we're in dAnGeR
dangerous_parent = any(
p_host == self.host and p_url != self_url for p_host, p_url in parent_hosts_and_urls
)
if dangerous_parent:
# increment the web spider distance
if self.type == "URL_UNVERIFIED":
self.web_spider_distance += 1
if self.is_spider_max:
self.add_tag("spider-max")
super().add_tag(tag)
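
The spider-danger check now looks at the whole parent chain instead of just the immediate parent: an event counts as dangerous only if some already-visited parent (anything except URL_UNVERIFIED) shares its host but sits at a different URL. A standalone sketch of that predicate using plain (type, host, url) tuples rather than BBOT events:

def is_dangerous(self_host, self_url, parents):
    # parents: iterable of (event_type, host, parsed_url) tuples
    # URL_UNVERIFIED parents don't count because they haven't been visited yet
    visited = {(host, url) for etype, host, url in parents if etype != "URL_UNVERIFIED"}
    # danger = some visited parent on the same host but at a different URL
    return any(host == self_host and url != self_url for host, url in visited)

print(is_dangerous("example.com", "http://example.com/b",
                   [("URL", "example.com", "http://example.com/a")]))             # True
print(is_dangerous("example.com", "http://example.com/b",
                   [("URL_UNVERIFIED", "example.com", "http://example.com/a")]))  # False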
@@ -1414,18 +1434,22 @@ def sanitize_data(self, data):
self.parsed_url = self.validators.validate_url_parsed(url)
data["url"] = self.parsed_url.geturl()

header_dict = {}
for i in data.get("raw_header", "").splitlines():
if len(i) > 0 and ":" in i:
k, v = i.split(":", 1)
k = k.strip().lower()
v = v.lstrip()
if k in header_dict:
header_dict[k].append(v)
else:
header_dict[k] = [v]
if not "raw_header" in data:
raise ValueError("raw_header is required for HTTP_RESPONSE events")

if "header-dict" not in data:
header_dict = {}
for i in data.get("raw_header", "").splitlines():
if len(i) > 0 and ":" in i:
k, v = i.split(":", 1)
k = k.strip().lower()
v = v.lstrip()
if k in header_dict:
header_dict[k].append(v)
else:
header_dict[k] = [v]
data["header-dict"] = header_dict

data["header-dict"] = header_dict
# move URL to the front of the dictionary for visibility
data = dict(data)
new_data = {"url": data.pop("url")}
@@ -1439,6 +1463,13 @@ def _words(self):
def _pretty_string(self):
return f'{self.data["hash"]["header_mmh3"]}:{self.data["hash"]["body_mmh3"]}'

@property
def raw_response(self):
"""
Formats the status code, headers, and body into a single string formatted as an HTTP/1.1 response.
"""
return f'{self.data["raw_header"]}{self.data["body"]}'

@property
def http_status(self):
try:
@@ -1623,19 +1654,22 @@ def __init__(self, *args, **kwargs):
# detect type of file content using magic
from bbot.core.helpers.libmagic import get_magic_info, get_compression

extension, mime_type, description, confidence = get_magic_info(self.data["path"])
self.data["magic_extension"] = extension
self.data["magic_mime_type"] = mime_type
self.data["magic_description"] = description
self.data["magic_confidence"] = confidence
# detect compression
compression = get_compression(mime_type)
if compression:
self.add_tag("compressed")
self.add_tag(f"{compression}-archive")
self.data["compression"] = compression
# refresh hash
self.data = self.data
try:
extension, mime_type, description, confidence = get_magic_info(self.data["path"])
self.data["magic_extension"] = extension
self.data["magic_mime_type"] = mime_type
self.data["magic_description"] = description
self.data["magic_confidence"] = confidence
# detect compression
compression = get_compression(mime_type)
if compression:
self.add_tag("compressed")
self.add_tag(f"{compression}-archive")
self.data["compression"] = compression
# refresh hash
self.data = self.data
except Exception as e:
log.debug(f"Error detecting file type: {type(e).__name__}: {e}")


class RAW_DNS_RECORD(DictHostEvent, DnsEvent):
@@ -1728,6 +1762,8 @@ def make_event(
When working within a module's `handle_event()`, use the instance method
`self.make_event()` instead of calling this function directly.
"""
if not data:
raise ValidationError("No data provided")

# allow tags to be either a string or an array
if not tags:
8 changes: 7 additions & 1 deletion bbot/core/helpers/async_helpers.py
@@ -51,12 +51,18 @@ async def lock(self, name):
class TaskCounter:
def __init__(self):
self.tasks = {}
self.lock = asyncio.Lock() # create a new lock
self._lock = None

@property
def value(self):
return sum([t.n for t in self.tasks.values()])

@property
def lock(self):
if self._lock is None:
self._lock = asyncio.Lock()
return self._lock

def count(self, task_name, n=1, _log=True):
if callable(task_name):
task_name = f"{task_name.__qualname__}()"
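
TaskCounter's lock is now created lazily on first access rather than in __init__. A common reason for this pattern is that constructing asyncio primitives eagerly can tie them to (or, on older Python versions, require) an event loop before one is running; deferring creation until the lock is first used inside a coroutine sidesteps that. A minimal sketch:

import asyncio

class LazyLockCounter:
    # sketch of the lazy-lock pattern: the asyncio.Lock isn't created until
    # something actually needs it, so instantiating the class never touches a loop
    def __init__(self):
        self.tasks = {}
        self._lock = None

    @property
    def lock(self):
        if self._lock is None:
            self._lock = asyncio.Lock()
        return self._lock

async def main():
    counter = LazyLockCounter()
    async with counter.lock:  # the lock is created here, inside the running loop
        counter.tasks["demo"] = 1
    print(counter.tasks)

asyncio.run(main())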
100 changes: 70 additions & 30 deletions bbot/core/helpers/depsinstaller/installer.py
@@ -20,11 +20,43 @@


class DepsInstaller:
CORE_DEPS = {
# core BBOT dependencies in the format of binary: package_name
# each one will only be installed if the binary is not found
"unzip": "unzip",
"zipinfo": "unzip",
"curl": "curl",
"git": "git",
"make": "make",
"gcc": "gcc",
"bash": "bash",
"which": "which",
"unrar": "unrar-free",
"tar": "tar",
# debian why are you like this
"7z": [
{
"name": "Install 7zip (Debian)",
"package": {"name": ["p7zip-full"], "state": "present"},
"become": True,
"when": "ansible_facts['os_family'] == 'Debian'",
},
{
"name": "Install 7zip (Non-Debian)",
"package": {"name": ["p7zip"], "state": "present"},
"become": True,
"when": "ansible_facts['os_family'] != 'Debian'",
},
],
}

def __init__(self, parent_helper):
self.parent_helper = parent_helper
self.preset = self.parent_helper.preset
self.core = self.preset.core

self.os_platform = os_platform()

# respect BBOT's http timeout
self.web_config = self.parent_helper.config.get("web", {})
http_timeout = self.web_config.get("http_timeout", 30)
@@ -202,28 +234,32 @@ def apt_install(self, packages):
"""
Install packages with the OS's default package manager (apt, pacman, dnf, etc.)
"""
packages_str = ",".join(packages)
args, kwargs = self._make_apt_ansible_args(packages)
success, err = self.ansible_run(module="package", args=args, **kwargs)
if success:
log.info(f'Successfully installed OS packages "{",".join(sorted(packages))}"')
else:
log.warning(
f"Failed to install OS packages ({err}). Recommend installing the following packages manually:"
)
for p in packages:
log.warning(f" - {p}")
return success

def _make_apt_ansible_args(self, packages):
packages_str = ",".join(sorted(packages))
log.info(f"Installing the following OS packages: {packages_str}")
args = {"name": packages_str, "state": "present"} # , "update_cache": True, "cache_valid_time": 86400}
kwargs = {}
# don't sudo brew
if os_platform() != "darwin":
if self.os_platform != "darwin":
kwargs = {
"ansible_args": {
"ansible_become": True,
"ansible_become_method": "sudo",
}
}
success, err = self.ansible_run(module="package", args=args, **kwargs)
if success:
log.info(f'Successfully installed OS packages "{packages_str}"')
else:
log.warning(
f"Failed to install OS packages ({err}). Recommend installing the following packages manually:"
)
for p in packages:
log.warning(f" - {p}")
return success
return args, kwargs

def shell(self, module, commands):
tasks = []
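
apt_install is split so the Ansible argument construction lives in its own helper: package names are sorted and joined, and privilege escalation is added everywhere except macOS, where brew must not run under sudo. The same construction as a free function (a toy rewrite, not the module's API):

def make_package_ansible_args(packages, os_platform):
    # build the package-module args plus any privilege-escalation kwargs
    args = {"name": ",".join(sorted(packages)), "state": "present"}
    kwargs = {}
    if os_platform != "darwin":  # don't sudo brew
        kwargs = {"ansible_args": {"ansible_become": True, "ansible_become_method": "sudo"}}
    return args, kwargs

print(make_package_ansible_args({"git", "curl"}, "linux"))
# ({'name': 'curl,git', 'state': 'present'},
#  {'ansible_args': {'ansible_become': True, 'ansible_become_method': 'sudo'}})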
@@ -269,7 +305,7 @@ def ansible_run(self, tasks=None, module=None, args=None, ansible_args=None):
for task in tasks:
if "package" in task:
# special case for macos
if os_platform() == "darwin":
if self.os_platform == "darwin":
# don't sudo brew
task["become"] = False
# brew doesn't support update_cache
@@ -292,8 +328,8 @@ def ansible_run(self, tasks=None, module=None, args=None, ansible_args=None):
},
module=module,
module_args=module_args,
quiet=not self.ansible_debug,
verbosity=(3 if self.ansible_debug else 0),
quiet=True,
verbosity=0,
cancel_callback=lambda: None,
)

Expand All @@ -303,7 +339,7 @@ def ansible_run(self, tasks=None, module=None, args=None, ansible_args=None):
err = ""
for e in res.events:
if self.ansible_debug and not success:
log.debug(json.dumps(e, indent=4))
log.debug(json.dumps(e, indent=2))
if e["event"] == "runner_on_failed":
err = e["event_data"]["res"]["msg"]
break
@@ -347,26 +383,30 @@ def ensure_root(self, message=""):

def install_core_deps(self):
to_install = set()
to_install_friendly = set()
playbook = []
self._install_sudo_askpass()
# ensure tldextract data is cached
self.parent_helper.tldextract("evilcorp.co.uk")
# command: package_name
core_deps = {
"unzip": "unzip",
"zipinfo": "unzip",
"curl": "curl",
"git": "git",
"make": "make",
"gcc": "gcc",
"bash": "bash",
"which": "which",
}
for command, package_name in core_deps.items():
for command, package_name_or_playbook in self.CORE_DEPS.items():
if not self.parent_helper.which(command):
to_install.add(package_name)
to_install_friendly.add(command)
if isinstance(package_name_or_playbook, str):
to_install.add(package_name_or_playbook)
else:
playbook.extend(package_name_or_playbook)
if to_install:
playbook.append(
{
"name": "Install Core BBOT Dependencies",
"package": {"name": list(to_install), "state": "present"},
"become": True,
}
)
if playbook:
log.info(f"Installing core BBOT dependencies: {','.join(sorted(to_install_friendly))}")
self.ensure_root()
self.apt_install(list(to_install))
self.ansible_run(tasks=playbook)

def _setup_sudo_cache(self):
if not self._sudo_cache_setup:
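
install_core_deps now treats CORE_DEPS entries in two ways: plain binary-to-package strings are batched into a single package task, while entries that are already lists of Ansible tasks (like the Debian-specific 7z handling) are appended verbatim, and everything runs as one playbook. A sketch of the assembly for a hypothetical host missing unzip and 7z, using a trimmed-down copy of CORE_DEPS:

CORE_DEPS = {
    "unzip": "unzip",
    "curl": "curl",
    "7z": [
        {"name": "Install 7zip (Debian)",
         "package": {"name": ["p7zip-full"], "state": "present"},
         "become": True,
         "when": "ansible_facts['os_family'] == 'Debian'"},
        {"name": "Install 7zip (Non-Debian)",
         "package": {"name": ["p7zip"], "state": "present"},
         "become": True,
         "when": "ansible_facts['os_family'] != 'Debian'"},
    ],
}

missing = {"unzip", "7z"}  # pretend these binaries weren't found on PATH
to_install, playbook = set(), []
for command, package_or_tasks in CORE_DEPS.items():
    if command in missing:
        if isinstance(package_or_tasks, str):
            to_install.add(package_or_tasks)   # simple case: batch into one task
        else:
            playbook.extend(package_or_tasks)  # pre-built task lists go in as-is
if to_install:
    playbook.append({
        "name": "Install Core BBOT Dependencies",
        "package": {"name": list(to_install), "state": "present"},
        "become": True,
    })
print(len(playbook))  # 3 -- two 7zip variants plus one batched package task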
7 changes: 5 additions & 2 deletions bbot/core/helpers/regexes.py
@@ -38,9 +38,12 @@
)
ip_range_regexes = [re.compile(r, re.I) for r in _ip_range_regexes]

# dns names with periods
# all dns names including IP addresses and bare hostnames (e.g. "localhost")
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
# dns names with periods (e.g. "www.example.com")
_dns_name_regex_with_period = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"

dns_name_extraction_regex = re.compile(_dns_name_regex_with_period, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
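
The regex module now keeps two DNS-name patterns: the original one (which also matches bare hostnames such as "localhost") is still used for validation, while extraction requires at least one period so scanning free text doesn't yield single-word false positives. A quick demonstration with the patterns copied from the diff:

import re

_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
_dns_name_regex_with_period = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"

dns_name_extraction_regex = re.compile(_dns_name_regex_with_period, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

text = "connect to localhost or www.example.com"
print(dns_name_extraction_regex.findall(text))             # ['www.example.com']
print(bool(dns_name_validation_regex.match("localhost")))  # True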