Files
HexaHost-Frontend/scripts/obfuscate_release.py

464 lines
13 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import hashlib
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
from pathlib import Path
# File suffixes (lower-case) that replace_references() opens as UTF-8 text and
# scans for asset paths; files with any other suffix are left untouched.
TEXT_EXTENSIONS = {".php", ".html", ".htm", ".xml", ".txt", ".js", ".css"}
# Matches one trailing ".<12 hex chars>" segment, case-insensitively. This is
# the shape of the short digest that build_hash_mapping() inserts between an
# asset's base name and its extension.
HASH_SUFFIX_RE = re.compile(r"\.[a-f0-9]{12}$", re.I)
def strip_comments_keep_strings(text: str) -> str:
    """Drop ``//`` and ``/* */`` comments, copying quoted text through verbatim.

    Text inside single quotes, double quotes or backticks is never treated as
    a comment; a backslash inside such text escapes the following character.
    The newline that ends a ``//`` comment is kept, and every newline inside a
    block comment is kept, so the number of lines does not change.

    The scanner knows nothing else about the input language: regular
    expression literals and ``${...}`` expressions inside template literals
    get no special treatment.
    """
    kept: list[str] = []
    state = "code"  # one of: code, line, block, quoted
    quote = ""  # delimiter that opened the current quoted run
    escaped = False
    pos = 0
    size = len(text)

    while pos < size:
        char = text[pos]
        pair = text[pos : pos + 2]

        if state == "line":
            if char == "\n":
                state = "code"
                kept.append(char)
            pos += 1
        elif state == "block":
            if pair == "*/":
                state = "code"
                pos += 2
            else:
                if char == "\n":
                    kept.append("\n")
                pos += 1
        elif state == "quoted":
            kept.append(char)
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                state = "code"
            pos += 1
        elif pair == "//":
            state = "line"
            pos += 2
        elif pair == "/*":
            state = "block"
            pos += 2
        else:
            if char in ("'", '"', "`"):
                state = "quoted"
                quote = char
            kept.append(char)
            pos += 1

    return "".join(kept)
# Opening tags that switch from inline HTML to PHP code. The bare short tag
# "<?" is deliberately not recognised: it is ambiguous with "<?xml", and
# leaving such a region untouched only means its comments are not stripped.
_PHP_OPEN_TAG_RE = re.compile(r"<\?(?:php(?=\s|\Z)|=)", re.I)
# Start of a heredoc (<<<ID or <<<"ID") or nowdoc (<<<'ID') up to its newline.
_PHP_HEREDOC_OPEN_RE = re.compile(r"<<<[ \t]*(['\"]?)([A-Za-z_]\w*)\1[ \t]*\r?\n")


def _php_quoted_end(text: str, start: int) -> int:
    """Return the index just past the quoted string that opens at ``start``."""
    quote = text[start]
    size = len(text)
    pos = start + 1
    while pos < size:
        char = text[pos]
        if char == "\\":
            pos += 2  # skip the escaped character as well
        elif char == quote:
            return pos + 1
        else:
            pos += 1
    return size  # unterminated: treat the rest of the text as string content


def _php_heredoc_end(text: str, start: int) -> int:
    """Return the index just past the heredoc/nowdoc at ``start``, or -1 if none."""
    opener = _PHP_HEREDOC_OPEN_RE.match(text, start)
    if opener is None:
        return -1
    closer = re.compile(r"^[ \t]*" + re.escape(opener.group(2)) + r"\b", re.M)
    found = closer.search(text, opener.end())
    return len(text) if found is None else found.end()


def _strip_php_code(text: str, start: int, out: list[str]) -> int:
    """Copy PHP code from ``start`` into ``out`` without comments.

    Stops after the closing ``?>`` tag (which is copied) or at the end of the
    text, and returns the index where inline HTML resumes.
    """
    size = len(text)
    pos = start
    while pos < size:
        char = text[pos]
        pair = text[pos : pos + 2]
        if pair == "?>":
            out.append(pair)
            return pos + 2
        if char in ("'", '"'):
            end = _php_quoted_end(text, pos)
            out.append(text[pos:end])
            pos = end
        elif pair == "//" or (char == "#" and pair != "#["):
            # A one-line comment ends at the newline or at a closing tag,
            # whichever comes first; neither is part of the comment.
            line_end = text.find("\n", pos)
            if line_end == -1:
                line_end = size
            tag = text.find("?>", pos, line_end)
            pos = line_end if tag == -1 else tag
        elif pair == "/*":
            close = text.find("*/", pos + 2)
            end = size if close == -1 else close + 2
            # Keep the newlines so line numbers in error messages still match.
            out.append("\n" * text.count("\n", pos, end))
            pos = end
        else:
            end = _php_heredoc_end(text, pos) if text.startswith("<<<", pos) else -1
            if end == -1:
                out.append(char)
                pos += 1
            else:
                out.append(text[pos:end])
                pos = end
    return size


def strip_php_comments(text: str) -> str:
    """Remove ``//``, ``#`` and ``/* */`` comments from the PHP parts of ``text``.

    Only code between a ``<?php`` / ``<?=`` opening tag and the matching
    ``?>`` (or end of file) is scanned; inline HTML outside those tags is
    copied unchanged, so ``#fff`` in a ``<style>`` block or ``http://`` in
    markup is not mistaken for a comment.

    Inside PHP code:

    * single- and double-quoted strings and heredoc/nowdoc bodies are copied
      verbatim;
    * a one-line comment ends at the newline or at ``?>``, and both of those
      terminators are preserved;
    * ``#[`` starts an attribute, not a comment, and is preserved;
    * newlines inside block comments are preserved so line numbers are stable.

    Known limitation: quotes nested inside ``{$...}`` interpolation in a
    double-quoted string are not understood and may end the string early.
    """
    out: list[str] = []
    pos = 0
    size = len(text)
    while pos < size:
        tag = _PHP_OPEN_TAG_RE.search(text, pos)
        if tag is None:
            out.append(text[pos:])  # trailing inline HTML
            break
        out.append(text[pos : tag.end()])
        pos = _strip_php_code(text, tag.end(), out)
    return "".join(out)
# A CSS block comment (possibly unterminated) or a quoted string literal.
_CSS_COMMENT_OR_STRING_RE = re.compile(
    r"""/\*.*?(?:\*/|\Z)|"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'""",
    re.S,
)


def _squeeze_css(code: str) -> str:
    """Collapse whitespace in CSS that contains no comments or strings."""
    code = re.sub(r"\s+", " ", code)
    # "+" is left alone because calc() requires whitespace around it, and
    # whitespace *before* ":" is left alone because ".a :hover" (descendant)
    # and ".a:hover" are different selectors.
    code = re.sub(r"\s*([{};,>~])\s*", r"\1", code)
    return code.replace(": ", ":")


def minify_css_fallback(text: str) -> str:
    """Return a conservatively minified copy of the stylesheet ``text``.

    Block comments are removed and whitespace outside string literals is
    collapsed; whitespace around ``{ } ; , > ~`` and after ``:`` is dropped.
    String literals are copied byte-for-byte. ``//`` is not treated as a
    comment marker (CSS has no line comments), so unquoted URLs such as
    ``url(http://example.test/a.png)`` survive intact.
    """
    pieces: list[str] = []
    pending: list[str] = []  # code seen since the last string literal
    pos = 0
    for token in _CSS_COMMENT_OR_STRING_RE.finditer(text):
        pending.append(text[pos : token.start()])
        pos = token.end()
        found = token.group()
        if found.startswith("/*"):
            # A comment that spanned lines still separates what surrounds it.
            if "\n" in found:
                pending.append("\n")
            continue
        pieces.append(_squeeze_css("".join(pending)))
        pending.clear()
        pieces.append(found)
    pending.append(text[pos:])
    pieces.append(_squeeze_css("".join(pending)))
    return "".join(pieces).strip()
# A JavaScript string literal: quoted strings may not contain a raw newline,
# template literals may.
_JS_STRING_RE = re.compile(
    r"""'(?:\\.|[^'\\\n])*'|"(?:\\.|[^"\\\n])*"|`(?:\\.|[^`\\])*`""",
    re.S,
)


def _squeeze_js_code(code: str) -> str:
    """Tighten whitespace in JavaScript that contains no string literals."""
    code = re.sub(r"[^\S\n]+", " ", code)  # runs of spaces/tabs -> one space
    code = re.sub(r" ?\n[ \n]*", "\n", code)  # trim line edges, drop blank lines
    # Only punctuation that can never fuse with a neighbour is tightened.
    # Operators such as "+" and "-" keep their spacing: "a + ++b" and "a+++b"
    # are different programs.
    return re.sub(r" ?([{}();,]) ?", r"\1", code)


def minify_js_fallback(text: str) -> str:
    """Return a conservatively minified copy of the script ``text``.

    Comments are removed with ``strip_comments_keep_strings``. Outside string
    and template literals, horizontal whitespace is collapsed, blank lines
    and indentation are dropped, and spaces next to ``{ } ( ) ; ,`` are
    removed. Line breaks are kept, because code that relies on automatic
    semicolon insertion stops parsing once its newlines become spaces.
    Literals are copied byte-for-byte.

    Known limitation: regular-expression literals are not recognised, so
    whitespace inside one is treated like any other code whitespace.
    """
    text = strip_comments_keep_strings(text)
    parts: list[str] = []
    pos = 0
    for literal in _JS_STRING_RE.finditer(text):
        parts.append(_squeeze_js_code(text[pos : literal.start()]))
        parts.append(literal.group())
        pos = literal.end()
    parts.append(_squeeze_js_code(text[pos:]))
    return "".join(parts).strip()
def canonical_asset_base(stem: str) -> str:
    """Return ``stem`` with every trailing ``HASH_SUFFIX_RE`` match removed.

    Suffixes are peeled one at a time until none is left, so a stem that
    carries several stacked digests is reduced to its plain base name.
    """
    base = stem
    while True:
        trimmed = HASH_SUFFIX_RE.sub("", base)
        if trimmed == base:
            return base
        base = trimmed
def is_skipped_asset(path: Path) -> bool:
    """Tell whether ``path`` must be ignored when collecting assets.

    A file is skipped when its name contains ``.min.``, ``.obf.`` or
    ``.deob.`` (case-sensitive), or when the word ``deobfuscated`` appears
    anywhere in its POSIX-style path (case-insensitive).
    """
    filename = path.name
    if any(marker in filename for marker in (".min.", ".obf.", ".deob.")):
        return True
    return "deobfuscated" in path.as_posix().lower()
def is_valid_source_content(content: str) -> bool:
    """Tell whether ``content`` is acceptable as asset source text.

    Content is rejected when it contains the ``[javascript-obfuscator-cli]``
    marker (presumably tool log output that ended up in the file) or when
    fewer than 20 characters remain after stripping surrounding whitespace.
    """
    has_cli_marker = "[javascript-obfuscator-cli]" in content
    long_enough = len(content.strip()) >= 20
    return long_enough and not has_cli_marker
def collect_asset_groups(asset_root: Path) -> dict[tuple[Path, str, str], list[Path]]:
    """Group the ``.js`` and ``.css`` files under ``asset_root`` by logical asset.

    The key is ``(directory, canonical base name, extension)``; the value
    lists every file in that directory whose stem reduces to that base name
    once hash suffixes are removed. Files rejected by ``is_skipped_asset``
    are left out. All ``.js`` groups are inserted before ``.css`` groups, and
    files are visited in sorted path order.
    """
    grouped: dict[tuple[Path, str, str], list[Path]] = defaultdict(list)
    for suffix in (".js", ".css"):
        candidates = sorted(asset_root.rglob(f"*{suffix}"))
        for asset in candidates:
            if not is_skipped_asset(asset):
                group_key = (asset.parent, canonical_asset_base(asset.stem), suffix)
                grouped[group_key].append(asset)
    return grouped
def pick_source_file(paths: list[Path], base: str, ext: str) -> Path | None:
    """Choose the file in ``paths`` that should act as the asset's source.

    The un-hashed name ``<base><ext>`` (in the directory of the first path)
    is tried first when it is one of ``paths``; the remaining files follow in
    order of increasing file-name length. The first file that can be read as
    UTF-8 and passes ``is_valid_source_content`` wins. Returns ``None`` when
    ``paths`` is empty or no file qualifies.
    """
    if not paths:
        return None

    preferred = paths[0].parent / f"{base}{ext}"
    head = [preferred] if preferred in paths else []
    shortest_first = sorted(paths, key=lambda item: len(item.name))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    for candidate in dict.fromkeys(head + shortest_first):
        try:
            text = candidate.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if is_valid_source_content(text):
            return candidate
    return None
def cleanup_invalid_siblings(paths: list[Path], source: Path) -> None:
    """Delete the files in ``paths`` (other than ``source``) with invalid content.

    A sibling is deleted when it is not valid UTF-8 or when its text fails
    ``is_valid_source_content``. A sibling that is missing or cannot be read
    at all (any ``OSError``) is left alone: failing to read a file says
    nothing about its content, so it is not grounds for deleting it.

    Raises:
        OSError: if a file that was read successfully cannot be removed.
    """
    for path in paths:
        if path == source:
            continue
        try:
            content = path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            content = ""  # undecodable bytes count as invalid content
        except OSError:
            continue  # missing or unreadable: do not delete on a read failure
        if not is_valid_source_content(content):
            path.unlink()
def run_cmd(command: list[str], cwd: Path) -> None:
    """Run ``command`` in directory ``cwd`` and fail loudly on a non-zero exit.

    Output is captured as text rather than shown.

    Raises:
        RuntimeError: when the exit status is non-zero. The message is the
            stripped stderr, or the stripped stdout when stderr is empty, or
            ``"command failed"`` when both are empty.
    """
    completed = subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        return
    detail = completed.stderr.strip() or completed.stdout.strip() or "command failed"
    raise RuntimeError(detail)
def process_js(path: Path, cwd: Path) -> None:
    """Obfuscate the JavaScript file at ``path`` in place.

    When ``npx`` is on ``PATH`` the source is copied to a temporary
    directory, minified there with ``terser`` and then run through
    ``javascript-obfuscator``; the result replaces ``path``. If ``npx`` is
    missing, or any step of that pipeline fails, the file is instead
    rewritten with ``minify_js_fallback``. A pipeline failure is reported on
    stderr, so a release never silently ships the weaker fallback output.

    Args:
        path: JavaScript file to rewrite.
        cwd: working directory for the ``npx`` commands.

    Raises:
        ValueError: if the current content of ``path`` fails
            ``is_valid_source_content``.
    """
    original = path.read_text(encoding="utf-8")
    if not is_valid_source_content(original):
        raise ValueError(
            f"invalid or corrupted JS source: {path} "
            f"(restore e.g. 'git checkout dev -- {path.as_posix()}')"
        )
    if shutil.which("npx"):
        tmpdir = Path(tempfile.mkdtemp())
        try:
            src = tmpdir / "input.js"
            out = tmpdir / "output.js"
            src.write_text(original, encoding="utf-8")
            # terser rewrites the temporary copy in place; the obfuscator
            # then reads that minified copy.
            run_cmd(
                [
                    "npx",
                    "--yes",
                    "terser",
                    str(src),
                    "-o",
                    str(src),
                    "--compress",
                    "--mangle",
                    "--comments",
                    "false",
                ],
                cwd,
            )
            run_cmd(
                [
                    "npx",
                    "--yes",
                    "javascript-obfuscator",
                    str(src),
                    "--output",
                    str(out),
                    "--compact",
                    "true",
                    "--control-flow-flattening",
                    "true",
                    "--dead-code-injection",
                    "true",
                    "--string-array",
                    "true",
                    "--string-array-encoding",
                    "base64",
                    "--target",
                    "browser-no-eval",
                    "--source-map",
                    "false",
                ],
                cwd,
            )
            if not out.exists():
                raise RuntimeError("obfuscator produced no output file")
            result = out.read_text(encoding="utf-8")
            if not is_valid_source_content(result):
                raise RuntimeError("obfuscator output looks invalid")
            path.write_text(result.strip() + "\n", encoding="utf-8")
            return
        except Exception as exc:
            # Best effort: any tool failure degrades to the fallback below,
            # but the reason is surfaced instead of being discarded.
            print(
                f"WARNING: JS obfuscation failed for {path}: {exc}; "
                "using fallback minifier",
                file=sys.stderr,
            )
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)
    path.write_text(minify_js_fallback(original) + "\n", encoding="utf-8")
def process_css(path: Path, cwd: Path) -> None:
    """Minify the stylesheet at ``path`` in place.

    When ``npx`` is on ``PATH`` the source is copied to a temporary directory
    and minified with ``clean-css-cli`` (``-O2``, ``--skip-rebase``); the
    result replaces ``path``. If ``npx`` is missing, or that step fails, the
    file is instead rewritten with ``minify_css_fallback``. A tool failure is
    reported on stderr rather than being discarded.

    Args:
        path: CSS file to rewrite.
        cwd: working directory for the ``npx`` command.
    """
    original = path.read_text(encoding="utf-8")
    if shutil.which("npx"):
        tmpdir = Path(tempfile.mkdtemp())
        try:
            src = tmpdir / "input.css"
            out = tmpdir / "output.css"
            src.write_text(original, encoding="utf-8")
            run_cmd(
                [
                    "npx",
                    "--yes",
                    "clean-css-cli",
                    str(src),
                    "-o",
                    str(out),
                    "--skip-rebase",
                    "-O2",
                ],
                cwd,
            )
            path.write_text(out.read_text(encoding="utf-8").strip() + "\n", encoding="utf-8")
            return
        except Exception as exc:
            # Best effort: fall through to the built-in minifier, but say why.
            print(
                f"WARNING: CSS minification failed for {path}: {exc}; "
                "using fallback minifier",
                file=sys.stderr,
            )
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)
    path.write_text(minify_css_fallback(original) + "\n", encoding="utf-8")
def process_php(path: Path) -> None:
    """Rewrite the PHP file at ``path`` with its comments removed.

    The file is read and written as UTF-8; the new content is whatever
    ``strip_php_comments`` returns for the old content.
    """
    path.write_text(
        strip_php_comments(path.read_text(encoding="utf-8")),
        encoding="utf-8",
    )
def hash_file(path: Path) -> str:
    """Return the first 12 hex digits of the SHA-256 digest of ``path``'s bytes."""
    digest = hashlib.sha256()
    digest.update(path.read_bytes())
    return digest.hexdigest()[:12]
def replace_references(root: Path, mapping: dict[str, str]) -> None:
    """Rewrite asset paths in every text file below ``root``.

    Each key of ``mapping`` found in a file whose suffix is listed in
    ``TEXT_EXTENSIONS`` is replaced by its value. A key only matches as a
    whole path: it must not be directly preceded or followed by a letter,
    digit, ``_`` or ``-``, so mapping ``assets/app.js`` leaves
    ``assets/app.json`` and ``myassets/app.js`` untouched. All keys are
    substituted in a single pass, longest key first, so text produced by one
    replacement is never rewritten by another. Files that are not valid UTF-8
    are skipped, and a file is only written back when its content changed.

    Args:
        root: directory to walk recursively.
        mapping: old relative path -> new relative path; nothing is done when
            it is empty.
    """
    if not mapping:
        return  # nothing to rewrite, so skip walking the tree entirely

    longest_first = sorted(mapping, key=len, reverse=True)
    pattern = re.compile(
        r"(?<![A-Za-z0-9_-])(?:"
        + "|".join(re.escape(old) for old in longest_first)
        + r")(?![A-Za-z0-9_-])"
    )

    for candidate in root.rglob("*"):
        if not candidate.is_file() or candidate.suffix.lower() not in TEXT_EXTENSIONS:
            continue
        try:
            content = candidate.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            continue
        updated = pattern.sub(lambda found: mapping[found.group(0)], content)
        if updated != content:
            candidate.write_text(updated, encoding="utf-8")
def build_hash_mapping(public_root: Path) -> dict[str, str]:
    """Rename each asset under ``public_root/assets`` to a content-hashed name.

    For every asset group the chosen source file is renamed to
    ``<base>.<digest><ext>`` (digest from ``hash_file``) and every other file
    of the group is deleted. Groups without a usable source are left alone.

    Returns:
        A mapping from each previous path to the new hashed path, both
        relative to ``public_root`` in POSIX form. It is empty when the
        ``assets`` directory does not exist.
    """
    renames: dict[str, str] = {}
    asset_root = public_root / "assets"
    if not asset_root.exists():
        return renames

    for (folder, base, ext), members in collect_asset_groups(asset_root).items():
        chosen = pick_source_file(members, base, ext)
        if chosen is None:
            continue

        hashed = folder / f"{base}.{hash_file(chosen)}{ext}"
        new_ref = hashed.relative_to(public_root).as_posix()
        old_refs = (member.relative_to(public_root).as_posix() for member in members)
        renames.update({old_ref: new_ref for old_ref in old_refs if old_ref != new_ref})

        if chosen != hashed:
            if hashed.exists():
                hashed.unlink()
            chosen.replace(hashed)

        # Whatever else belonged to the group is now stale.
        for member in members:
            if member != hashed and member.exists():
                member.unlink()

    return renames
def main() -> int:
    """Run the release obfuscation build and return a process exit status.

    Steps, relative to the ``--root`` directory (default: current directory):

    1. For every asset group under ``public/assets``, choose a source file,
       delete invalid siblings and obfuscate/minify the source in place.
    2. Strip comments from every ``*.php`` file under ``public`` and
       ``backend``.
    3. With ``--hash-assets``, rename assets to hashed names and rewrite the
       references to them throughout the repository.

    Returns:
        0 on success; 1 when ``public`` is missing or an asset group has no
        valid source file.
    """
    parser = argparse.ArgumentParser(description="Release obfuscation build.")
    parser.add_argument("--root", default=".", help="Repository root")
    parser.add_argument("--hash-assets", action="store_true", help="Hash JS/CSS file names")
    args = parser.parse_args()
    repo_root = Path(args.root).resolve()
    public_root = repo_root / "public"
    if not public_root.exists():
        print("public directory not found", file=sys.stderr)
        return 1
    asset_root = public_root / "assets"
    if asset_root.exists():
        groups = collect_asset_groups(asset_root)
        for (parent, base, ext), paths in sorted(groups.items()):
            source = pick_source_file(paths, base, ext)
            if source is None:
                missing = parent / f"{base}{ext}"
                rel = missing.relative_to(public_root)
                # The git pathspec must be relative to the repository root,
                # not to public/, or the suggested command matches nothing.
                git_path = missing.relative_to(repo_root).as_posix()
                print(
                    f"ERROR: No valid source for {rel}. "
                    f"Restore from dev, e.g.: git checkout dev -- {git_path}",
                    file=sys.stderr,
                )
                return 1
            cleanup_invalid_siblings(paths, source)
            if ext == ".js":
                process_js(source, repo_root)
            else:
                process_css(source, repo_root)
    for php in sorted(public_root.rglob("*.php")):
        process_php(php)
    for php in sorted((repo_root / "backend").rglob("*.php")):
        process_php(php)
    if args.hash_assets:
        mapping = build_hash_mapping(public_root)
        replace_references(repo_root, mapping)
    print("Release obfuscation complete.")
    return 0
if __name__ == "__main__":
raise SystemExit(main())