Enhance obfuscation workflow and asset processing: Updated the Gitea workflow to skip CI for specific commit messages, improving efficiency. Refactored the obfuscation script to include better asset handling, validation, and cleanup processes, ensuring only valid files are processed. Introduced temporary directories for intermediate files during obfuscation, enhancing reliability and reducing errors.

This commit is contained in:
smueller
2026-05-28 17:32:21 +02:00
parent 6c9114e0a7
commit 8f985da61f
3 changed files with 170 additions and 37 deletions

View File

@@ -3,15 +3,17 @@ from __future__ import annotations
import argparse
import hashlib
import os
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
from pathlib import Path
TEXT_EXTENSIONS = {".php", ".html", ".htm", ".xml", ".txt", ".js", ".css"}
HASH_SUFFIX_RE = re.compile(r"\.[a-f0-9]{12}$", re.I)
def strip_comments_keep_strings(text: str) -> str:
@@ -174,42 +176,124 @@ def minify_js_fallback(text: str) -> str:
return text.strip()
def run_cmd(command: list[str], cwd: Path, input_text: str | None = None) -> str:
def canonical_asset_base(stem: str) -> str:
name = stem
while HASH_SUFFIX_RE.search(name):
name = HASH_SUFFIX_RE.sub("", name)
return name
def is_skipped_asset(path: Path) -> bool:
lowered = path.as_posix().lower()
if ".min." in path.name or ".obf." in path.name or ".deob." in path.name:
return True
if "deobfuscated" in lowered:
return True
return False
def is_valid_source_content(content: str) -> bool:
if "[javascript-obfuscator-cli]" in content:
return False
return len(content.strip()) >= 20
def collect_asset_groups(asset_root: Path) -> dict[tuple[Path, str, str], list[Path]]:
groups: dict[tuple[Path, str, str], list[Path]] = defaultdict(list)
for ext in (".js", ".css"):
for file_path in sorted(asset_root.rglob(f"*{ext}")):
if is_skipped_asset(file_path):
continue
base = canonical_asset_base(file_path.stem)
key = (file_path.parent, base, ext)
groups[key].append(file_path)
return groups
def pick_source_file(paths: list[Path], base: str, ext: str) -> Path | None:
if not paths:
return None
parent = paths[0].parent
plain = parent / f"{base}{ext}"
ordered: list[Path] = []
if plain in paths:
ordered.append(plain)
for candidate in sorted(paths, key=lambda p: len(p.name)):
if candidate not in ordered:
ordered.append(candidate)
for candidate in ordered:
try:
content = candidate.read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError):
continue
if is_valid_source_content(content):
return candidate
return None
def cleanup_invalid_siblings(paths: list[Path], source: Path) -> None:
for path in paths:
if path == source or not path.exists():
continue
try:
content = path.read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError):
content = ""
if not is_valid_source_content(content):
path.unlink()
def run_cmd(command: list[str], cwd: Path) -> None:
proc = subprocess.run(
command,
cwd=str(cwd),
input=input_text,
text=True,
capture_output=True,
check=False,
)
if proc.returncode != 0:
raise RuntimeError(proc.stderr.strip() or "command failed")
return proc.stdout
raise RuntimeError(proc.stderr.strip() or proc.stdout.strip() or "command failed")
def process_js(path: Path, cwd: Path) -> None:
original = path.read_text(encoding="utf-8")
if not is_valid_source_content(original):
raise ValueError(
f"invalid or corrupted JS source: {path} "
f"(restore e.g. 'git checkout dev -- {path.as_posix()}')"
)
if shutil.which("npx"):
tmpdir = Path(tempfile.mkdtemp())
try:
minified = run_cmd(
src = tmpdir / "input.js"
out = tmpdir / "output.js"
src.write_text(original, encoding="utf-8")
run_cmd(
[
"npx",
"--yes",
"terser",
str(src),
"-o",
str(src),
"--compress",
"--mangle",
"--comments",
"false",
],
cwd,
input_text=original,
)
obfuscated = run_cmd(
run_cmd(
[
"npx",
"--yes",
"javascript-obfuscator",
str(src),
"--output",
str(out),
"--compact",
"true",
"--control-flow-flattening",
@@ -224,38 +308,51 @@ def process_js(path: Path, cwd: Path) -> None:
"browser-no-eval",
"--source-map",
"false",
"--output",
"stdout",
],
cwd,
input_text=minified,
)
path.write_text(obfuscated.strip() + "\n", encoding="utf-8")
if not out.exists():
raise RuntimeError("obfuscator produced no output file")
result = out.read_text(encoding="utf-8")
if not is_valid_source_content(result):
raise RuntimeError("obfuscator output looks invalid")
path.write_text(result.strip() + "\n", encoding="utf-8")
return
except Exception:
pass
finally:
shutil.rmtree(tmpdir, ignore_errors=True)
path.write_text(minify_js_fallback(original) + "\n", encoding="utf-8")
def process_css(path: Path, cwd: Path) -> None:
original = path.read_text(encoding="utf-8")
if shutil.which("npx"):
tmpdir = Path(tempfile.mkdtemp())
try:
minified = run_cmd(
src = tmpdir / "input.css"
out = tmpdir / "output.css"
src.write_text(original, encoding="utf-8")
run_cmd(
[
"npx",
"--yes",
"clean-css-cli",
str(src),
"-o",
str(out),
"--skip-rebase",
"-O2",
],
cwd,
input_text=original,
)
path.write_text(minified.strip() + "\n", encoding="utf-8")
path.write_text(out.read_text(encoding="utf-8").strip() + "\n", encoding="utf-8")
return
except Exception:
pass
finally:
shutil.rmtree(tmpdir, ignore_errors=True)
path.write_text(minify_css_fallback(original) + "\n", encoding="utf-8")
@@ -266,8 +363,7 @@ def process_php(path: Path) -> None:
def hash_file(path: Path) -> str:
digest = hashlib.sha256(path.read_bytes()).hexdigest()[:12]
return digest
return hashlib.sha256(path.read_bytes()).hexdigest()[:12]
def replace_references(root: Path, mapping: dict[str, str]) -> None:
@@ -279,7 +375,7 @@ def replace_references(root: Path, mapping: dict[str, str]) -> None:
except UnicodeDecodeError:
continue
updated = content
for src, dst in mapping.items():
for src, dst in sorted(mapping.items(), key=lambda item: len(item[0]), reverse=True):
updated = updated.replace(src, dst)
updated = updated.replace("/" + src, "/" + dst)
if updated != content:
@@ -289,17 +385,32 @@ def replace_references(root: Path, mapping: dict[str, str]) -> None:
def build_hash_mapping(public_root: Path) -> dict[str, str]:
mapping: dict[str, str] = {}
asset_root = public_root / "assets"
for ext in (".js", ".css"):
for file_path in sorted(asset_root.rglob(f"*{ext}")):
if ".min." in file_path.name or ".obf." in file_path.name:
continue
digest = hash_file(file_path)
new_name = f"{file_path.stem}.{digest}{file_path.suffix}"
new_path = file_path.with_name(new_name)
file_path.rename(new_path)
rel_old = file_path.relative_to(public_root).as_posix()
rel_new = new_path.relative_to(public_root).as_posix()
mapping[rel_old] = rel_new
if not asset_root.exists():
return mapping
groups = collect_asset_groups(asset_root)
for (parent, base, ext), paths in groups.items():
source = pick_source_file(paths, base, ext)
if source is None:
continue
digest = hash_file(source)
target = parent / f"{base}.{digest}{ext}"
rel_new = target.relative_to(public_root).as_posix()
for old in paths:
rel_old = old.relative_to(public_root).as_posix()
if rel_old != rel_new:
mapping[rel_old] = rel_new
if source != target:
if target.exists():
target.unlink()
source.replace(target)
for old in paths:
if old != target and old.exists():
old.unlink()
return mapping
@@ -316,11 +427,26 @@ def main() -> int:
print("public directory not found", file=sys.stderr)
return 1
for js in sorted(public_root.rglob("*.js")):
process_js(js, repo_root)
for css in sorted(public_root.rglob("*.css")):
process_css(css, repo_root)
for php in sorted((repo_root / "public").rglob("*.php")):
asset_root = public_root / "assets"
if asset_root.exists():
groups = collect_asset_groups(asset_root)
for (parent, base, ext), paths in sorted(groups.items()):
source = pick_source_file(paths, base, ext)
if source is None:
rel = (parent / f"{base}{ext}").relative_to(public_root)
print(
f"ERROR: No valid source for {rel}. "
f"Restore from dev, e.g.: git checkout dev -- {rel}",
file=sys.stderr,
)
return 1
cleanup_invalid_siblings(paths, source)
if ext == ".js":
process_js(source, repo_root)
else:
process_css(source, repo_root)
for php in sorted(public_root.rglob("*.php")):
process_php(php)
for php in sorted((repo_root / "backend").rglob("*.php")):
process_php(php)