#!/usr/bin/env python3
"""UniSOC Honeypot — HTTP honeytrap port 80.

Faux serveur web "BackupServer Dashboard v3.2" cohérent avec l'identité
"serveur de sauvegarde" (cf Veeam fake sur 443, samba shares "Sauvegardes",
hostname srv-backup-001).

Objectifs :
  1. Crédibilité — un attaquant qui scanne voit un dashboard avec faux disques
     (96 TB sur 12 volumes), faux jobs nocturnes (200 jobs scheduled), faux
     network throughput (Gbps in/out), faux statut Veeam "running" → super appât.
  2. Capture des payloads — toute requête sur les paths classiques de scan
     (/admin, /phpmyadmin, /.env, /.git/HEAD, /wp-login.php, /actuator, …)
     est loggée intégralement (path + method + headers + body). On capture
     les wordlists, les fingerprints de scanners (Nuclei/Nikto/etc), les
     payloads SQLi/RCE/SSRF tentés.
  3. Tarpit léger — sleep 0.3-1.5s sur les routes piégées pour ralentir les
     scanners auto et augmenter le coût de scan.

Logs : /var/log/opencanary/http-honeytrap.log (JSON par ligne).
Tail-F'd par l'agent UniSOC, source="http-honeytrap".
"""

from __future__ import annotations

import http.server
import json
import os
import random
import socketserver
import time
import urllib.parse
from datetime import datetime, timezone, timedelta

LOG = "/var/log/opencanary/http-honeytrap.log"

# ─────────────────────────────────────────────────────────────────────────────
# Fake content consistent with the "BackupServer" persona
# ─────────────────────────────────────────────────────────────────────────────

# Key order of every volume dict; it is also the key order of the JSON
# produced when the table is serialised.
_VOLUME_FIELDS = ("id", "label", "size_tb", "used_tb", "raid", "status", "iops")

# One row per decoy storage volume, laid out in _VOLUME_FIELDS order.
# The twelve sizes add up to 96 TB.
_VOLUME_ROWS = (
    ("vol01", "BACKUP_PRD_DC1", 8.0, 6.8, "RAID6", "healthy", 1240),
    ("vol02", "BACKUP_PRD_DC2", 8.0, 7.2, "RAID6", "healthy", 1180),
    ("vol03", "BACKUP_VEEAM_VBR", 12.0, 9.4, "RAID10", "healthy", 980),
    ("vol04", "BACKUP_SQL_LOGS", 4.0, 2.1, "RAID10", "healthy", 2400),
    ("vol05", "BACKUP_FILES_HRD", 16.0, 13.2, "RAID6", "warn", 750),
    ("vol06", "BACKUP_AD_DC", 2.0, 0.4, "RAID1", "healthy", 320),
    ("vol07", "BACKUP_FILESERV", 16.0, 11.6, "RAID6", "healthy", 890),
    ("vol08", "ARCHIVE_2024", 8.0, 7.9, "RAID6", "healthy", 50),
    ("vol09", "ARCHIVE_2023", 8.0, 8.0, "RAID6", "full", 0),
    ("vol10", "REPLICA_OFFSITE", 8.0, 5.4, "RAID6", "healthy", 540),
    ("vol11", "STAGING_TEMP", 4.0, 1.8, "RAID0", "healthy", 3100),
    ("vol12", "STAGING_INDEX", 2.0, 0.9, "RAID1", "healthy", 4200),
)

FAKE_VOLUMES = [dict(zip(_VOLUME_FIELDS, row)) for row in _VOLUME_ROWS]

# Base names for the decoy backup jobs; gen_jobs() appends a numeric suffix
# to whichever one it picks.
FAKE_JOB_NAMES = [
    "Veeam-Daily-DC1-VMs",
    "Veeam-Daily-DC2-VMs",
    "Veeam-Hourly-SQL",
    "Veeam-Weekly-Files",
    "Veeam-Sync-Replica-Offsite",
    "SQL-LogShip-PROD",
    "SQL-LogShip-PREPROD",
    "AD-Backup-DC01",
    "AD-Backup-DC02",
    "FileSrv-Incremental",
    "Exchange-DAG-Backup",
    "VEEAM-PostgreSQL-Daily",
    "Veeam-Quick-Backup",
    "Veeam-Tape-Out",
    "S3-Sync-Wasabi",
    "S3-Sync-AWS",
]

# Decoy internal hosts used as job targets; all live under corp.local.
CORP_DC_TARGETS = [
    f"{short_name}.corp.local"
    for short_name in (
        "DC01",
        "DC02-BKP",
        "SRV-FILES01",
        "SRV-SQL01",
        "SRV-EXCHANGE01",
        "SRV-VEEAM-VBR",
        "SRV-AD01",
        "ESXi-PRD-01",
        "ESXi-PRD-02",
        "VCENTER-PRD",
    )
]


def gen_jobs() -> list[dict]:
    """Build the decoy schedule of 200 backup jobs.

    Each job gets a randomly chosen base name (suffixed with its index) and
    target host. "next_run_iso" falls 0-480 minutes after today's 22:00 UTC;
    "last_run_iso" is 22:00 UTC of the previous day minus 0-480 minutes.
    Roughly 95% of jobs report "success", the rest "warning".

    Returns:
        A list of 200 JSON-serialisable dicts, freshly randomised per call.
    """
    window_start = datetime.now(timezone.utc).replace(hour=22, minute=0, second=0, microsecond=0)
    job_types = ["full", "incremental", "synthetic_full", "transaction_log"]

    jobs = []
    for idx in range(200):
        # The order of the random draws below is deliberate: it keeps the
        # generated plan reproducible for a given RNG seed.
        job_name = f"{random.choice(FAKE_JOB_NAMES)}-{idx:03d}"
        host = random.choice(CORP_DC_TARGETS)
        next_run = window_start + timedelta(minutes=random.randint(0, 480))
        status = "success" if random.random() > 0.05 else "warning"
        last_run = window_start - timedelta(days=1, minutes=random.randint(0, 480))

        job = {
            "id": f"job-{idx:04d}",
            "name": job_name,
            "target": host,
            "last_run_status": status,
            "last_run_iso": last_run.isoformat(),
            "next_run_iso": next_run.isoformat(),
            "type": random.choice(job_types),
            "size_gb": round(random.uniform(50, 8000), 1),
        }
        jobs.append(job)
    return jobs


def gen_dashboard_html() -> bytes:
    """Render the decoy "BackupServer Dashboard" landing page.

    The page shows capacity KPIs and one table row per entry of FAKE_VOLUMES,
    followed by a static list of fake backup endpoints. The host name comes
    from os.uname() and both timestamps come from a single clock reading, so
    the header and footer always agree.

    Returns:
        The complete HTML document, UTF-8 encoded.
    """
    vols = FAKE_VOLUMES
    total_tb = sum(v["size_tb"] for v in vols)
    used_tb = sum(v["used_tb"] for v in vols)
    hostname = os.uname().nodename

    # Use a timezone-aware local time: on a naive datetime, strftime("%z")
    # expands to an empty string and the footer would lose its UTC offset.
    now = datetime.now().astimezone()
    last_update = now.strftime("%Y-%m-%d %H:%M:%S")
    server_time = now.strftime("%Y-%m-%dT%H:%M:%S%z")

    rows = "\n".join(
        f'<tr><td>{v["label"]}</td><td>{v["raid"]}</td>'
        f'<td>{v["used_tb"]:.1f} / {v["size_tb"]:.1f} TB</td>'
        f'<td><span class="b-{v["status"]}">{v["status"]}</span></td>'
        f'<td>{v["iops"]} IOPS</td></tr>'
        for v in vols
    )
    html = f"""<!DOCTYPE html>
<html lang="fr"><head><meta charset="UTF-8">
<title>BackupServer Dashboard v3.2 - {hostname}</title>
<style>
body{{background:#1a1a2e;color:#e0e0e0;font-family:Segoe UI,sans-serif;margin:0;padding:20px}}
.h{{background:#16213e;padding:15px 20px;border-bottom:2px solid #0f3460;margin:-20px -20px 20px}}
.h h1{{margin:0;font-size:18px;color:#e94560}}.h .sub{{font-size:11px;color:#999}}
.kpi{{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:10px;margin:15px 0}}
.k{{background:#16213e;padding:12px;border-radius:4px;border-left:3px solid #0f3460}}
.k .lbl{{font-size:10px;color:#888;text-transform:uppercase}}.k .val{{font-size:22px;font-weight:bold;color:#e94560}}
table{{width:100%;border-collapse:collapse;background:#16213e;border-radius:4px;overflow:hidden}}
th,td{{padding:8px 12px;text-align:left;border-bottom:1px solid #0f3460;font-size:13px}}
th{{background:#0f3460;color:#e0e0e0;font-size:11px;text-transform:uppercase}}
.b-healthy{{color:#4caf50}}.b-warn{{color:#ff9800}}.b-full{{color:#f44336}}
h2{{color:#888;font-size:14px;margin:25px 0 10px;text-transform:uppercase;letter-spacing:1px}}
.foot{{margin-top:30px;padding-top:15px;border-top:1px solid #0f3460;font-size:10px;color:#666}}
</style></head>
<body>
<div class="h">
  <h1>📦 BackupServer Dashboard <span style="color:#999;font-size:12px">v3.2.1-build2034</span></h1>
  <div class="sub">{hostname} — Backup Cluster Node — Apache/2.4.62 · last update {last_update}</div>
</div>

<div class="kpi">
  <div class="k"><div class="lbl">Total Capacity</div><div class="val">{total_tb:.0f} TB</div></div>
  <div class="k"><div class="lbl">Used</div><div class="val">{used_tb:.0f} TB</div></div>
  <div class="k"><div class="lbl">Volumes</div><div class="val">{len(vols)}</div></div>
  <div class="k"><div class="lbl">Jobs scheduled tonight</div><div class="val">200</div></div>
  <div class="k"><div class="lbl">Active Replications</div><div class="val">7</div></div>
  <div class="k"><div class="lbl">Throughput (peak)</div><div class="val">3.4 Gbps</div></div>
</div>

<h2>Storage Volumes</h2>
<table><thead><tr><th>Label</th><th>RAID</th><th>Used</th><th>Status</th><th>IOPS</th></tr></thead>
<tbody>{rows}</tbody></table>

<h2>Backup Endpoints</h2>
<table><tbody>
<tr><td>SMB/CIFS Sauvegardes</td><td><code>\\\\srvdomaine\\Sauvegardes</code></td></tr>
<tr><td>Veeam Backup &amp; Replication</td><td><a href="https://srvdomaine/" style="color:#e94560">https://srvdomaine/</a></td></tr>
<tr><td>FTP archives 2024</td><td><code>ftp://srvdomaine:21/archives</code></td></tr>
<tr><td>SQL log shipping</td><td><code>tcp://srvdomaine:1433</code></td></tr>
<tr><td>Replica offsite (Wasabi S3)</td><td><code>s3://wasabi/backup-prd/</code></td></tr>
</tbody></table>

<div class="foot">
  © corp.local IT Infrastructure · Powered by Apache 2.4.62 + custom node.js dashboard ·
  Server time: {server_time}<br>
  Need help? Contact <a href="mailto:itops@corp.local" style="color:#888">itops@corp.local</a>
</div>
</body></html>
"""
    return html.encode()


# ─────────────────────────────────────────────────────────────────────────────
# Routes piégées (paths classiques de scan)
# ─────────────────────────────────────────────────────────────────────────────

# Réponses crédibles par path — on RETOURNE des données de leurre quand le path
# pourrait suggérer une vraie présence (api routes, jobs.json), pour que le scanner
# continue son exploration et qu'on capture davantage.
TRAP_PATHS = {
    "/api/v1/jobs":       lambda: ("application/json", json.dumps({"jobs": gen_jobs()[:50]}).encode(), 200),
    "/api/v1/volumes":    lambda: ("application/json", json.dumps({"volumes": FAKE_VOLUMES}).encode(), 200),
    "/api/v1/health":     lambda: ("application/json", b'{"status":"ok","version":"3.2.1","uptime":"34d 12h"}', 200),
    "/api/v1/sessions":   lambda: ("application/json", b'{"active_sessions":[]}', 200),
    "/api/v1/backups":    lambda: ("application/json", json.dumps({"backups": gen_jobs()[:30]}).encode(), 200),
    "/jobs.json":         lambda: ("application/json", json.dumps({"jobs": gen_jobs()}).encode(), 200),
    "/disks":             lambda: ("application/json", json.dumps({"disks": FAKE_VOLUMES}).encode(), 200),
    # Identification ranges classiques de scanners
    "/server-status":     lambda: ("text/html", b"<h1>Apache Server Status</h1><p>Server Version: Apache/2.4.62</p>", 200),
    "/server-info":       lambda: ("text/html", b"<h1>Apache Server Info</h1>", 200),
    # Routes triviales d'auth (pour capter les wordlists)
    "/login":             lambda: ("text/html", b'<form method=POST action="/login"><input name=user><input name=pass type=password><button>Login</button></form>', 200),
    "/admin":             lambda: ("text/html", b'<form method=POST action="/admin/login"><input name=username><input name=password type=password><button>Admin login</button></form>', 200),
    "/admin/login":       lambda: ("text/html", b'<form method=POST action="/admin/login"><input name=username><input name=password type=password><button>Login</button></form>', 200),
    "/wp-login.php":      lambda: ("text/html", b'<title>Log In &lsaquo; corp Backup &mdash; WordPress</title><body><div id="login"><form name="loginform" method=post><input name=log><input name=pwd type=password></form></div>', 200),
    "/phpmyadmin/":       lambda: ("text/html", b'<title>phpMyAdmin</title><form method=POST action="index.php"><input name=pma_username><input name=pma_password type=password></form>', 200),
    "/phpmyadmin/index.php": lambda: ("text/html", b'<title>phpMyAdmin</title><form method=POST><input name=pma_username><input name=pma_password type=password></form>', 200),
    # Faux secrets
    "/.env":              lambda: ("text/plain", b'APP_ENV=production\nAPP_DEBUG=false\nDB_HOST=srvdomaine\nDB_PORT=1433\nDB_USERNAME=sa\nDB_PASSWORD=S@P-2024!Pwd\nVEEAM_API=https://srvdomaine:443\nVEEAM_TOKEN=v33m_api_token_xQz9xR3k2N\nMAIL_USERNAME=itops@corp.local\nMAIL_PASSWORD=Backup-MailRelay-2024\n', 200),
    "/.git/HEAD":         lambda: ("text/plain", b'ref: refs/heads/main\n', 200),
    "/.git/config":       lambda: ("text/plain", b'[core]\n\trepositoryformatversion = 0\n[remote "origin"]\n\turl = git@gitlab.corp.local:itops/backup-scripts.git\n', 200),
    "/backup.zip":        lambda: ("application/zip", b'PK\x03\x04', 200),  # fake zip header
    "/.aws/credentials":  lambda: ("text/plain", b'[default]\naws_access_key_id = AKIAS9X4R7LC8Y2ZMEQF\naws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYzEXAMPLEKEY\nregion = eu-west-3\n', 200),
    # Spring Boot actuator (CVE-2022-22965)
    "/actuator":          lambda: ("application/json", b'{"_links":{"self":{"href":"/actuator"},"health":{"href":"/actuator/health"},"env":{"href":"/actuator/env"}}}', 200),
    "/actuator/health":   lambda: ("application/json", b'{"status":"UP","groups":["liveness","readiness"]}', 200),
    "/actuator/env":      lambda: ("application/json", b'{"activeProfiles":["prod"],"propertySources":[]}', 200),
    # Plus de paths CVE
    "/api/v2/cli/commands": lambda: ("application/json", b'{"error":"unauthorized"}', 401),  # Confluence CVE-2022-26134-ish
    "/.well-known/security.txt": lambda: ("text/plain", b'Contact: mailto:security@corp.local\nExpires: 2026-12-31T23:59:59.000Z\n', 200),
}


# Size limits that keep a single JSON log line bounded.
_BODY_PREVIEW_BYTES = 4096
_HEADER_VALUE_CHARS = 1024


def log_event(handler, kind: str, extra: dict | None = None) -> None:
    """Append one JSON line describing the current request to LOG.

    Args:
        handler: The request handler being served. Its client_address,
            command, path and headers are read, plus the optional
            ``_body_cache`` attribute holding the request body bytes.
        kind: Event classification label stored under "kind".
        extra: Optional additional fields merged into the event last, so
            they override built-in keys of the same name.

    Besides the handful of headers promoted to top-level fields, every
    request header is recorded under "headers" so payloads carried in
    arbitrary headers are not lost. Each value is truncated, and repeated
    headers are joined with ", ".

    Logging is strictly best effort: any failure is swallowed so that a
    logging problem can never break the decoy response sent to the client.
    """
    try:
        body_bytes = getattr(handler, "_body_cache", None) or b""
        # Only a bounded preview of the body is stored; body_size below still
        # reports the full number of bytes that were read.
        preview = body_bytes[:_BODY_PREVIEW_BYTES].decode(errors="replace")

        headers: dict[str, str] = {}
        for name, value in handler.headers.items():
            value = str(value)[:_HEADER_VALUE_CHARS]
            headers[name] = f"{headers[name]}, {value}" if name in headers else value

        ev = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "logtype": 80000,
            "src_ip": handler.client_address[0],
            "src_port": handler.client_address[1],
            "method": handler.command,
            "path": handler.path,
            "user_agent": handler.headers.get("User-Agent", ""),
            "host": handler.headers.get("Host", ""),
            "referer": handler.headers.get("Referer", ""),
            "x_forwarded_for": handler.headers.get("X-Forwarded-For", ""),
            "headers_count": len(handler.headers),
            "headers": headers,
            "body_size": len(body_bytes),
            "body_preview": preview if preview else None,
            "kind": kind,
            "node_id": "unisoc-canary-http",
        }
        if extra:
            ev.update(extra)
        with open(LOG, "a", encoding="utf-8") as f:
            f.write(json.dumps(ev) + "\n")
    except Exception:
        # Deliberate best effort (see docstring): never let logging errors
        # propagate into request handling.
        pass


class TrapHandler(http.server.BaseHTTPRequestHandler):
    """Request handler impersonating an Apache-hosted backup dashboard.

    Every request is recorded through log_event(). Dashboard paths get the
    decoy HTML page, paths listed in TRAP_PATHS get their canned response
    after a short random delay, and anything else gets one of two 404 pages.
    """

    server_version = "Apache/2.4.62"
    sys_version = ""

    # Socket timeout in seconds, applied by StreamRequestHandler.setup().
    # Without it, a client that connects and never completes its request (or
    # announces a body it never sends) pins a worker thread forever.
    timeout = 15

    # Upper bound on request-body bytes read into memory, so a huge declared
    # Content-Length cannot exhaust RAM.
    max_body_bytes = 1024 * 1024

    def version_string(self) -> str:
        """Return the Server header value without the Python version suffix.

        The base implementation returns ``server_version + ' ' + sys_version``,
        which leaves a trailing space when sys_version is empty.
        """
        return self.server_version

    def log_message(self, fmt, *args):  # noqa: N802
        """Silence the default access log; everything goes through log_event."""
        return

    def _read_body(self) -> bytes:
        """Read the request body announced by Content-Length.

        Returns:
            At most ``max_body_bytes`` bytes of body, or ``b""`` when the
            header is missing, invalid, non-positive, or the read fails.
        """
        try:
            declared = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            return b""
        if declared <= 0:
            return b""
        try:
            return self.rfile.read(min(declared, self.max_body_bytes))
        except (OSError, ValueError):
            # Timeout, reset connection or closed stream: treat as no body.
            return b""

    def _send(self, status: int, ctype: str, body: bytes, extra_headers: dict | None = None) -> None:
        """Send a complete response with the fake Apache/PHP header set.

        Args:
            status: HTTP status code.
            ctype: Value of the Content-Type header.
            body: Response payload; its length becomes Content-Length.
            extra_headers: Optional additional headers to emit.
        """
        # send_response() already emits "Server" (from version_string()) and
        # "Date"; adding a second Server header here would be an obvious
        # fingerprint that this is not a real Apache.
        self.send_response(status)
        self.send_header("Content-Type", ctype)
        self.send_header("Content-Length", str(len(body)))
        self.send_header("X-Powered-By", "PHP/7.4.33")  # fake stack
        for k, v in (extra_headers or {}).items():
            self.send_header(k, v)
        try:
            self.end_headers()
            self.wfile.write(body)
        except OSError:
            # The client went away mid-response; the request is already logged.
            pass

    def _handle(self) -> None:
        """Log the request and answer it according to its path."""
        # Bodies are only read for methods that normally carry one.
        self._body_cache = self._read_body() if self.command in ("POST", "PUT", "PATCH") else b""
        path = urllib.parse.urlsplit(self.path).path

        # Dashboard
        if path in ("/", "/index.html", "/dashboard"):
            log_event(self, "GET_dashboard")
            time.sleep(random.uniform(0.05, 0.15))
            self._send(200, "text/html; charset=utf-8", gen_dashboard_html())
            return

        # Trapped routes with a credible canned response
        if path in TRAP_PATHS:
            ctype, body, status = TRAP_PATHS[path]()
            # First matching rule wins. Apart from the login rule, the label
            # is derived from the path only, not from the HTTP method.
            kind = (
                "POST_login_attempt" if (self.command == "POST" and "login" in path)
                else "GET_secret_leak" if any(p in path for p in (".env", ".git", "credentials", "backup.zip"))
                else "GET_actuator_probe" if "actuator" in path
                else "GET_admin_probe" if any(p in path for p in ("admin", "phpmyadmin", "wp-"))
                else "GET_api_probe" if path.startswith("/api/")
                else "GET_trap"
            )
            log_event(self, kind, {"trapped_path": path, "response_status": status})
            time.sleep(random.uniform(0.3, 1.5))  # light tarpit to slow automated scanners
            self._send(status, ctype, body)
            return

        # Unknown path: log everything (scan fingerprinting + payload capture)
        kind = (
            "POST_unknown" if self.command == "POST"
            else "GET_404_scan"
        )
        log_event(self, kind, {"unknown_path": path})
        # Randomly alternate (50/50) between an Apache-style 404 page and a
        # minimal custom one.
        if random.random() < 0.5:
            self._send(404, "text/html",
                       b'<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"><html><head><title>404 Not Found</title></head><body><h1>Not Found</h1><p>The requested URL was not found on this server.</p><hr><address>Apache/2.4.62 Server</address></body></html>')
        else:
            self._send(404, "text/html",
                       b'<title>404</title><h1>Not Found</h1>')

    def do_GET(self) -> None:  # noqa: N802
        self._handle()

    def do_POST(self) -> None:  # noqa: N802
        self._handle()

    def do_PUT(self) -> None:  # noqa: N802
        self._handle()

    def do_DELETE(self) -> None:  # noqa: N802
        self._handle()

    def do_PATCH(self) -> None:  # noqa: N802
        self._handle()

    def do_OPTIONS(self) -> None:  # noqa: N802
        self._handle()

    def do_HEAD(self) -> None:  # noqa: N802
        """Log the probe and reply 200 with headers only (empty body)."""
        self._body_cache = b""
        log_event(self, "HEAD_probe")
        self._send(200, "text/html", b"")


class ThreadingServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that serves each connection in its own thread."""

    # Handler threads are daemons, so they never block interpreter exit.
    daemon_threads = True

    # Lets the listening port be re-bound immediately after a restart.
    allow_reuse_address = True


if __name__ == "__main__":
    # Derive the directory from LOG so the two paths can never drift apart.
    os.makedirs(os.path.dirname(LOG), exist_ok=True)
    # The context manager closes the listening socket on every exit path.
    with ThreadingServer(("0.0.0.0", 80), TrapHandler) as httpd:
        print("[unisoc-http-honeytrap] listening on :80")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is a normal way to stop the honeytrap: exit quietly.
            pass
