초기 커밋
This commit is contained in:
0
app/processing/__init__.py
Normal file
0
app/processing/__init__.py
Normal file
0
app/processing/analyzers/__init__.py
Normal file
0
app/processing/analyzers/__init__.py
Normal file
37
app/processing/analyzers/device_analyzer.py
Normal file
37
app/processing/analyzers/device_analyzer.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from app.models.mongodb.device_log import DeviceLog
|
||||
|
||||
|
||||
async def analyze_device_status(
    device_id: str, start: datetime, end: datetime
) -> dict:
    """Summarize status-change events for one device over a time window.

    Counts how often each reported status occurred between ``start`` and
    ``end`` and derives the share of "online" events among all status
    changes in that period.
    """
    events = await (
        DeviceLog.find(
            DeviceLog.device_id == device_id,
            DeviceLog.event_type == "status_change",
            DeviceLog.timestamp >= start,
            DeviceLog.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    # Tally every status value reported in the window.
    counts: dict[str, int] = {}
    for event in events:
        key = event.payload.get("status", "unknown")
        counts[key] = 1 + counts.get(key, 0)

    event_total = len(events)
    online_total = counts.get("online", 0)
    # Guard the division when no events fell inside the window.
    ratio = online_total / event_total if event_total else 0.0

    return {
        "total_events": event_total,
        "status_counts": counts,
        "uptime_ratio": round(ratio, 4),
        "period": {"start": start.isoformat(), "end": end.isoformat()},
    }
|
||||
45
app/processing/analyzers/trend_analyzer.py
Normal file
45
app/processing/analyzers/trend_analyzer.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from app.models.mongodb.telemetry import TelemetryData
|
||||
|
||||
|
||||
async def analyze_trend(
    device_id: str, start: datetime, end: datetime
) -> dict:
    """Summarize a device's telemetry trend via a first-order least-squares fit.

    Fetches the "value" metric between ``start`` and ``end``, fits a line
    against elapsed seconds, and classifies the slope as increasing,
    decreasing, or stable around a +/-0.001 dead band.
    """
    samples = await (
        TelemetryData.find(
            TelemetryData.device_id == device_id,
            TelemetryData.timestamp >= start,
            TelemetryData.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    # A line fit needs at least two points.
    if len(samples) < 2:
        return {"status": "insufficient_data", "count": len(samples)}

    times = np.array([s.timestamp.timestamp() for s in samples])
    series = np.array([s.metrics.get("value", 0) for s in samples], dtype=float)

    # Shift the time axis so the first sample sits at t=0 (better conditioning).
    elapsed = times - times[0]

    # Degree-1 polynomial fit; the leading coefficient is the slope.
    gradient = float(np.polyfit(elapsed, series, 1)[0])

    if gradient > 0.001:
        direction = "increasing"
    elif gradient < -0.001:
        direction = "decreasing"
    else:
        direction = "stable"

    return {
        "count": len(samples),
        "mean": float(np.mean(series)),
        "std": float(np.std(series)),
        "min": float(np.min(series)),
        "max": float(np.max(series)),
        "slope": gradient,
        "trend": direction,
    }
|
||||
0
app/processing/pipelines/__init__.py
Normal file
0
app/processing/pipelines/__init__.py
Normal file
29
app/processing/pipelines/report_pipeline.py
Normal file
29
app/processing/pipelines/report_pipeline.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from app.models.mongodb.analytics_result import AnalyticsResult
|
||||
from app.processing.analyzers.device_analyzer import analyze_device_status
|
||||
from app.processing.analyzers.trend_analyzer import analyze_trend
|
||||
|
||||
|
||||
async def generate_device_report(
    device_id: str, start: datetime, end: datetime
) -> AnalyticsResult:
    """Generate, persist, and return a combined device report.

    Runs the status analysis and the trend analysis for the period
    [start, end], stores both under a single ``AnalyticsResult`` document,
    and returns the inserted document.

    Args:
        device_id: Device to report on.
        start: Inclusive start of the reporting period.
        end: Inclusive end of the reporting period.

    Returns:
        The persisted ``AnalyticsResult`` containing both sub-reports.
    """
    import asyncio  # local import keeps this change self-contained

    # The two analyses query independent collections/filters, so run them
    # concurrently instead of awaiting them one after the other.
    status_report, trend_report = await asyncio.gather(
        analyze_device_status(device_id, start, end),
        analyze_trend(device_id, start, end),
    )

    result = AnalyticsResult(
        analysis_type="device_report",
        device_id=device_id,
        parameters={"start": start.isoformat(), "end": end.isoformat()},
        result={
            "status": status_report,
            "trends": trend_report,
        },
        period_start=start,
        period_end=end,
    )
    await result.insert()
    return result
|
||||
57
app/processing/pipelines/telemetry_pipeline.py
Normal file
57
app/processing/pipelines/telemetry_pipeline.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import polars as pl
|
||||
|
||||
from app.models.mongodb.telemetry import TelemetryData
|
||||
|
||||
|
||||
async def aggregate_telemetry(
    device_id: str,
    start: datetime,
    end: datetime,
    interval: str = "1h",
) -> pl.DataFrame:
    """Bucket a device's telemetry into fixed time windows, averaging metrics.

    Returns an empty DataFrame when no samples fall inside [start, end].
    """
    samples = await (
        TelemetryData.find(
            TelemetryData.device_id == device_id,
            TelemetryData.timestamp >= start,
            TelemetryData.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    if not samples:
        return pl.DataFrame()

    # Flatten each document: fixed columns first, then the metric fields.
    rows = []
    for sample in samples:
        rows.append(
            {"timestamp": sample.timestamp, "device_id": sample.device_id, **sample.metrics}
        )

    frame = pl.DataFrame(rows)
    windows = frame.sort("timestamp").group_by_dynamic("timestamp", every=interval)
    # Average every metric column; the key columns are excluded.
    return windows.agg(pl.all().exclude("timestamp", "device_id").mean())
|
||||
|
||||
|
||||
async def get_latest_telemetry(device_id: str, limit: int = 100) -> pl.DataFrame:
    """Return up to ``limit`` most-recent telemetry rows as a Polars DataFrame."""
    query = TelemetryData.find(TelemetryData.device_id == device_id)
    samples = await query.sort("-timestamp").limit(limit).to_list()

    if not samples:
        return pl.DataFrame()

    # Flatten each document: fixed columns first, then the metric fields.
    rows = [
        {"timestamp": sample.timestamp, "device_id": sample.device_id, **sample.metrics}
        for sample in samples
    ]
    return pl.DataFrame(rows)
|
||||
0
app/processing/utils/__init__.py
Normal file
0
app/processing/utils/__init__.py
Normal file
26
app/processing/utils/dataframe_utils.py
Normal file
26
app/processing/utils/dataframe_utils.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import polars as pl
|
||||
|
||||
|
||||
def filter_time_range(
    df: pl.DataFrame, column: str, start: datetime, end: datetime
) -> pl.DataFrame:
    """Keep only rows whose ``column`` value lies within [start, end]."""
    lower_bound = pl.col(column) >= start
    upper_bound = pl.col(column) <= end
    return df.filter(lower_bound & upper_bound)
|
||||
|
||||
|
||||
def resample(df: pl.DataFrame, time_column: str, interval: str) -> pl.DataFrame:
    """Downsample ``df`` into ``interval`` windows, averaging numeric columns."""
    # Collect the numeric columns; the time column itself is never averaged.
    numeric_names = []
    for name in df.columns:
        if name != time_column and df[name].dtype.is_numeric():
            numeric_names.append(name)

    grouped = df.sort(time_column).group_by_dynamic(time_column, every=interval)
    return grouped.agg([pl.col(name).mean().alias(name) for name in numeric_names])
|
||||
|
||||
|
||||
def to_records(df: pl.DataFrame) -> list[dict]:
    """Convert ``df`` into a list of one dictionary per row."""
    return df.to_dicts()
|
||||
39
app/processing/utils/statistics.py
Normal file
39
app/processing/utils/statistics.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def moving_average(values: list[float], window: int = 5) -> list[float]:
    """Return the simple moving average of *values* over ``window`` samples.

    If there are fewer than ``window`` samples, the input values are
    returned as-is — but as a fresh list, so callers cannot accidentally
    mutate the argument through the returned object (the original code
    returned the caller's own list in that case).

    Args:
        values: Input series.
        window: Averaging window length (number of samples).

    Returns:
        A list of length ``len(values) - window + 1`` (or a copy of
        ``values`` when the series is shorter than the window).
    """
    if len(values) < window:
        # Copy instead of aliasing the caller's list.
        return list(values)
    arr = np.array(values, dtype=float)
    kernel = np.ones(window) / window
    return np.convolve(arr, kernel, mode="valid").tolist()
|
||||
|
||||
|
||||
def detect_anomalies(
    values: list[float], threshold: float = 2.0
) -> list[dict]:
    """Detect anomalies using the Z-score method.

    Args:
        values: Input series.
        threshold: Absolute Z-score above which a point is flagged.

    Returns:
        A list of ``{"index", "value", "z_score"}`` dicts for each point
        whose |z| exceeds ``threshold``. Empty for an empty or constant
        series.
    """
    arr = np.array(values, dtype=float)
    # Empty input: nothing to flag. Checked explicitly so np.mean/np.std
    # never run on an empty array (which yields NaN plus RuntimeWarnings).
    if arr.size == 0:
        return []

    mean = np.mean(arr)
    std = np.std(arr)

    # A constant series has no meaningful Z-scores.
    if std == 0:
        return []

    z_scores = np.abs((arr - mean) / std)
    anomalies = []
    for i, (val, z) in enumerate(zip(values, z_scores)):
        if z > threshold:
            anomalies.append({"index": i, "value": val, "z_score": float(z)})
    return anomalies
|
||||
|
||||
|
||||
def percentile_stats(values: list[float]) -> dict:
    """Return common high percentiles (p50/p90/p95/p99) of *values*.

    Args:
        values: Non-empty input series.

    Returns:
        Dict mapping "p50", "p90", "p95", "p99" to the corresponding
        (linearly interpolated) percentile as a Python float.
    """
    arr = np.array(values, dtype=float)
    # One vectorized call instead of four separate passes over the array.
    p50, p90, p95, p99 = np.percentile(arr, [50, 90, 95, 99])
    return {
        "p50": float(p50),
        "p90": float(p90),
        "p95": float(p95),
        "p99": float(p99),
    }
|
||||
Reference in New Issue
Block a user