초기 커밋
This commit is contained in:
0
app/processing/__init__.py
Normal file
0
app/processing/__init__.py
Normal file
0
app/processing/analyzers/__init__.py
Normal file
0
app/processing/analyzers/__init__.py
Normal file
37
app/processing/analyzers/device_analyzer.py
Normal file
37
app/processing/analyzers/device_analyzer.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from app.models.mongodb.device_log import DeviceLog
|
||||
|
||||
|
||||
async def analyze_device_status(
    device_id: str, start: datetime, end: datetime
) -> dict:
    """Summarize status-change events for one device over a time window.

    Counts how often each reported status occurred between ``start`` and
    ``end`` and derives the share of "online" events among all status
    changes in that period.
    """
    events = await (
        DeviceLog.find(
            DeviceLog.device_id == device_id,
            DeviceLog.event_type == "status_change",
            DeviceLog.timestamp >= start,
            DeviceLog.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    # Tally every status value reported in the window.
    counts: dict[str, int] = {}
    for event in events:
        key = event.payload.get("status", "unknown")
        counts[key] = 1 + counts.get(key, 0)

    event_total = len(events)
    online_total = counts.get("online", 0)
    # Guard the division when no events fell inside the window.
    ratio = online_total / event_total if event_total else 0.0

    return {
        "total_events": event_total,
        "status_counts": counts,
        "uptime_ratio": round(ratio, 4),
        "period": {"start": start.isoformat(), "end": end.isoformat()},
    }
|
||||
45
app/processing/analyzers/trend_analyzer.py
Normal file
45
app/processing/analyzers/trend_analyzer.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from app.models.mongodb.telemetry import TelemetryData
|
||||
|
||||
|
||||
async def analyze_trend(
    device_id: str, start: datetime, end: datetime
) -> dict:
    """Summarize a device's telemetry trend via a first-order least-squares fit.

    Fetches the "value" metric between ``start`` and ``end``, fits a line
    against elapsed seconds, and classifies the slope as increasing,
    decreasing, or stable around a +/-0.001 dead band.
    """
    samples = await (
        TelemetryData.find(
            TelemetryData.device_id == device_id,
            TelemetryData.timestamp >= start,
            TelemetryData.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    # A line fit needs at least two points.
    if len(samples) < 2:
        return {"status": "insufficient_data", "count": len(samples)}

    times = np.array([s.timestamp.timestamp() for s in samples])
    series = np.array([s.metrics.get("value", 0) for s in samples], dtype=float)

    # Shift the time axis so the first sample sits at t=0 (better conditioning).
    elapsed = times - times[0]

    # Degree-1 polynomial fit; the leading coefficient is the slope.
    gradient = float(np.polyfit(elapsed, series, 1)[0])

    if gradient > 0.001:
        direction = "increasing"
    elif gradient < -0.001:
        direction = "decreasing"
    else:
        direction = "stable"

    return {
        "count": len(samples),
        "mean": float(np.mean(series)),
        "std": float(np.std(series)),
        "min": float(np.min(series)),
        "max": float(np.max(series)),
        "slope": gradient,
        "trend": direction,
    }
|
||||
0
app/processing/pipelines/__init__.py
Normal file
0
app/processing/pipelines/__init__.py
Normal file
29
app/processing/pipelines/report_pipeline.py
Normal file
29
app/processing/pipelines/report_pipeline.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from app.models.mongodb.analytics_result import AnalyticsResult
|
||||
from app.processing.analyzers.device_analyzer import analyze_device_status
|
||||
from app.processing.analyzers.trend_analyzer import analyze_trend
|
||||
|
||||
|
||||
async def generate_device_report(
    device_id: str, start: datetime, end: datetime
) -> AnalyticsResult:
    """Generate, persist, and return a combined device report.

    Runs the status analysis and the trend analysis for the period
    [start, end], stores both under a single ``AnalyticsResult`` document,
    and returns the inserted document.

    Args:
        device_id: Device to report on.
        start: Inclusive start of the reporting period.
        end: Inclusive end of the reporting period.

    Returns:
        The persisted ``AnalyticsResult`` containing both sub-reports.
    """
    import asyncio  # local import keeps this change self-contained

    # The two analyses query independent collections/filters, so run them
    # concurrently instead of awaiting them one after the other.
    status_report, trend_report = await asyncio.gather(
        analyze_device_status(device_id, start, end),
        analyze_trend(device_id, start, end),
    )

    result = AnalyticsResult(
        analysis_type="device_report",
        device_id=device_id,
        parameters={"start": start.isoformat(), "end": end.isoformat()},
        result={
            "status": status_report,
            "trends": trend_report,
        },
        period_start=start,
        period_end=end,
    )
    await result.insert()
    return result
|
||||
57
app/processing/pipelines/telemetry_pipeline.py
Normal file
57
app/processing/pipelines/telemetry_pipeline.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import polars as pl
|
||||
|
||||
from app.models.mongodb.telemetry import TelemetryData
|
||||
|
||||
|
||||
async def aggregate_telemetry(
    device_id: str,
    start: datetime,
    end: datetime,
    interval: str = "1h",
) -> pl.DataFrame:
    """Bucket a device's telemetry into fixed time windows, averaging metrics.

    Returns an empty DataFrame when no samples fall inside [start, end].
    """
    samples = await (
        TelemetryData.find(
            TelemetryData.device_id == device_id,
            TelemetryData.timestamp >= start,
            TelemetryData.timestamp <= end,
        )
        .sort("+timestamp")
        .to_list()
    )

    if not samples:
        return pl.DataFrame()

    # Flatten each document: fixed columns first, then the metric fields.
    rows = []
    for sample in samples:
        rows.append(
            {"timestamp": sample.timestamp, "device_id": sample.device_id, **sample.metrics}
        )

    frame = pl.DataFrame(rows)
    windows = frame.sort("timestamp").group_by_dynamic("timestamp", every=interval)
    # Average every metric column; the key columns are excluded.
    return windows.agg(pl.all().exclude("timestamp", "device_id").mean())
|
||||
|
||||
|
||||
async def get_latest_telemetry(device_id: str, limit: int = 100) -> pl.DataFrame:
    """Return up to ``limit`` most-recent telemetry rows as a Polars DataFrame."""
    query = TelemetryData.find(TelemetryData.device_id == device_id)
    samples = await query.sort("-timestamp").limit(limit).to_list()

    if not samples:
        return pl.DataFrame()

    # Flatten each document: fixed columns first, then the metric fields.
    rows = [
        {"timestamp": sample.timestamp, "device_id": sample.device_id, **sample.metrics}
        for sample in samples
    ]
    return pl.DataFrame(rows)
|
||||
0
app/processing/utils/__init__.py
Normal file
0
app/processing/utils/__init__.py
Normal file
26
app/processing/utils/dataframe_utils.py
Normal file
26
app/processing/utils/dataframe_utils.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import polars as pl
|
||||
|
||||
|
||||
def filter_time_range(
    df: pl.DataFrame, column: str, start: datetime, end: datetime
) -> pl.DataFrame:
    """Keep only rows whose ``column`` value lies within [start, end]."""
    lower_bound = pl.col(column) >= start
    upper_bound = pl.col(column) <= end
    return df.filter(lower_bound & upper_bound)
|
||||
|
||||
|
||||
def resample(df: pl.DataFrame, time_column: str, interval: str) -> pl.DataFrame:
    """Downsample ``df`` into ``interval`` windows, averaging numeric columns."""
    # Collect the numeric columns; the time column itself is never averaged.
    numeric_names = []
    for name in df.columns:
        if name != time_column and df[name].dtype.is_numeric():
            numeric_names.append(name)

    grouped = df.sort(time_column).group_by_dynamic(time_column, every=interval)
    return grouped.agg([pl.col(name).mean().alias(name) for name in numeric_names])
|
||||
|
||||
|
||||
def to_records(df: pl.DataFrame) -> list[dict]:
    """Convert ``df`` into a list of one dictionary per row."""
    return df.to_dicts()
|
||||
39
app/processing/utils/statistics.py
Normal file
39
app/processing/utils/statistics.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def moving_average(values: list[float], window: int = 5) -> list[float]:
    """Return the simple moving average of *values* over ``window`` samples.

    If there are fewer than ``window`` samples, the input values are
    returned as-is — but as a fresh list, so callers cannot accidentally
    mutate the argument through the returned object (the original code
    returned the caller's own list in that case).

    Args:
        values: Input series.
        window: Averaging window length (number of samples).

    Returns:
        A list of length ``len(values) - window + 1`` (or a copy of
        ``values`` when the series is shorter than the window).
    """
    if len(values) < window:
        # Copy instead of aliasing the caller's list.
        return list(values)
    arr = np.array(values, dtype=float)
    kernel = np.ones(window) / window
    return np.convolve(arr, kernel, mode="valid").tolist()
|
||||
|
||||
|
||||
def detect_anomalies(
    values: list[float], threshold: float = 2.0
) -> list[dict]:
    """Detect anomalies using the Z-score method.

    Args:
        values: Input series.
        threshold: Absolute Z-score above which a point is flagged.

    Returns:
        A list of ``{"index", "value", "z_score"}`` dicts for each point
        whose |z| exceeds ``threshold``. Empty for an empty or constant
        series.
    """
    arr = np.array(values, dtype=float)
    # Empty input: nothing to flag. Checked explicitly so np.mean/np.std
    # never run on an empty array (which yields NaN plus RuntimeWarnings).
    if arr.size == 0:
        return []

    mean = np.mean(arr)
    std = np.std(arr)

    # A constant series has no meaningful Z-scores.
    if std == 0:
        return []

    z_scores = np.abs((arr - mean) / std)
    anomalies = []
    for i, (val, z) in enumerate(zip(values, z_scores)):
        if z > threshold:
            anomalies.append({"index": i, "value": val, "z_score": float(z)})
    return anomalies
|
||||
|
||||
|
||||
def percentile_stats(values: list[float]) -> dict:
    """Return common high percentiles (p50/p90/p95/p99) of *values*.

    Args:
        values: Non-empty input series.

    Returns:
        Dict mapping "p50", "p90", "p95", "p99" to the corresponding
        (linearly interpolated) percentile as a Python float.
    """
    arr = np.array(values, dtype=float)
    # One vectorized call instead of four separate passes over the array.
    p50, p90, p95, p99 = np.percentile(arr, [50, 90, 95, 99])
    return {
        "p50": float(p50),
        "p90": float(p90),
        "p95": float(p95),
        "p99": float(p99),
    }
|
||||
Reference in New Issue
Block a user