Initial commit - SolSolAutomazione project
This commit is contained in:
340
src/analysis/statistical/advanced_metrics.R
Normal file
340
src/analysis/statistical/advanced_metrics.R
Normal file
@@ -0,0 +1,340 @@
|
||||
# Advanced Statistical Analysis for Pyranometer Data
|
||||
# Implements research-backed statistical methods for solar irradiance analysis
|
||||
|
||||
# Load required libraries
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(lubridate)
|
||||
library(forecast)
|
||||
library(tseries)
|
||||
library(nortest)
|
||||
library(DescTools)
|
||||
library(moments)
|
||||
|
||||
#' Calculate comprehensive statistical metrics for pyranometer data
#'
#' Compares each test sensor against the reference sensor: descriptive
#' statistics, error metrics, distribution analysis, time-series analysis,
#' hypothesis tests and solar performance indicators.
#'
#' @param data Data frame with timestamp and sensor columns
#' @param reference_col Name of reference sensor column
#' @param test_cols Vector of test sensor column names
#' @param timestamp_col Name of timestamp column (default: "timestamp")
#' @return Named list (one entry per test sensor) of analysis results
calculate_advanced_metrics <- function(data, reference_col, test_cols, timestamp_col = "timestamp") {

  results <- list()

  for (sensor in test_cols) {
    cat("Analyzing sensor:", sensor, "vs reference:", reference_col, "\n")

    # Keep only rows where both sensors report a value
    paired <- data[complete.cases(data[[reference_col]], data[[sensor]]), ]

    if (nrow(paired) < 10) {
      warning(paste("Insufficient data for comparison:", sensor, "vs", reference_col))
      next
    }

    ref_values <- paired[[reference_col]]
    test_values <- paired[[sensor]]

    # Assemble every analysis family for this sensor in one list
    results[[sensor]] <- list(
      basic_statistics = calculate_basic_statistics(test_values, ref_values),
      error_metrics = calculate_error_metrics(test_values, ref_values),
      distribution_analysis = analyze_distribution(test_values, ref_values),
      time_series_analysis = analyze_time_series(paired, sensor, reference_col, timestamp_col),
      statistical_tests = perform_statistical_tests(test_values, ref_values),
      performance_indicators = calculate_performance_indicators(test_values, ref_values),
      sample_size = nrow(paired)
    )
  }

  results
}
||||
|
||||
#' Calculate basic descriptive statistics
#'
#' Location, spread and shape summaries of the test sensor plus
#' location/spread of the reference sensor. All statistics ignore NAs.
calculate_basic_statistics <- function(test, reference) {
  out <- list()

  # Test sensor: location and spread
  out$test_mean <- mean(test, na.rm = TRUE)
  out$test_median <- median(test, na.rm = TRUE)
  out$test_sd <- sd(test, na.rm = TRUE)
  out$test_min <- min(test, na.rm = TRUE)
  out$test_max <- max(test, na.rm = TRUE)
  out$test_q1 <- quantile(test, 0.25, na.rm = TRUE)
  out$test_q3 <- quantile(test, 0.75, na.rm = TRUE)
  out$test_iqr <- IQR(test, na.rm = TRUE)

  # Test sensor: distribution shape (third and fourth standardized moments)
  out$test_skewness <- moments::skewness(test, na.rm = TRUE)
  out$test_kurtosis <- moments::kurtosis(test, na.rm = TRUE)

  # Reference sensor: location and spread
  out$reference_mean <- mean(reference, na.rm = TRUE)
  out$reference_median <- median(reference, na.rm = TRUE)
  out$reference_sd <- sd(reference, na.rm = TRUE)

  out
}
||||
|
||||
#' Calculate comprehensive error metrics
#'
#' Error convention: errors = test - reference, so a positive MBE means the
#' test sensor reads high relative to the reference.
calculate_error_metrics <- function(test, reference) {
  errors <- test - reference

  # Shared intermediates (several metrics reuse these)
  mbe <- mean(errors, na.rm = TRUE)
  mae <- mean(abs(errors), na.rm = TRUE)
  rmse <- sqrt(mean(errors^2, na.rm = TRUE))
  ref_mean <- mean(reference, na.rm = TRUE)
  ref_range <- max(reference, na.rm = TRUE) - min(reference, na.rm = TRUE)

  list(
    # Absolute errors
    mbe = mbe,    # Mean Bias Error
    mae = mae,    # Mean Absolute Error
    rmse = rmse,  # Root Mean Square Error

    # Relative errors (percentage of the reference mean)
    relative_mbe = (mbe / ref_mean) * 100,
    relative_mae = (mae / ref_mean) * 100,
    relative_rmse = (rmse / ref_mean) * 100,

    # Advanced error metrics
    mape = mean(abs(errors / reference), na.rm = TRUE) * 100,  # Mean Absolute Percentage Error
    smape = 100 * mean(2 * abs(errors) / (abs(test) + abs(reference)), na.rm = TRUE),  # Symmetric MAPE
    nrmse = rmse / ref_range,  # RMSE normalized by the reference range

    # Theil's U inequality coefficient
    theil_u = theils_u(test, reference),

    # Moments of the error distribution
    error_mean = mbe,
    error_sd = sd(errors, na.rm = TRUE),
    error_skewness = moments::skewness(errors, na.rm = TRUE),
    error_kurtosis = moments::kurtosis(errors, na.rm = TRUE)
  )
}
||||
|
||||
#' Calculate Theil's U statistic
#'
#' Theil's U inequality coefficient for forecast accuracy: 0 indicates a
#' perfect match between `predicted` and `actual`; values approaching 1
#' indicate poor agreement.
#'
#' Fix vs. the original: pairs containing NA are now dropped before the
#' computation; previously a single missing observation propagated NA
#' through the sums and made the whole statistic NA.
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predicted/test values (same length).
#' @return Scalar Theil's U, or NA when no complete pairs are available.
theils_u <- function(actual, predicted) {
  # Use only pairwise-complete observations
  ok <- complete.cases(actual, predicted)
  actual <- actual[ok]
  predicted <- predicted[ok]

  n <- length(actual)
  if (n == 0) {
    return(NA_real_)
  }

  numerator <- sqrt(sum((predicted - actual)^2) / n)
  denominator <- sqrt(sum(actual^2) / n) + sqrt(sum(predicted^2) / n)
  numerator / denominator
}
||||
|
||||
#' Analyze distribution characteristics
#'
#' Correlation, linear-fit and normality diagnostics for a test sensor
#' against the reference sensor.
#'
#' Fixes vs. the original:
#' * the linear model test ~ reference was fitted four separate times
#'   (twice with a full summary); it is now fitted once and reused;
#' * shapiro.test() aborts for n < 3 or n > 5000 (its hard limits), which
#'   crashed the whole analysis for long datasets; failures now yield NA.
#'
#' @param test Numeric vector of test sensor readings.
#' @param reference Numeric vector of reference sensor readings.
#' @return List of correlation coefficients, regression fit parameters and
#'   normality-test p-values (NA where the test is not applicable).
analyze_distribution <- function(test, reference) {

  # Single fit reused for slope, intercept and both R-squared values
  fit <- lm(test ~ reference)
  fit_summary <- summary(fit)
  coefs <- coef(fit)

  # shapiro.test() errors outside 3 <= n <= 5000; report NA in that case
  safe_shapiro_p <- function(x) {
    tryCatch(shapiro.test(x)$p.value, error = function(e) NA_real_)
  }

  list(
    # Correlation analysis
    pearson_correlation = cor(test, reference, method = "pearson"),
    spearman_correlation = cor(test, reference, method = "spearman"),
    kendall_correlation = cor(test, reference, method = "kendall"),

    # Regression analysis (test ~ reference)
    regression_slope = coefs[2],
    regression_intercept = coefs[1],
    r_squared = fit_summary$r.squared,
    adjusted_r_squared = fit_summary$adj.r.squared,

    # Normality tests (Shapiro-Wilk p-values)
    test_normality_p = safe_shapiro_p(test),
    reference_normality_p = safe_shapiro_p(reference),
    errors_normality_p = safe_shapiro_p(test - reference)
  )
}
||||
|
||||
#' Perform comprehensive time series analysis
#'
#' Orders the data by time, then reports the mean sampling interval, lag-1
#' autocorrelations of both sensors, the peak cross-correlation (and the lag
#' at which it occurs), and ADF stationarity p-values.
#'
#' Fix vs. the original: partial autocorrelations (pacf) were computed for
#' both series but never used anywhere — that dead work has been removed.
#'
#' @param data Data frame holding both sensor columns and the timestamp.
#' @param test_col,reference_col Column names of the two sensors.
#' @param timestamp_col Column name of the timestamp.
#' @return List of time-series diagnostics; stationarity p-values are NA
#'   when adf.test() is unavailable or fails.
analyze_time_series <- function(data, test_col, reference_col, timestamp_col) {

  # Ensure data is sorted by timestamp
  data <- data[order(data[[timestamp_col]]), ]

  test_ts <- data[[test_col]]
  reference_ts <- data[[reference_col]]

  # Mean sampling interval in seconds (difftime -> numeric)
  time_diffs <- diff(data[[timestamp_col]])
  avg_interval <- mean(as.numeric(time_diffs, units = "secs"))

  # Autocorrelation analysis (acf[1] is lag 0, so lag 1 is element 2)
  test_acf <- acf(test_ts, plot = FALSE)
  reference_acf <- acf(reference_ts, plot = FALSE)

  # Cross-correlation between the two sensors
  ccf_result <- ccf(test_ts, reference_ts, plot = FALSE)

  # Augmented Dickey-Fuller stationarity tests (tseries::adf.test); the
  # test can fail on short or degenerate series, so failures become NA
  test_adf <- tryCatch(
    adf.test(test_ts)$p.value,
    error = function(e) NA
  )
  reference_adf <- tryCatch(
    adf.test(reference_ts)$p.value,
    error = function(e) NA
  )

  list(
    sampling_interval_seconds = avg_interval,
    test_autocorrelation_lag1 = test_acf$acf[2],
    reference_autocorrelation_lag1 = reference_acf$acf[2],
    cross_correlation_max = max(ccf_result$acf),
    cross_correlation_lag = ccf_result$lag[which.max(ccf_result$acf)],
    test_stationarity_p = test_adf,
    reference_stationarity_p = reference_adf
  )
}
||||
|
||||
#' Perform statistical hypothesis tests
#'
#' Distribution-comparison and normality tests for a test sensor against
#' the reference; any individual test that errors out reports NA for its
#' statistic and p-value instead of aborting.
perform_statistical_tests <- function(test, reference) {
  errors <- test - reference

  # Evaluate a test call, substituting NA results when it fails
  run_test <- function(expr) {
    tryCatch(expr, error = function(e) list(statistic = NA, p.value = NA))
  }

  ks_test <- run_test(ks.test(test, reference))      # Kolmogorov-Smirnov: distribution similarity
  mw_test <- run_test(wilcox.test(test, reference))  # Mann-Whitney U: non-parametric location shift
  var_test <- run_test(var.test(test, reference))    # F-test: variance equality
  ad_test <- run_test(nortest::ad.test(errors))      # Anderson-Darling: normality of errors

  list(
    ks_statistic = ks_test$statistic,
    ks_p_value = ks_test$p.value,
    mw_statistic = mw_test$statistic,
    mw_p_value = mw_test$p.value,
    f_statistic = var_test$statistic,
    f_p_value = var_test$p.value,
    ad_statistic = ad_test$statistic,
    ad_p_value = ad_test$p.value
  )
}
||||
|
||||
#' Calculate performance indicators specific to solar applications
#'
#' Simple ratio-based summaries; `reference` is treated as the trusted
#' (e.g. clear-sky) signal.
calculate_performance_indicators <- function(test, reference) {
  mean_test <- mean(test, na.rm = TRUE)
  mean_ref <- mean(reference, na.rm = TRUE)

  list(
    # Ratio of mean test output to mean reference output
    performance_ratio = mean_test / mean_ref,
    # Mean of the per-sample test/reference ratio (simplified clear-sky index)
    clear_sky_index = mean(test / reference, na.rm = TRUE),
    # Coefficient of variation of the test signal
    variability_index = sd(test, na.rm = TRUE) / mean_test,
    # 1 minus relative error spread; closer to 1 means more consistent
    consistency_ratio = 1 - (sd(test - reference, na.rm = TRUE) / mean_ref)
  )
}
||||
|
||||
#' Generate comprehensive statistical report
#'
#' Flattens the nested results from calculate_advanced_metrics() into one
#' data frame row per sensor; optionally writes it to CSV.
#'
#' @param analysis_results Output of calculate_advanced_metrics().
#' @param output_file Optional CSV path; when non-NULL the report is saved.
#' @return Data frame with one row per sensor.
generate_statistical_report <- function(analysis_results, output_file = NULL) {

  # Build one flat summary row per sensor
  rows <- lapply(names(analysis_results), function(sensor_name) {
    res <- analysis_results[[sensor_name]]
    list(
      sensor_name = sensor_name,
      sample_size = res$sample_size,

      # Summary statistics
      mean_bias_error = res$error_metrics$mbe,
      root_mean_square_error = res$error_metrics$rmse,
      mean_absolute_error = res$error_metrics$mae,

      # Correlation and fit
      r_squared = res$distribution_analysis$r_squared,
      pearson_correlation = res$distribution_analysis$pearson_correlation,

      # Statistical significance
      ks_p_value = res$statistical_tests$ks_p_value,
      mw_p_value = res$statistical_tests$mw_p_value,

      # Performance indicators
      performance_ratio = res$performance_indicators$performance_ratio,
      variability_index = res$performance_indicators$variability_index
    )
  })
  names(rows) <- names(analysis_results)

  # Convert to data frame for easier viewing (one row per sensor)
  report_df <- do.call(rbind, lapply(rows, as.data.frame))

  if (!is.null(output_file)) {
    write.csv(report_df, output_file, row.names = FALSE)
    cat("Statistical report saved to:", output_file, "\n")
  }

  return(report_df)
}
||||
|
||||
# Example usage and testing
# (wrapped in `if (FALSE)` so it never runs when the file is sourced;
#  execute the body manually to try the workflow end-to-end)
if (FALSE) {
  # Create sample data: one simulated day at 1-minute resolution
  set.seed(123)
  n <- 1000
  timestamp <- seq(as.POSIXct("2024-01-01 06:00:00"), by = "1 min", length.out = n)

  # Simulate reference sensor (clear sky conditions with some noise)
  reference <- 800 + 200 * sin(seq(0, 2*pi, length.out = n)) + rnorm(n, 0, 20)

  # Simulate test sensors with different characteristics
  sensor1 <- reference + rnorm(n, 5, 15) # Small bias, good precision
  sensor2 <- reference * 1.02 + rnorm(n, 0, 30) # 2% overestimation, poorer precision
  sensor3 <- reference + rnorm(n, -10, 25) # Negative bias

  sample_data <- data.frame(
    timestamp = timestamp,
    reference_sensor = reference,
    test_sensor_1 = sensor1,
    test_sensor_2 = sensor2,
    test_sensor_3 = sensor3
  )

  # Run analysis for all three test sensors against the reference
  results <- calculate_advanced_metrics(
    data = sample_data,
    reference_col = "reference_sensor",
    test_cols = c("test_sensor_1", "test_sensor_2", "test_sensor_3")
  )

  # Generate report (also writes a CSV to the working directory)
  report <- generate_statistical_report(results, "sensor_comparison_report.csv")

  print("Advanced statistical analysis completed!")
  print(report)
}
|
||||
432
src/analysis/uncertainty/uncertainty_calculator.R
Normal file
432
src/analysis/uncertainty/uncertainty_calculator.R
Normal file
@@ -0,0 +1,432 @@
|
||||
# Uncertainty Calculation Engine for Pyranometer Data
|
||||
# Implements ISO GUM (Guide to the Expression of Uncertainty in Measurement) standards
|
||||
# and advanced uncertainty analysis methods for solar irradiance measurements
|
||||
|
||||
# Load required libraries
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(moments)
|
||||
library(boot)
|
||||
|
||||
#' Calculate comprehensive measurement uncertainty for pyranometer data
#'
#' Runs the full GUM-style workflow per test sensor: Type A (statistical),
#' Type B (specification-based), combined and expanded uncertainties, an
#' uncertainty budget, and a Monte Carlo cross-check.
#'
#' @param data Data frame with sensor measurements
#' @param reference_col Name of reference sensor column
#' @param test_cols Vector of test sensor column names
#' @param sensor_specs List containing sensor specifications and calibration data
#' @param coverage_factor Coverage factor for expanded uncertainty (default: 2 for 95% confidence)
#' @return Named list (one entry per test sensor) of uncertainty results
calculate_measurement_uncertainty <- function(data, reference_col, test_cols,
                                              sensor_specs = NULL, coverage_factor = 2) {

  results <- list()

  for (sensor in test_cols) {
    cat("Calculating uncertainty for sensor:", sensor, "\n")

    # Rows where both the reference and this sensor have values
    paired <- data[complete.cases(data[[reference_col]], data[[sensor]]), ]

    if (nrow(paired) < 10) {
      warning(paste("Insufficient data for uncertainty analysis:", sensor))
      next
    }

    reference <- paired[[reference_col]]
    measured <- paired[[sensor]]
    errors <- measured - reference

    # GUM pipeline: Type A -> Type B -> combined -> expanded
    type_a <- calculate_type_a_uncertainty(errors, measured, reference)
    type_b <- calculate_type_b_uncertainty(sensor, sensor_specs, mean(reference))
    combined <- calculate_combined_uncertainty(type_a, type_b)
    expanded <- calculate_expanded_uncertainty(combined, coverage_factor)

    results[[sensor]] <- list(
      type_a_uncertainty = type_a,
      type_b_uncertainty = type_b,
      combined_uncertainty = combined,
      expanded_uncertainty = expanded,
      uncertainty_budget = create_uncertainty_budget(type_a, type_b, combined, expanded),
      monte_carlo_simulation = perform_monte_carlo_simulation(measured, reference, sensor_specs, sensor),
      coverage_factor = coverage_factor,
      sample_size = nrow(paired)
    )
  }

  results
}
||||
|
||||
#' Calculate Type A uncertainty (statistical analysis)
#'
#' Type A components are derived from the observed measurements themselves:
#' standard error of the error series, uncertainty of the mean, regression
#' scatter, and an autocorrelation-corrected standard error.
calculate_type_a_uncertainty <- function(errors, test, reference) {
  n <- length(errors)
  error_sd <- sd(errors, na.rm = TRUE)
  sqrt_n <- sqrt(n)

  list(
    # Standard uncertainty from repeated measurements (std. error of errors)
    standard_uncertainty = error_sd / sqrt_n,
    # Standard error of the test sensor's mean
    uncertainty_of_mean = sd(test, na.rm = TRUE) / sqrt_n,
    # Scatter about the test ~ reference regression line
    regression_uncertainty = calculate_regression_uncertainty(test, reference),
    # Standard error corrected for serial correlation of the errors
    autocorrelation_uncertainty = calculate_autocorrelation_uncertainty(errors),
    standard_deviation = error_sd,
    standard_error = error_sd / sqrt_n,
    degrees_of_freedom = n - 1
  )
}
||||
|
||||
#' Calculate Type B uncertainty (systematic uncertainties)
#'
#' Combines specification-based relative uncertainty components in
#' quadrature, converted to absolute W/m2 at the mean irradiance level.
calculate_type_b_uncertainty <- function(sensor_name, sensor_specs, mean_irradiance) {

  # Fallback relative uncertainties (fractions) used when no specs are given
  defaults <- list(
    calibration_uncertainty = 0.02,    # 2% calibration uncertainty
    temperature_uncertainty = 0.005,   # 0.5% per degree C temperature effect
    cosine_uncertainty = 0.01,         # 1% cosine response uncertainty
    azimuth_uncertainty = 0.002,       # 0.2% azimuth uncertainty
    spectral_uncertainty = 0.015,      # 1.5% spectral mismatch
    linearity_uncertainty = 0.005,     # 0.5% non-linearity
    stability_uncertainty = 0.01,      # 1% long-term stability
    resolution_uncertainty = 0.001     # 0.1% resolution uncertainty
  )

  # Sensor-specific specs (if any) override the defaults entry by entry
  has_specs <- !is.null(sensor_specs) && sensor_name %in% names(sensor_specs)
  uncertainties <- if (has_specs) {
    modifyList(defaults, sensor_specs[[sensor_name]])
  } else {
    defaults
  }

  # Relative (fractional) -> absolute (W/m2) at the mean irradiance
  absolute <- lapply(uncertainties, function(u) u * mean_irradiance)

  list(
    relative_uncertainties = uncertainties,
    absolute_uncertainties = absolute,
    # Root-sum-of-squares combination of all components
    combined_uncertainty = sqrt(sum(vapply(absolute, function(x) x^2, numeric(1)))),
    # Type B uncertainties are conventionally assigned infinite df
    effective_degrees_of_freedom = Inf
  )
}
||||
|
||||
#' Calculate regression-related uncertainty
#'
#' Fits test ~ reference and reports the residual scatter as an uncertainty
#' component, plus a prediction-interval style inflation for a single new
#' observation.
calculate_regression_uncertainty <- function(test, reference) {
  fit <- lm(test ~ reference)
  sigma_hat <- summary(fit)$sigma
  n_obs <- length(test)

  list(
    regression_standard_error = sigma_hat,
    # Inflated for the uncertainty of one new prediction
    prediction_uncertainty = sigma_hat * sqrt(1 + 1 / n_obs),
    residual_standard_error = sd(residuals(fit))
  )
}
||||
|
||||
#' Calculate uncertainty corrected for autocorrelation
#'
#' Serially correlated errors carry less independent information than n
#' samples; the lag-1 autocorrelation rho shrinks the effective sample size
#' via n_eff = n * (1 - rho) / (1 + rho).
#'
#' Fixes vs. the original:
#' * a constant error series has zero variance, so the lag-1 autocorrelation
#'   is NaN; that case now falls back to rho = 0 instead of propagating NaN
#'   through the effective sample size and uncertainty;
#' * rho very close to 1 could drive the effective sample size to (or below)
#'   zero; it is now floored at 1 so the corrected uncertainty stays finite.
#'
#' @param errors Numeric vector of measurement errors (test - reference).
#' @return List with the lag-1 autocorrelation, effective sample size and
#'   autocorrelation-corrected standard uncertainty.
calculate_autocorrelation_uncertainty <- function(errors) {

  n <- length(errors)

  # Lag-1 autocorrelation; NaN for constant (zero-variance) input
  acf_result <- acf(errors, plot = FALSE, lag.max = 1)
  rho <- acf_result$acf[2]
  if (!is.finite(rho)) {
    rho <- 0
  }

  # Effective sample size under AR(1)-style correlation, floored at 1
  effective_n <- max(1, n * (1 - rho) / (1 + rho))

  corrected_uncertainty <- sd(errors, na.rm = TRUE) / sqrt(effective_n)

  list(
    autocorrelation_coefficient = rho,
    effective_sample_size = effective_n,
    corrected_uncertainty = corrected_uncertainty
  )
}
||||
|
||||
#' Calculate combined standard uncertainty
#'
#' Combines Type A and Type B components in quadrature and derives the
#' effective degrees of freedom via the Welch-Satterthwaite formula.
calculate_combined_uncertainty <- function(type_a, type_b) {
  u_a <- type_a$standard_uncertainty
  u_b <- type_b$combined_uncertainty
  df_a <- type_a$degrees_of_freedom
  df_b <- type_b$effective_degrees_of_freedom

  # Root-sum-of-squares combination of the two components
  u_c <- sqrt(u_a^2 + u_b^2)

  # Welch-Satterthwaite effective degrees of freedom
  nu_eff <- (u_c^4) / ((u_a^4 / df_a) + (u_b^4 / df_b))

  list(
    combined_standard_uncertainty = u_c,
    effective_degrees_of_freedom = nu_eff,
    # t-based coverage factor for ~95% two-sided coverage
    coverage_factor_95 = qt(0.975, nu_eff)
  )
}
||||
|
||||
#' Calculate expanded uncertainty
#'
#' Multiplies the combined standard uncertainty by the coverage factor.
#' The confidence level is reported only for the two conventional coverage
#' factors (k = 2 -> 95.45%, k = 1.96 -> 95%); any other k yields NA.
#'
#' Fix vs. the original: nested scalar ifelse() calls replaced with a plain
#' if/else chain — the idiomatic (and attribute-safe) construct for scalars.
#'
#' @param combined_uncertainty Result of calculate_combined_uncertainty().
#' @param coverage_factor Coverage factor k (default 2).
#' @return List with the expanded uncertainty, k and nominal confidence level.
calculate_expanded_uncertainty <- function(combined_uncertainty, coverage_factor = 2) {

  expanded <- coverage_factor * combined_uncertainty$combined_standard_uncertainty

  # Map conventional coverage factors to their nominal confidence levels
  confidence_level <- if (coverage_factor == 2) {
    0.9545
  } else if (coverage_factor == 1.96) {
    0.95
  } else {
    NA
  }

  list(
    expanded_uncertainty = expanded,
    coverage_factor = coverage_factor,
    confidence_level = confidence_level
  )
}
||||
|
||||
#' Create detailed uncertainty budget
#'
#' Assembles the individual Type A and Type B contributions plus the
#' combined/expanded summary into a single nested list for reporting.
create_uncertainty_budget <- function(type_a, type_b, combined, expanded) {
  type_a_components <- list(
    standard_uncertainty = type_a$standard_uncertainty,
    uncertainty_of_mean = type_a$uncertainty_of_mean,
    regression_uncertainty = type_a$regression_uncertainty$regression_standard_error,
    autocorrelation_uncertainty = type_a$autocorrelation_uncertainty$corrected_uncertainty
  )

  summary_block <- list(
    combined_standard_uncertainty = combined$combined_standard_uncertainty,
    expanded_uncertainty = expanded$expanded_uncertainty,
    coverage_factor = expanded$coverage_factor,
    effective_degrees_of_freedom = combined$effective_degrees_of_freedom
  )

  list(
    type_a_components = type_a_components,
    type_b_components = type_b$absolute_uncertainties,
    summary = summary_block
  )
}
||||
|
||||
#' Perform Monte Carlo simulation for complex uncertainty propagation
#'
#' Draws random realisations of the calibration, temperature and cosine
#' uncertainty components (scaled to the mean reference irradiance) plus the
#' observed random measurement error, and summarises the distribution of the
#' total simulated bias.
#'
#' Fixes vs. the original:
#' * the simulation loop drew one value per iteration with rnorm(1); draws
#'   are now fully vectorised (one rnorm(n_simulations) call per component),
#'   which is far faster and distributionally identical;
#' * a partial sensor_specs entry (missing one of the three components) made
#'   rnorm() fail on a NULL sd; missing components now fall back to defaults
#'   via modifyList(), consistent with calculate_type_b_uncertainty();
#' * unused locals (n, mean_test) removed.
#'
#' @param test,reference Paired numeric measurement vectors.
#' @param sensor_specs Optional named list of per-sensor spec lists.
#' @param sensor_name Name used to look up sensor_specs.
#' @param n_simulations Number of Monte Carlo draws (default 10000).
#' @return List of summary statistics of the simulated total bias.
perform_monte_carlo_simulation <- function(test, reference, sensor_specs, sensor_name,
                                           n_simulations = 10000) {

  mean_reference <- mean(reference, na.rm = TRUE)

  # Default relative uncertainties; sensor-specific specs override per entry
  specs <- list(
    calibration_uncertainty = 0.02,
    temperature_uncertainty = 0.005,
    cosine_uncertainty = 0.01
  )
  if (!is.null(sensor_specs) && sensor_name %in% names(sensor_specs)) {
    specs <- modifyList(specs, sensor_specs[[sensor_name]])
  }

  # Vectorised draws: one realisation per simulation for each component
  cal_error <- rnorm(n_simulations, 0, specs$calibration_uncertainty * mean_reference)
  temp_error <- rnorm(n_simulations, 0, specs$temperature_uncertainty * mean_reference)
  cosine_error <- rnorm(n_simulations, 0, specs$cosine_uncertainty * mean_reference)
  measurement_error <- rnorm(n_simulations, 0, sd(test - reference, na.rm = TRUE))

  # Total simulated bias per draw
  simulated_biases <- cal_error + temp_error + cosine_error + measurement_error

  list(
    mean_bias = mean(simulated_biases),
    sd_bias = sd(simulated_biases),
    quantile_2.5 = quantile(simulated_biases, 0.025),
    quantile_97.5 = quantile(simulated_biases, 0.975),
    simulation_size = n_simulations
  )
}
||||
|
||||
#' Generate uncertainty report
#'
#' Flattens the nested results from calculate_measurement_uncertainty() into
#' one data frame row per sensor; optionally writes it to CSV.
#'
#' @param uncertainty_results Output of calculate_measurement_uncertainty().
#' @param output_file Optional CSV path; when non-NULL the report is saved.
#' @return Data frame with one row per sensor.
generate_uncertainty_report <- function(uncertainty_results, output_file = NULL) {

  # Build one flat summary row per sensor
  rows <- lapply(names(uncertainty_results), function(sensor_name) {
    res <- uncertainty_results[[sensor_name]]
    list(
      sensor_name = sensor_name,
      sample_size = res$sample_size,

      # Key uncertainty metrics
      combined_standard_uncertainty = res$combined_uncertainty$combined_standard_uncertainty,
      expanded_uncertainty = res$expanded_uncertainty$expanded_uncertainty,
      coverage_factor = res$expanded_uncertainty$coverage_factor,

      # Type A components
      type_a_standard_uncertainty = res$type_a_uncertainty$standard_uncertainty,
      type_a_degrees_of_freedom = res$type_a_uncertainty$degrees_of_freedom,

      # Type B components
      type_b_combined_uncertainty = res$type_b_uncertainty$combined_uncertainty,

      # Monte Carlo results
      monte_carlo_mean_bias = res$monte_carlo_simulation$mean_bias,
      monte_carlo_uncertainty = res$monte_carlo_simulation$sd_bias
    )
  })
  names(rows) <- names(uncertainty_results)

  # Convert to data frame (one row per sensor)
  report_df <- do.call(rbind, lapply(rows, as.data.frame))

  if (!is.null(output_file)) {
    write.csv(report_df, output_file, row.names = FALSE)
    cat("Uncertainty report saved to:", output_file, "\n")
  }

  return(report_df)
}
||||
|
||||
#' Calculate measurement uncertainty following ISO GUM
#'
#' Implements the standard GUM procedure: quantify each component as a
#' standard uncertainty, combine in quadrature, and expand with coverage
#' factor k = 2 (appropriate for large effective degrees of freedom,
#' approximately 95% coverage).
#'
#' Fixes vs. the original:
#' * sapply() (whose return type depends on its input) replaced by vapply()
#'   with an explicit numeric(1) template, so a malformed component list
#'   fails loudly instead of silently changing the result type;
#' * NOTE(review): coverage_probability is accepted and echoed in the output
#'   but does not influence the fixed k = 2 — this limitation is now
#'   documented instead of silently ignored.
#'
#' @param measurement_result The measured value.
#' @param uncertainty_components Named list; each element is a list with
#'   `type` ("A" or "B") and `value`; type A additionally needs `n`, type B
#'   optionally `distribution` ("normal"; otherwise rectangular is assumed).
#' @param coverage_probability Desired coverage probability (default 0.95);
#'   reported in the output, expansion always uses k = 2.
#' @return List with component, combined and expanded uncertainties plus a
#'   budget data frame.
calculate_iso_gum_uncertainty <- function(measurement_result, uncertainty_components,
                                          coverage_probability = 0.95) {

  # Step 1: identify all uncertainty sources
  sources <- names(uncertainty_components)

  # Step 2: convert each component to a standard uncertainty
  standard_uncertainties <- vapply(uncertainty_components, function(comp) {
    if (comp$type == "A") {
      # Type A: standard error of the mean from repeated observations
      comp$value / sqrt(comp$n)
    } else if (comp$type == "B") {
      if (!is.null(comp$distribution) && comp$distribution == "normal") {
        # Already a standard uncertainty
        comp$value
      } else {
        # Rectangular (uniform) distribution: half-width / sqrt(3)
        comp$value / sqrt(3)
      }
    } else {
      # Unknown type: treat the value as a standard uncertainty as-is
      comp$value
    }
  }, numeric(1))

  # Step 3: combined standard uncertainty (root sum of squares)
  combined_uncertainty <- sqrt(sum(standard_uncertainties^2))

  # Step 4: expanded uncertainty with k = 2 (large-df, ~95% confidence)
  expanded_uncertainty <- 2 * combined_uncertainty

  list(
    measurement_result = measurement_result,
    standard_uncertainties = standard_uncertainties,
    combined_standard_uncertainty = combined_uncertainty,
    expanded_uncertainty = expanded_uncertainty,
    coverage_factor = 2,
    coverage_probability = coverage_probability,
    uncertainty_budget = data.frame(
      source = sources,
      standard_uncertainty = standard_uncertainties
    )
  )
}
||||
|
||||
# Example usage and testing
# (wrapped in `if (FALSE)` so it never runs when the file is sourced;
#  execute the body manually to try the workflow end-to-end)
if (FALSE) {
  # Create sample data at 1-minute resolution
  set.seed(123)
  n <- 500
  timestamp <- seq(as.POSIXct("2024-01-01 06:00:00"), by = "1 min", length.out = n)

  # Simulate reference and test sensors
  reference <- 800 + 200 * sin(seq(0, 2*pi, length.out = n)) + rnorm(n, 0, 10)
  test_sensor <- reference + rnorm(n, 5, 15) # 5 W/m² bias, 15 W/m² random error

  sample_data <- data.frame(
    timestamp = timestamp,
    reference_sensor = reference,
    test_sensor = test_sensor
  )

  # Define sensor specifications (relative standard uncertainties, fractions)
  sensor_specs <- list(
    test_sensor = list(
      calibration_uncertainty = 0.02, # 2%
      temperature_uncertainty = 0.005, # 0.5%
      cosine_uncertainty = 0.01, # 1%
      spectral_uncertainty = 0.015 # 1.5%
    )
  )

  # Calculate uncertainty for the single test sensor
  uncertainty_results <- calculate_measurement_uncertainty(
    data = sample_data,
    reference_col = "reference_sensor",
    test_cols = "test_sensor",
    sensor_specs = sensor_specs,
    coverage_factor = 2
  )

  # Generate report (also writes a CSV to the working directory)
  report <- generate_uncertainty_report(uncertainty_results, "uncertainty_report.csv")

  print("Uncertainty analysis completed!")
  print(report)

  # Example of ISO GUM calculation with mixed Type A / Type B components
  gum_components <- list(
    calibration = list(type = "B", value = 16, distribution = "normal"), # 2% of 800 W/m²
    temperature = list(type = "B", value = 4), # 0.5% of 800 W/m²
    repeatability = list(type = "A", value = 15, n = 500)
  )

  gum_result <- calculate_iso_gum_uncertainty(805, gum_components)
  print("ISO GUM uncertainty calculation:")
  print(gum_result)
}
|
||||
Reference in New Issue
Block a user