require 'csv'

class LoadCurveAnalysis < ApplicationRecord
  include Calculation
  MAX_CLUSTER_SIZE = 5
  OPTIMAL_DELTA_PERCENTAGE = 10

  enum status: [
    :setup, :uploaded, :progress, :ready, :completed,
    :missing_points, :outliers, :wrong_shape
  ]
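  # NOTE: Rails stores these statuses as integers 0..7 in declaration order
  # and generates the usual helpers (e.g. `uploaded!`, `uploaded?`).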

  belongs_to :facility
  belongs_to :grid
  belongs_to :utility_flow

  # all original points (deleted in a single SQL statement, skipping callbacks)
  has_many :load_curve_points, inverse_of: :load_curve_analysis, dependent: :delete_all

  # series excluding outliers and excluded points
  has_many :points, -> {
    where('selected = false AND production > 0 AND consumption > 0')
  }, class_name: 'LoadCurvePoint', inverse_of: :load_curve_analysis

  # series of outliers
  has_many :outliers, -> { where(selected: true) }, class_name: 'LoadCurvePoint',
    inverse_of: :load_curve_analysis

  # series of outliers and excluded points
  has_many :excluded_points, -> {
    where('selected = true OR production <= 0 OR consumption <= 0')
  }, class_name: 'LoadCurvePoint', inverse_of: :load_curve_analysis

  scope :not_setup, -> { where.not(status: :setup) }

  validates :period, :frequency, :measure_unit, :grid, presence: true

  def to_csv
    CSV.generate do |csv|
      # analysis aggregated data
      csv << [
        'Load Curve Calculation', nil, nil, nil, nil, nil,
        'Best Repeatable Perf.', nil, nil,
        'Average', nil, nil,
        'Loss', nil
      ]
      csv << [
        'Fixed (a)', fixed_shift.round(2), nil,
        'Max threshold', max_delta_treshold.round(2), nil,
        'Production', production_performance, nil,
        'Production', production_average, nil,
        'Load Loss', load_loss
      ]
      csv << [
        'Variable (b)', variable_shift.round(2), nil,
        'Min threshold', min_delta_treshold.round(2), nil,
        'Consumption', consumption_performance, nil,
        'Consumption', consumption_average, nil,
        'Perf. Loss', perf_loss
      ]

      # blank separator row
      csv << Array.new(10)

      # raw data and analysis data
      csv << [
        'Raw Data', nil, nil, 'Analysis Data', nil, nil, nil, nil, nil, nil
      ]
      csv << [
        'Date', "Production #{measure_unit}/shift",
        "Consumption #{grid.measure_unit}", 'Production',
        'Production*Production', 'Production*Consumption',
        'Simulated Cons. unit/shift', 'Simulated Cons. unit/ton',
        'Is Under Curve', 'Perf Loss', 'Delta load', 'Load Loss'
      ]
      load_curve_points.order(:id).each do |p|
        csv << [
          p.date_or_batch, p.production.round(2), p.consumption.round(2),
          p.shift_consumption.round(2), p.squared_production.round(2),
          p.production_times_consumption.round(2), p.simulated_batch_consumption.round(2),
          p.simulated_unit_consumption.round(2), p.is_under_curve? ? 1 : 0,
          p.perf_loss.round(2), p.delta_load.round(2), p.load_loss.round(2)
        ]
      end
    end
  end
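  # Illustrative usage of the export above (the file name is hypothetical):
  #
  #   analysis = LoadCurveAnalysis.not_setup.first
  #   File.write('load_curve_analysis.csv', analysis.to_csv)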

  def is_batch?
    frequency == 'batches'
  end

  def date_format
    case frequency
    when 'batches'
      nil
    when 'hours'
      "%m/%d/%Y %k:00"
    when 'minutes'
      "%m/%d/%Y %k:%M"
    else
      "%m/%d/%Y"
    end
  end
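  # For example, with an hourly frequency:
  #   Time.new(2024, 1, 15, 9).strftime("%m/%d/%Y %k:00") #=> "01/15/2024  9:00"
  # (%k blank-pads single-digit hours, hence the double space)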

  def total_production
    @total_production ||= points.sum(:production)
  end

  def total_production_square
    total_production ** 2
  end

  def total_consumption
    @total_consumption ||= points.sum(:consumption)
  end

  def total_shift_consumption
    points.map(&:shift_consumption).sum
  end

  def total_production_times_consumption
    points.map(&:production_times_consumption).sum
  end

  def total_squared_production
    @total_squared_production ||= points.map(&:squared_production).sum
  end

  def total_points
    @total_points ||= points.size
  end

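  # fixed_shift and variable_shift below are the slope and intercept of a
  # least-squares line fit (assuming each point's production_times_consumption
  # supplies the x*y product for this regression):
  #   slope     a = (n*Σxy - Σy*Σx) / (n*Σx² - (Σx)²)
  #   intercept b = (Σy - a*Σx) / n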
  def fixed_shift
    @fixed_shift ||= begin
      (total_points * total_production_times_consumption - total_shift_consumption * total_production) /
      (total_points * total_squared_production - total_production_square)
    end
  end

  def variable_shift
    @variable_shift ||= (total_shift_consumption - fixed_shift * total_production) / total_points
  end

  # maximum production
  def production_performance
    @production_performance ||= points.maximum(:production).round(2)
  end

  # min simulated unit consumption
  def consumption_performance
    points.map(&:simulated_unit_consumption).min.round(2)
  end

  def production_average
    @production_average ||= (total_production.to_f / total_points).round(2)
  end

  def consumption_average
    # memoize: this average is used heavily when calculating perf_loss
    @consumption_average ||= (total_consumption.to_f / total_points).round(2)
  end

  def perf_loss
    points.map(&:perf_loss).select(&:positive?).sum.round(2)
  end
  alias_method :loss_consumption, :perf_loss # for compatibility

  def load_loss
    points.map(&:load_loss).select(&:positive?).sum.round(2)
  end

  def loss
    perf_loss + load_loss
  end

  # memoized list of per-point losses
  def points_loss
    @points_loss ||= points.map(&:loss)
  end

  # min loss
  def min_delta_treshold
    points_loss.min.round(2)
  end

  # max loss
  def max_delta_treshold
    points_loss.max.round(2)
  end

  def calc_percent_of_points_below_the_curve
    ((points.map(&:under_curve).sum.to_f / total_points) * 100).round(2)
  end

  def percent_of_points_below_the_curve(delta)
    reset_variable_shift
    self.optimum_delta = delta
    calc_percent_of_points_below_the_curve
  end

  def reset_variable_shift
    @variable_shift = nil
  end

  # binary search over the range between the min and max deltas
  def _find_optimum_delta(min_delta, max_delta)
    # keep the delta found by bsearch: relying on the block's side effect
    # would leave optimum_delta at the last probed value, not the answer
    self.optimum_delta = (min_delta..max_delta).bsearch do |d|
      percent_of_points_below_the_curve(d) >= OPTIMAL_DELTA_PERCENTAGE
    end
    # persist and return the optimum delta
    tap { save! }.optimum_delta
  end

  def find_optimum_delta!
    return optimum_delta unless optimum_delta.nil?
    # find the optimum delta using binary search
    _find_optimum_delta(min_delta_treshold, max_delta_treshold)
  end
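  # Illustrative call sequence (the `analysis` receiver is hypothetical):
  #
  #   analysis.find_optimum_delta!  # binary search between the loss thresholds
  #   analysis.optimum_delta        # => smallest delta putting at least
  #                                 #    OPTIMAL_DELTA_PERCENTAGE% of points
  #                                 #    below the simulated curve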

  def year_average_values
    return [] if is_batch?
    period.map do |year|
      year_points = points.select { |p| p.year == year }

      if year_points.present?
        production = (year_points.map(&:production).sum / year_points.size).round(2)
        consumption = (year_points.map(&:consumption).sum / year_points.size).round(2)
      end

      { year: year, production: production, consumption: consumption }
    end
  end

  # Outlier detection using interquartile-range (IQR) fences
  # applied per production cluster
  def detect_outliers
    # only run if optimum_delta hasn't been calculated yet
    return self unless optimum_delta.nil?
    tap do
      # order points by production, descending
      ordered_points = load_curve_points.to_be_included.sort_by { |p| -p.production }
      production_values = ordered_points.map(&:production)
      # cluster count (pre-defined), min production, and a slightly padded max
      group_size, min, max = MAX_CLUSTER_SIZE, production_values.min,
        production_values.max * 1.01
      # assign each point to a production cluster
      ordered_points.each do |p|
        p.group = ((p.production - min) / ((max - min) / group_size)).to_i
      end
      # compute quartile fences per cluster
      fences = {}
      # group points by the cluster each one belongs to
      groups = ordered_points.group_by(&:group)
      groups.keys.each do |group|
        # lower, middle and upper quartiles of the regression values
        productions = groups[group].map(&:regresion_by_production)
        q1, q2, q3 = percentile(productions, 25), percentile(productions, 50),
          percentile(productions, 75)
        # fence offset: 1.5 times the interquartile range
        range = 1.5 * (q3 - q1)
        # fill fences for each group
        fences[group] = { min: q1 - range, max: q3 + range, q1: q1, q2: q2, q3: q3 }
      end
      # utility lambda: is the value outside the group's fences?
      outlier_for_group = -> (group, value) do
        return false if [0, MAX_CLUSTER_SIZE].include? group
        (fences.dig(group, :min)..fences.dig(group, :max)).exclude? value
      end
      # collect the outlier points
      outliers = ordered_points.select do |point|
        group = point.group
        regresion = point.regresion_by_production
        # outside its own cluster's fences?
        outlier_for_group.call(group, regresion) &&
        # if above the median, also check the previous cluster; otherwise the next one
        outlier_for_group.call(regresion > fences.dig(group, :q2) ?
          group - 1 : group + 1, regresion)
      end
      # mark these points as outliers
      load_curve_points.where(id: outliers.map(&:id)).update_all(selected: true)
    end
  end
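  # `percentile` is expected to come from the included Calculation concern.
  # A minimal linear-interpolation sketch of what it is assumed to compute:
  #
  #   def percentile(values, pct)
  #     sorted = values.sort
  #     rank = (pct / 100.0) * (sorted.size - 1)
  #     lower, upper = sorted[rank.floor], sorted[rank.ceil]
  #     lower + (upper - lower) * (rank - rank.floor)
  #   end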

  def curve
    tap { detect_outliers.find_optimum_delta! }
  end
end