I'm building a little app that analyzes eBay historical prices of sold items. For some keywords/items the price range is very wide, because the search is too broad or simply wrong and the results get polluted by items that are not really related.
E.g.
a price search for "iphone" returns the phone itself, but also chargers, accessories and unrelated items, which skew the price data... so I end up with a range that goes from $5 for a charger to $500 for an iPhone.
So, given that I will try to improve the search on my side, I'm wondering if there is a mathematical calculation to exclude the outliers.
Say I have:
$1200
$549
$399
$519
$9
$599
$549
$9
$499
$399
$519
$99
$5
$5
How do I get the price range to be $300-$600 instead of $10-$800 or so...?
Here below is the PHP I'm currently using... not sure if it is the best approach.
/**
 * Keep only the values that lie within $magnitude standard deviations of the mean.
 *
 * The spread is the population standard deviation (sum of squared deviations
 * divided by the number of values). The result comes from array_filter(), so
 * the surviving values keep their original keys; call array_values() on the
 * result if a re-indexed list is required.
 *
 * @param array     $dataset   Numeric values to filter.
 * @param int|float $magnitude Multiplier applied to the standard deviation to
 *                             widen or narrow the accepted band (default 1).
 *
 * @return array Values $x with mean - deviation <= $x <= mean + deviation;
 *               an empty array when $dataset is empty.
 */
function remove_outliers($dataset, $magnitude = 1)
{
    $count = count($dataset);
    if ($count === 0) {
        // Nothing to filter; this also avoids dividing by zero for the mean.
        return [];
    }

    $mean = array_sum($dataset) / $count;

    // Sum of squared deviations from the mean, computed inline so this
    // function does not depend on a helper being resolvable by name.
    $squaredDeviations = 0;
    foreach ($dataset as $value) {
        $squaredDeviations += pow($value - $mean, 2);
    }

    // Standard deviation scaled by the requested magnitude.
    $deviation = sqrt($squaredDeviations / $count) * $magnitude;

    // Compute the accepted band once instead of on every callback invocation.
    $lower = $mean - $deviation;
    $upper = $mean + $deviation;

    return array_filter($dataset, function ($x) use ($lower, $upper) {
        return $x <= $upper && $x >= $lower;
    });
}
/**
 * Squared deviation of a single value from the mean.
 *
 * @param int|float $x    The observed value.
 * @param int|float $mean The mean to measure against.
 *
 * @return int|float ($x - $mean) raised to the power of two.
 */
function sd_square($x, $mean)
{
    $offset = $x - $mean;

    return pow($offset, 2);
}
/**
 * Compute the median of a set of numbers.
 *
 * The argument is received by value and sorted locally, so the caller's array
 * is not modified. sort() re-indexes the copy, which means arrays with gaps in
 * their keys (for example the output of array_filter()) are handled correctly.
 *
 * @param array $arr Numeric values; keys are ignored.
 *
 * @return int|float The middle element for an odd count, the arithmetic mean
 *                   of the two middle elements for an even count, or 0 when
 *                   $arr is empty.
 */
function calculate_median($arr)
{
    $count = count($arr);
    if ($count === 0) {
        // An empty set has no middle element. Return 0 explicitly instead of
        // reading undefined offsets and doing arithmetic on the resulting nulls.
        return 0;
    }

    sort($arr);

    // Integer index of the (lower) middle element; the cast truncates the
    // .5 that appears for even counts.
    $middle = (int) (($count - 1) / 2);

    if ($count % 2) {
        return $arr[$middle];
    }

    // Even count: average the two elements around the centre.
    return ($arr[$middle] + $arr[$middle + 1]) / 2;
}
// $prices is the array with all the prices stored; drop every value that is
// more than one standard deviation (the default magnitude) away from the mean.
$prices = remove_outliers($prices);

// Representative price: median of the remaining values, rounded to a whole number.
$trend = round(calculate_median($prices));

// Bounds of the filtered price range, rounded the same way.
$max = round(max($prices));
$min = round(min($prices));