已经了解余弦相似度的大概思路,但是代码写不出来,请教各位同学
PHP 的余弦相似度计算函数:
/**
 * Cosine similarity between two sparse vectors.
 *
 * Each vector is an array mapping a feature key to its numeric weight.
 * A key missing from one side contributes nothing to the dot product.
 *
 * @param array $vector1 First vector (key => weight).
 * @param array $vector2 Second vector (key => weight).
 * @return float Dot product divided by the product of both Euclidean norms,
 *               or 0.0 when either vector has a zero norm.
 */
function cosine_similarity($vector1, $vector2) {
    $shared_sum = 0.0; // running dot product over keys present in both vectors
    $norm_sq_a = 0.0;  // squared Euclidean norm of $vector1
    $norm_sq_b = 0.0;  // squared Euclidean norm of $vector2

    foreach ($vector1 as $feature => $weight) {
        $norm_sq_a += $weight * $weight;
        if (!isset($vector2[$feature])) {
            continue;
        }
        $shared_sum += $weight * $vector2[$feature];
    }

    foreach ($vector2 as $weight) {
        $norm_sq_b += $weight * $weight;
    }

    // Only divide when both norms are non-zero; an empty or all-zero vector scores 0.0.
    if ($norm_sq_a != 0.0 && $norm_sq_b != 0.0) {
        return $shared_sum / (sqrt($norm_sq_a) * sqrt($norm_sq_b));
    }

    return 0.0;
}
在使用该函数时,可以将两个向量表示为关联数组,键表示特征或维度,值表示该特征或维度的权重。例如:
// Example: two sparse vectors expressed as feature => weight maps.
$vector1 = array('apple' => 3, 'banana' => 2, 'orange' => 1);
$vector2 = array('apple' => 1, 'banana' => 2, 'pear' => 3);
$similarity = cosine_similarity($vector1, $vector2);
echo $similarity; // prints 0.5: dot product is 3*1 + 2*2 = 7 and both norms are sqrt(14), so 7 / 14
/**
 * Cosine similarity between two strings, using word counts as vectors.
 *
 * Each string is split on single spaces; the number of occurrences of each
 * token is that token's weight. Tokens are compared exactly, so case and
 * attached punctuation matter ("you" and "you?" are different tokens).
 *
 * @param string $str1 First text.
 * @param string $str2 Second text.
 * @return float Dot product of the two count vectors divided by the product
 *               of their Euclidean norms.
 */
function cosine_similarity($str1, $str2) {
    // Token => occurrence count for each string.
    $counts1 = array_count_values(explode(' ', $str1));
    $counts2 = array_count_values(explode(' ', $str2));

    // Only tokens present in BOTH strings contribute to the dot product, so
    // walk one map and probe the other with isset(). Indexing both maps for
    // every token of the union reads undefined keys and raises a diagnostic
    // for each token that appears in only one string.
    $dot_product = 0;
    $norm_sq_1 = 0;
    foreach ($counts1 as $word => $count) {
        $norm_sq_1 += $count * $count;
        if (isset($counts2[$word])) {
            $dot_product += $count * $counts2[$word];
        }
    }

    $norm_sq_2 = 0;
    foreach ($counts2 as $count) {
        $norm_sq_2 += $count * $count;
    }

    // explode() always yields at least one token (an empty string for empty
    // input), so both norms are >= 1 and the division is safe.
    return $dot_product / (sqrt($norm_sq_1) * sqrt($norm_sq_2));
}
// Example: score two short sentences against each other.
// Only the tokens "How" and "are" match exactly; "you?" and "you" differ
// because the text is split on spaces and punctuation stays attached.
$str1 = "Hello there! How are you?";
$str2 = "Hey! How are you doing?";
$similarity = cosine_similarity($str1, $str2);
echo "余弦相似度为 ", $similarity;
```php
<?php
/**
 * Cosine similarity of two vectors given as arrays of numbers.
 *
 * Components are paired by array key; a key of $vec1 that is not set in
 * $vec2 adds nothing to the dot product.
 *
 * @param array $vec1 First vector.
 * @param array $vec2 Second vector.
 * @return int|float Dot product over the product of the Euclidean norms,
 *                   or integer 0 when that product is zero.
 */
function cosine_similarity($vec1, $vec2) {
    $inner = 0;    // dot product over keys set in both vectors
    $sum_sq_1 = 0; // squared norm of $vec1
    $sum_sq_2 = 0; // squared norm of $vec2

    foreach ($vec1 as $index => $component) {
        $sum_sq_1 += $component * $component;
        if (!isset($vec2[$index])) {
            continue;
        }
        $inner += $component * $vec2[$index];
    }

    foreach ($vec2 as $component) {
        $sum_sq_2 += $component * $component;
    }

    $denominator = sqrt($sum_sq_1) * sqrt($sum_sq_2);

    // A zero denominator means at least one vector has zero length.
    return $denominator == 0 ? 0 : $inner / $denominator;
}
// Example usage: plain lists work as vectors, with positions acting as the keys.
$vec1 = array(1, 2, 3);
$vec2 = array(2, 3, 4);
$similarity = cosine_similarity($vec1, $vec2);
echo "The cosine similarity between vec1 and vec2 is: ", $similarity;
?>
```