I have an array of strings like the one below. As you can see, the strings are nearly identical and differ only at the same single position in each string:
$strings = [
'This is +1% better than last time!',
'This is +2% better than last time!',
'This is +3% better than last time!',
'This is +4% better than last time!',
'This is +5% better than last time!',
...
];
// Pseudo code
From this I'd like to end up with
$array = [
'1',
'2',
'3',
'4',
'5',
...
];
// And preferably
$string = 'This is +%s% better than last time!';
I'd like to get this via a function that takes any array of similar strings and outputs the parts that actually differ between them.
Thanks a lot for the help, everybody; all of the answers gave me very good hints about where to go with this. Here is my solution, a class that solves this problem. It extends the code from Vicente Olivert Riera's answer and uses the method FlyingFoX explained in his/her answer:
/**
 * Derives a sprintf-style format string and the per-string arguments from a
 * group of strings that differ only in a few words.
 *
 * Strings are split on single spaces and compared word by word. Every word
 * position at which the strings are not all identical is replaced by a "%s"
 * placeholder (keeping whatever non-digit prefix and suffix the words share),
 * and the differing middle part of each word is collected as an argument.
 */
class StringDiff
{
    /**
     * The unformatted string to be used in the vsprintf call. Literal
     * percent signs are escaped as "%%"; each varying part is a "%s".
     * @var string
     */
    protected $unformattedString = '';

    /**
     * Arguments replacing the %s placeholders in the unformatted string,
     * as one list per input string (in input order). Each entry is a float
     * when the differing part is a number that survives the conversion
     * unchanged, otherwise the raw string.
     * @var array
     */
    protected $args = [];

    /**
     * Returns the arguments to be used for a vsprintf call along with the
     * format string.
     * @return array One list of arguments per input string
     */
    public function getArgs()
    {
        return $this->args;
    }

    /**
     * Returns the unformatted string to be used in a vsprintf call along
     * with the arguments.
     * @return string
     */
    public function getUnformattedString()
    {
        return $this->unformattedString;
    }

    /**
     * Takes an array of very similarly formatted strings and fills in the
     * $unformattedString and $args properties from the data provided.
     *
     * Results of a previous call are discarded first, so the object can be
     * reused. The strings are expected to contain the same number of
     * space-separated words; if they do not, missing words are treated as
     * empty strings.
     *
     * @param array $strings Group of very similar strings
     * @return void
     */
    public function run(array $strings)
    {
        // Start from a clean state so repeated calls do not mix results.
        $this->unformattedString = '';
        $this->args = [];

        if (count($strings) === 0) {
            return;
        }

        // Split every string into words. Iterating with foreach (instead of
        // indexing 0..n-1) makes arrays with arbitrary keys work as well.
        $rows = [];
        $wordCount = 0;
        foreach ($strings as $string) {
            $words = explode(' ', (string) $string);
            $wordCount = max($wordCount, count($words));
            $rows[] = $words;
        }

        $template = [];
        $argIndex = 0;
        for ($n = 0; $n < $wordCount; $n++) {
            // The n-th word of every string.
            $column = [];
            foreach ($rows as $words) {
                $column[] = isset($words[$n]) ? $words[$n] : '';
            }

            // array_unique() compares string representations, so numeric
            // look-alikes such as "1.0" and "1" count as different words.
            if (count(array_unique($column)) === 1) {
                $template[] = $this->escapeLiteral($column[0]);
                continue;
            }

            $prefixLength = $this->commonPrefixLength($column);
            $suffixLength = $this->commonSuffixLength($column, $prefixLength);

            foreach ($column as $row => $word) {
                $middleLength = strlen($word) - $prefixLength - $suffixLength;
                $this->args[$row][$argIndex] = $this->toArgument(
                    $this->slice($word, $prefixLength, $middleLength)
                );
            }

            // Only the literal parts are escaped; the arguments stay raw
            // because vsprintf inserts them verbatim.
            $first = $column[0];
            $template[] = $this->escapeLiteral($this->slice($first, 0, $prefixLength))
                . '%s'
                . $this->escapeLiteral($this->slice($first, strlen($first) - $suffixLength, $suffixLength));
            $argIndex++;
        }

        // Trim whitespace at the beginning and end of the format string.
        $this->unformattedString = trim(implode(' ', $template));
    }

    /**
     * Number of leading characters shared by all words, stopping at the
     * first digit so that numbers are never split in the middle.
     * @param array $words Non-empty list of strings
     * @return int
     */
    private function commonPrefixLength(array $words)
    {
        $first = $words[0];
        $limit = min(array_map('strlen', $words));
        for ($i = 0; $i < $limit; $i++) {
            $char = $first[$i];
            if (is_numeric($char)) {
                return $i;
            }
            foreach ($words as $word) {
                if ($word[$i] !== $char) {
                    return $i;
                }
            }
        }
        return $limit;
    }

    /**
     * Number of trailing characters shared by all words, stopping at the
     * first digit and never reaching into the already matched prefix.
     * @param array $words        Non-empty list of strings
     * @param int   $prefixLength Length of the common prefix
     * @return int
     */
    private function commonSuffixLength(array $words, $prefixLength)
    {
        $first = $words[0];
        $firstLength = strlen($first);
        $limit = min(array_map('strlen', $words)) - $prefixLength;
        for ($i = 0; $i < $limit; $i++) {
            $char = $first[$firstLength - 1 - $i];
            if (is_numeric($char)) {
                return $i;
            }
            foreach ($words as $word) {
                if ($word[strlen($word) - 1 - $i] !== $char) {
                    return $i;
                }
            }
        }
        return $limit;
    }

    /**
     * Converts a differing segment into an argument: a float when the
     * segment is numeric and printing the float gives the segment back,
     * otherwise the unchanged string (so vsprintf can reproduce the input).
     * @param string $segment
     * @return float|string
     */
    private function toArgument($segment)
    {
        if (is_numeric($segment)) {
            $number = (float) $segment;
            if ((string) $number === $segment) {
                return $number;
            }
        }
        return $segment;
    }

    /**
     * substr() wrapper that returns '' for non-positive lengths.
     * @param string $string
     * @param int    $start
     * @param int    $length
     * @return string
     */
    private function slice($string, $start, $length)
    {
        return $length > 0 ? substr($string, $start, $length) : '';
    }

    /**
     * Escapes percent signs so the text is taken literally by vsprintf.
     * @param string $text
     * @return string
     */
    private function escapeLiteral($text)
    {
        return str_replace('%', '%%', $text);
    }
}
How to use:
// Sample input: five sentences that differ only in the two percentages.
$test_array = [
    'Your Cooldown Reduction cap is increased to 41% and you gain 1% Cooldown Reduction',
    'Your Cooldown Reduction cap is increased to 42% and you gain 2% Cooldown Reduction',
    'Your Cooldown Reduction cap is increased to 43% and you gain 3% Cooldown Reduction',
    'Your Cooldown Reduction cap is increased to 44% and you gain 4% Cooldown Reduction',
    'Your Cooldown Reduction cap is increased to 45% and you gain 5% Cooldown Reduction',
];

// Analyse the sentences.
$stringd = new StringDiff();
$stringd->run($test_array);

// Rebuild every sentence from the format string and its own arguments,
// one per line of HTML output.
foreach ($stringd->getArgs() as $arg) {
    echo vsprintf($stringd->getUnformattedString(), $arg), '<br>';
}
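Outputs (the last two parts come from additionally dumping `getArgs()` and printing `getUnformattedString()`):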
Your Cooldown Reduction cap is increased to 41% and you gain 1% Cooldown Reduction
Your Cooldown Reduction cap is increased to 42% and you gain 2% Cooldown Reduction
Your Cooldown Reduction cap is increased to 43% and you gain 3% Cooldown Reduction
Your Cooldown Reduction cap is increased to 44% and you gain 4% Cooldown Reduction
Your Cooldown Reduction cap is increased to 45% and you gain 5% Cooldown Reduction
array(5) {
[0]=>
array(2) {
[0]=>
float(41)
[1]=>
float(1)
}
[1]=>
array(2) {
[0]=>
float(42)
[1]=>
float(2)
}
[2]=>
array(2) {
[0]=>
float(43)
[1]=>
float(3)
}
[3]=>
array(2) {
[0]=>
float(44)
[1]=>
float(4)
}
[4]=>
array(2) {
[0]=>
float(45)
[1]=>
float(5)
}
}
Your Cooldown Reduction cap is increased to %s%% and you gain %s%% Cooldown Reduction
And yes, if you are wondering, this is related to the Riot API.
If you have suggestions for improvement or any changes at all, feel free to comment down below :)
/**
 * Builds a format string from an array of similar strings.
 *
 * The strings are split into space-separated words. A word position at which
 * all strings agree is copied to the output; a position at which they differ
 * becomes "%s", wrapped in whatever non-digit prefix and suffix the differing
 * words have in common (e.g. "+1%", "+2%" => "+%s%").
 *
 * Strings are expected to contain the same number of words; missing words
 * are treated as empty strings.
 *
 * @param array $strings        Group of very similar strings
 * @param bool  $escape_percent When true, literal percent signs are written
 *                              as "%%" so the result is safe for sprintf()
 * @return string The format string, or "" for an empty array
 */
function myfunc($strings, $escape_percent = false) {
    // if the array is empty we don't have anything to do
    if (count($strings) == 0) return "";

    // split every string into its words; runs of spaces and surrounding
    // whitespace never produce empty words
    $rows = [];
    $num_tokens = 0;
    foreach ($strings as $string) {
        $tokens = preg_split('/ +/', trim((string) $string), -1, PREG_SPLIT_NO_EMPTY);
        $num_tokens = max($num_tokens, count($tokens));
        $rows[] = $tokens;
    }

    $parts = [];
    // iterate over each word position
    for ($w = 0; $w < $num_tokens; $w++) {
        // extract the same word of each string
        $column = [];
        $min_len = null;
        foreach ($rows as $tokens) {
            $word = isset($tokens[$w]) ? $tokens[$w] : '';
            $column[] = $word;
            $len = strlen($word);
            if ($min_len === null || $len < $min_len) {
                $min_len = $len;
            }
        }
        $first = $column[0];

        // If all words are equal (compared as strings), keep the word.
        if (count(array_unique($column)) === 1) {
            $parts[] = $escape_percent ? str_replace('%', '%%', $first) : $first;
            continue;
        }

        // Length of the prefix shared by every word; digits always belong
        // to the varying part so numbers are not split.
        $prefix_len = 0;
        while ($prefix_len < $min_len) {
            $char = $first[$prefix_len];
            $same = !is_numeric($char);
            foreach ($column as $word) {
                if ($word[$prefix_len] !== $char) {
                    $same = false;
                    break;
                }
            }
            if (!$same) break;
            $prefix_len++;
        }

        // Length of the shared suffix, never overlapping the prefix.
        $suffix_len = 0;
        while ($suffix_len < $min_len - $prefix_len) {
            $char = $first[strlen($first) - 1 - $suffix_len];
            $same = !is_numeric($char);
            foreach ($column as $word) {
                if ($word[strlen($word) - 1 - $suffix_len] !== $char) {
                    $same = false;
                    break;
                }
            }
            if (!$same) break;
            $suffix_len++;
        }

        $prefix = $prefix_len > 0 ? substr($first, 0, $prefix_len) : '';
        $suffix = $suffix_len > 0 ? substr($first, -$suffix_len) : '';
        if ($escape_percent) {
            $prefix = str_replace('%', '%%', $prefix);
            $suffix = str_replace('%', '%%', $suffix);
        }
        $parts[] = $prefix . '%s' . $suffix;
    }

    // Join the words with single spaces.
    return implode(' ', $parts);
}
Example of usage:
// Five inputs that differ only in the number after the plus sign.
$strings = [
    "This is +1% better than last time!",
    "This is +2% better than last time!",
    "This is +3% better than last time!",
    "This is +4% better than last time!",
    "This is +5% better than last time!",
];

// Print the format string derived from the inputs.
print myfunc($strings);
If you know that your array of similar strings will only differ in one place, then you can compare them from the beginning until they differ and from the end until they differ, and record those two offsets. Then, for each string in your array, extract the substring that starts at the first difference and ends at the last difference.