Removing duplicates in a row in Python

What is an efficient algorithm to remove all duplicates in a row?

For example: aaaabbbccdbdbcd

Required Result: abcd

+3
source share
18 answers

You use a hash table to store the keys that are currently discovered (access O (1)), and then loop through the array. If the character is in the hash table, discard it. If he does not add it to the hash table and the result line.

In general: O (n) time (and space).

The naive solution is to search for a character - this is the result string for each processing. This is O (n 2 ).

+18
source

: .

- . Hashtables (, ). char. ( Java, , HashMap Map<Character,?>.) Hashtable - O (n) - .

8kb Unicode BitSet. , BitSets ( BitSet). BitSet, O (1).

+5

Python

>>> ''.join(set("aaaabbbccdbdbcd"))
'acbd'

>>> q="aaaabbbccdbdbcd"                    # this one is not
>>> ''.join(sorted(set(q),key=q.index))    # so efficient
'abcd'

>>> S=set()
>>> res=""
>>> for c in "aaaabbbccdbdbcd":
...  if c not in S:
...   res+=c
...   S.add(c)
... 
>>> res
'abcd'

>>> S=set()
>>> L=[]
>>> for c in "aaaabbbccdbdbcd":
...  if c not in S:
...   L.append(c)
...   S.add(c)
... 
>>> ''.join(L)
'abcd'

python3.1

>>> from collections import OrderedDict
>>> ''.join(list(OrderedDict((c,0) for c in "aaaabbbccdbdbcd").keys()))
'abcd'
+3

256 "" , . . , "" .

+2

O (n), HashTable. . , 256

void removeDuplicates(char *str)
{
 int len = strlen(str); //Gets the length of the String
 int count[256] = {0};  //initializes all elements as zero
 int i;
     for(i=0;i<len;i++)
     {
        count[str[i]]++;  
        if(count[str[i]] == 1)
          printf("%c",str[i]);                  
     }     
}
+2

PHP algorythm - O (n):

function remove_duplicate_chars($str) {
    if (2 > $len = strlen($str)) {
        return $str;
    }
    $flags = array_fill(0,256,false);
    $flags[ord($str[0])]=true;
    $j = 1;
    for ($i=1; $i<$len; $i++) {
        $ord = ord($str[$i]);
        if (!$flags[$ord]) {
            $str[$j] = $str[$i];
            $j++;
            $flags[$ord] = true;
        }
    }
    if ($j<$i) { //if duplicates removed
        $str = substr($str,0,$j);
    }
    return $str;
}

echo remove_duplicate_chars('aaaabbbccdbdbcd'); // result: 'abcd'
+1
#include <iostream>
#include<string>
using namespace std;
#define MAX_SIZE 256

int main()
{
    bool arr[MAX_SIZE] = {false};

    string s;
    cin>>s;
    int k = 0;

    for(int i = 0; i < s.length(); i++)
    {
        while(arr[s[i]] == true && i < s.length())
        {
            i++;
        }
        if(i < s.length())
        {
            s[k]    = s[i];
            arr[s[k]] = true;
            k++;
        }
    }
    s.resize(k);

    cout << s<< endl; 

    return 0;
}
+1
  string newString = new string("aaaaabbbbccccdddddd".ToCharArray().Distinct().ToArray());   

 char[] characters = "aaaabbbccddd".ToCharArray();
                string result = string.Empty ;
                foreach (char c in characters)
                {
                    if (result.IndexOf(c) < 0)
                        result += c.ToString();
                }
0

++ , , std::set:

std::string input("aaaabbbccddd");
std::set<char> unique_chars(input.begin(), input.end());

std::unordered_set std::set, O (N) ( O (N 2) ), O (N lg M) ( N = , M = ). , , , , .

0

, .

#include <iostream>
#include <algorithm>
#include <string>

int main()
{
    std::string s = "aaaabbbccdbdbcd";

    std::sort(s.begin(), s.end());
    s.erase(std::unique(s.begin(), s.end()), s.end());

    std::cout << s << std::endl;
}
0

.

0

++ - O (n), O (1), .

std::string characters = "aaaabbbccddd";
std::vector<bool> seen(std::numeric_limits<char>::max()-std::numeric_limits<char>::min());

for(std::string::iterator it = characters.begin(), endIt = characters.end(); it != endIt; ++it) {
  seen[(*it)-std::numeric_limits<char>::min()] = true;
}

characters = "";
for(char ch = std::numeric_limits<char>::min(); ch != std::numeric_limits<char>::max(); ++ch) {
  if( seen[ch-std::numeric_limits<char>::min()] ) {
    characters += ch;
  }
}
0

C , : O (n) , .

void remDup(char *str)
{
    int flags[256] = { 0 };

    for(int i=0; i<(int)strlen(str); i++) {
        if( flags[str[i]] == 0 )
            printf("%c", str[i]);

        flags[str[i]] = 1;
    }
}
0
import java.util.HashSet;

public class RemoveDup {

    public static String Duplicate()
    {
        HashSet h = new HashSet();
        String value = new String("aaaabbbccdbdbcd");
        String finalString = new String();
        int stringLength = value.length();
        for (int i=0;i<=stringLength-1;i++)
        {
            if(h.add(value.charAt(i)))
            {
                finalString = finalString + (value.charAt(i));
            }


        }
        return finalString;

    }
public static void main(String[] args) {


        System.out.println(Duplicate());
    }
}
0

26 . (a, b, c, d ..) .. ( a = 2, b = 3, c = 5 .. , , e = 2, r = 3, a = 5 ..)... int prime [26]..

i=0;
int product = 1;
while(char[i] != null){
   if(product % prime[i] == 0)
      the character is already present delete it
   else
      product = product*prime[i];
}

O (n) .. O (1) , ... "int",

0
int main()    
{    
    std::string s = "aaacabbbccdbdbcd";

    std::set<char> set1;
    set1.insert(s.begin(), s.end());

    for(set<char>::iterator it = set1.begin(); it!= set1.end(); ++it)
    std::cout << *it;

    return 0;
}

std::set takes O(log n) to insert 
0

O (n) :

#include<stdio.h>
#include<string.h>
#include<stdlib.h>

void removeDuplicates(char *);

void removeDuplicates(char *inp)
{
        int i=0, j=0, FLAG=0, repeat=0;

     while(inp[i]!='\0')
    {
        if(FLAG==1)
        {
                inp[i-repeat]=inp[i];
        }
        if(j==(j | 1<<(inp[i]-'\0')))
        {
                repeat++;
                FLAG=1;
        }
                j= j | 1<<(inp[i]-'\0');
                i++;
    }

     inp[i-repeat]='\0';
}

int main()
{
     char inp[100] = "aaAABCCDdefgccc";
    //char inp[100] = "ccccc";
    //char inp[100] = "\0";
    //char *inp = (char *)malloc(sizeof(char)*100);

    printf (" INPUT STRING : %s\n", inp);

     removeDuplicates(inp);

    printf (" OUTPUT STRING : %s:\n", inp);
    return 1;
}
0

, Python , , " ". :

=====================

:

string = "aaabbbccc"

product = reduce((lambda x,y: x if (y in x) else x+y), string)

print product

abc

=========================

:

string = "aaabssabcdsdwa"

str_uniq = ''.join(set(string))

print str_uniq

acbdsw
0

Source: https://habr.com/ru/post/1733310/


All Articles