It is simple, it works, and it is based on the link you sent, BUT it is again ... therefore, I am not sure whether it might break for some reason that I can't think of :)
import re


def trim_utm(url):
    """Return *url* with every ``utm_*`` query parameter removed.

    Only the query component (the text between the first ``?`` and the
    first ``#``) is inspected. The path and the fragment are returned
    untouched, even if they happen to contain ``?`` or ``utm_``.

    Args:
        url: The URL string to clean.

    Returns:
        The URL without parameters whose name starts with ``utm_``. The
        ``?`` separator is dropped as well when no parameters remain.
        The input is returned unchanged when there is nothing to remove.
    """
    # Cheap pre-check: most URLs carry no tracking parameters at all.
    if "utm_" not in url:
        return url

    # base: everything before the first '?', which must precede any '#'.
    # query: up to (not including) the first '#'.
    # fragment: the rest, '#' included. DOTALL keeps stray newlines intact.
    parts = re.match(r'([^?#]*)\?([^#]*)(.*)', url, re.DOTALL)
    if parts is None:
        # No query component ("utm_" appeared in the path or fragment).
        return url

    base, query, fragment = parts.groups()
    params = query.split('&')
    kept = [p for p in params if not p.startswith('utm_')]
    if len(kept) == len(params):
        # "utm_" only appeared inside a value or a non-matching name.
        return url

    sanitized_query = '&'.join(kept)
    if not sanitized_query:
        # Every parameter was removed: do not leave a dangling '?'.
        return base + fragment
    return base + '?' + sanitized_query + fragment


if __name__ == "__main__":
    tests = [
        "http://localhost/index.php?a=1&utm_source=1&b=2",
        "http://localhost/index.php?a=1&utm_source=1&b=2#hash",
        "http://localhost/index.php?a=1&utm_source=1&b=2&utm_something=no#hash",
        "http://localhost/index.php?a=1&utm_source=1&utm_a=yes&b=2#hash",
        "http://localhost/index.php?utm_a=a",
        "http://localhost/index.php?a=utm_a",
        "http://localhost/index.php?a=1&b=2",
        "http://localhost/index.php",
        "http://localhost/index.php#hash2",
    ]
    for t in tests:
        trimmed = trim_utm(t)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(t)
        print(trimmed)
        print("")
source share