, .
, , , , " " = .
abbrevs <- list('Limited'=c('Limited','Ltd'),'Incorporated'=c('Incorporated','Inc'))
( , gsub agrep ):
regexes <- lapply(abbrevs,function(x) { paste0("(",paste0(x,collapse='|'),")[.]?") })
:
$Limited
[1] "(Limited|Ltd)[.]?"
$Incorporated
[1] "(Incorporated|Inc)[.]?"
company.name df:
for (i in seq_along(regexes)) {
Address$Company.name <- gsub(regexes[[i]], names(regexes[i]), Address$Company.name, ignore.case=TRUE)
Enterprise$Company.name <- gsub(regexes[[i]], names(regexes[i]), Enterprise$Company.name, ignore.case=TRUE)
}
. agrep adist, .
:
> Address
Company.name Address
1 Deloitte Limited New York
2 Coca-Cola New York
3 Tesla Limited California
4 Microsoft Limited Washington
:
Address <- structure(list(Company.name = c("Deloitte Ltd.", "Coca-Cola",
"Tesla ltd", "Microsoft Limited"), Address = c("New York", "New York",
"California", "Washington")), .Names = c("Company.name", "Address"
), class = "data.frame", row.names = c(NA, -4L))
Enterprise <- structure(list(Company.name = c("Deloitte Ltd.", "Coca-Cola",
"Tesla ltd", "Microsoft Limited"), EnterpriseNumber = c(221L,
334L, 725L, 127L)), .Names = c("Company.name", "EnterpriseNumber"
), class = "data.frame", row.names = c(NA, -4L))