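Workaround if you want to skip erroneous lines:
First read the file split only by newline, using sep="\n", then count the # of separators in each line and keep only the lines with the correct number, then collapse those lines and read them again with the real column separator.
Example: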
# library() stops with an error when the package is missing, whereas
# require() would only return FALSE and let the script fail later.
library(data.table)

# Read the raw text with sep = "\n" so that every input line ends up as one
# string in a single-column data.table. The line starting with "c" is
# malformed on purpose: the separator between 10 and FALSE is missing.
# (The remark lives here rather than inside the string literal, where it
# would become part of the data.)
wrong <- fread("
var1|var2|var3|var4
a|1|10|TRUE
b|2|10|FALSE
c|3|10FALSE
d|4|10|TRUE
e|5|10|TRUE", sep = "\n")
Count the separators in each line:
For this you can use the stringr package, see ?str_count for details:
# Count the literal "|" characters in every raw line (first column) and store
# the result by reference in a new column n_seps. fixed() makes "|" a plain
# character instead of the regex alternation operator. The stringr functions
# are namespace-qualified so the line works without library(stringr).
wrong[, n_seps := stringr::str_count(wrong[[1]], stringr::fixed("|"))]
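Alternative with Rcpp:
The counting can also be done (for example, for speed on large inputs) by a small C++ function that is made available in R through Rcpp and sourceCpp().
Save the following as "helpers.cpp":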
#include <Rcpp.h>
#include <algorithm>
#include <string>
using namespace Rcpp;
using namespace std;
// Count how often the character `y` occurs in each element of `x`.
//
// x: character vector; every element is scanned independently.
// y: the single character to look for (e.g. '|').
// Returns a numeric vector with one count per element of `x`.
//
// The export attribute below is what makes sourceCpp() generate an R
// wrapper; without it the function is compiled but not callable from R.
// [[Rcpp::export]]
NumericVector v_str_count_cpp(CharacterVector x, char y) {
  // R_xlen_t matches the type returned by size() and avoids narrowing to int.
  const R_xlen_t n = x.size();
  NumericVector out(n);
  for (R_xlen_t i = 0; i < n; ++i) {
    // begin()/end() iterate over the characters of the i-th string.
    out[i] = std::count(x[i].begin(), x[i].end(), y);
  }
  return out;
}
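After compiling the file with sourceCpp("helpers.cpp"), use the function to count the separators
|
in each line and store the result in n_seps.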
# v_str_count_cpp() already loops over a whole character vector, so pass the
# raw lines (first column) directly. apply() over the rows would coerce the
# data.table to a matrix and hand every column of a row to the function,
# which breaks as soon as `wrong` has more than one column.
wrong[, n_seps := v_str_count_cpp(wrong[[1]], "|")]
Now wrong looks like this:
> wrong
var1|var2|var3|var4 n_seps
1: a|1|10|TRUE 3
2: b|2|10|FALSE 3
3: c|3|10FALSE 2
4: d|4|10|TRUE 3
5: e|5|10|TRUE 3
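Keep only the lines with the correct number of separators and collapse them back into a single string: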
# Keep the raw lines that contain exactly three separators (i.e. four fields)
# and glue them together, one line per row, into a single string.
collapsed <- paste(
  wrong[n_seps == 3][[1]],
  collapse = "\n"
)
Finally, read the collapsed string again, this time with the real column separator:
# Parse the surviving lines with the real column separator. Passing the data
# through `text =` guarantees it is treated as literal input; a bare string
# without any "\n" (only one valid line left) would otherwise be interpreted
# by fread() as a file name. The header line is not part of `collapsed`, so
# the columns get the default names V1, V2, ...
correct <- fread(text = collapsed, sep = "|")
Result:
> correct
V1 V2 V3 V4
1: a 1 10 TRUE
2: b 2 10 FALSE
3: d 4 10 TRUE
4: e 5 10 TRUE
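As you can see, the erroneous line (row c) has been skipped, and the remaining rows are parsed into separate columns.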