How to remove newlines inside csv cells using regex / terminal tools?

I have a csv file in which some of the cells have a newline character inside. For example:

id,name 
01,"this is
with newline"
02,no newline 

I want to remove all newline characters inside cells.

How to do this with regular expression or with other terminal tools in general, without knowing the number of columns in advance?

+4
source share
6 answers

, , , , , regex . / , "", regex. ( , ).

For example, in Perl with the Text::CSV module:

#!/usr/bin/env perl

# Reads CSV from the files named on the command line (or from STDIN when no
# file is given), replaces every line break embedded inside a field with a
# single space, and writes the resulting CSV to STDOUT.

use strict;
use warnings;

use Text::CSV;

# binary => 1 lets quoted fields contain embedded newlines;
# eol => "\n" makes print() terminate every output record with a newline.
my $csv = Text::CSV->new( { binary => 1, eol => "\n" } );

while ( my $row = $csv->getline( \*ARGV ) ) {
    # \r?\n also covers CRLF line breaks inside a field.
    s/\r?\n/ /g for @$row;
    $csv->print( \*STDOUT, $row );
}

# getline() returns false on a parse error as well as at end of input, so
# report the error instead of silently truncating the output.
$csv->eof or do {
    $csv->error_diag();
    exit 1;
};

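Note that the script reads through the special \*ARGV file handle, which takes its input either from a pipe or from the files named on the command line, the same way sed does: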

somecommand.sh | myscript.pl
myscript.pl filename_to_process

ARGV handles either case. (You could use \*STDIN instead, if you only ever read from a pipe.)

+6

, ?

, . , , .

For example, in Python:

#!/usr/bin/python3 -B
"""Replace line breaks inside CSV cells with a single space.

Usage: test.py input.csv

The cleaned CSV is written to standard output.
"""

import csv
import sys


def strip_newlines(infile, outfile):
    """Copy CSV rows from ``infile`` to ``outfile`` without embedded line breaks.

    Every line break inside a cell is replaced by one space. Rows are written
    with ``csv.writer`` so that cells containing commas or quotes are quoted
    again and the output remains valid CSV.

    :param infile: text file object (or any iterable of lines) holding CSV
        data; real files should be opened with ``newline=''``.
    :param outfile: text file object with a ``write`` method.
    """
    reader = csv.reader(infile)
    # lineterminator='\n' keeps the one-row-per-line output that print() gave.
    writer = csv.writer(outfile, lineterminator='\n')
    for row in reader:
        stripped = [
            col.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
            for col in row
        ]
        writer.writerow(stripped)


if __name__ == '__main__':
    # newline='' is what the csv module requires so that line breaks inside
    # quoted cells reach the reader untouched.
    with open(sys.argv[1], newline='') as csvfile:
        strip_newlines(csvfile, sys.stdout)

, , .

:

id,name
01,"this is
with newline"
02,no newline

, , :

➜  ~  ./test.py input.csv
id,name
01,this is with newline
02,no newline

python script - . python , .

, (, this iswith newline), , , ''.

+1

, . , , :

# c keeps a running count of the double quotes seen so far (gsub replaces each quote with itself and returns how many it found).
# While c is odd the record ends inside an open quoted cell, so it is terminated with FS (a space by default) instead of RS (a newline), joining it to the next line.
$ awk '{ORS=( (c+=gsub(/"/,"&"))%2 ? FS : RS )} 1' file
id,name
01,"this is with newline"
02,no newline
+1

xlsx2csv, :

-e, --escape          Escape \r\n\t characters

, \n , ( \n ):

# Delete every literal backslash-n sequence (the escaped form produced by xlsx2csv -e) and save the result.
sed 's/\\n//g' filein.csv > fileout.csv

:

# Convert the workbook with escaping enabled (-e), then delete the literal backslash-n sequences in the same pipeline.
PATH/TO/xlsx2csv.py -e filein.xlsx | sed 's/\\n//g' > fileout.csv
0

. java.util.List, CSV

/**
 * Reads a CSV file and returns its logical rows, merging the physical lines
 * of any quoted cell that spans several lines into a single row.
 *
 * A cell is considered open while the number of double quotes read so far
 * for the current row is odd. The physical lines of an open cell are
 * concatenated without any separator, i.e. the line break is simply dropped.
 *
 * @param selectedCSVFile the CSV file to read
 * @return one string per logical CSV row, in file order; if reading fails
 *         the rows collected so far are returned and the stack trace is printed
 */
List<String> getAllRowsInCSVFileAsList(File selectedCSVFile){
  FileReader fileReader = null;
  BufferedReader reader = null;
  List<String> values = new ArrayList<String>();
  try{
      fileReader = new FileReader(selectedCSVFile);
      reader = new BufferedReader(fileReader);
      String line = reader.readLine();
      String previousLine = "";
      // true while the row being assembled has an unterminated quoted cell
      boolean intendLineInCell = false;
      while(line != null){
          // Continue the pending row, or start a new one.
          previousLine = intendLineInCell ? previousLine + line : line;
          // An odd number of quotes on this line opens or closes a cell.
          // Checking the parity in both states also covers a line that closes
          // one cell and opens another (3, 5, ... quotes).
          if(getTotalNumberOfCharacterSequenceOccurrenceInString("\"", line) % 2 != 0){
              intendLineInCell = !intendLineInCell;
          }
          if(!intendLineInCell){
              values.add(previousLine);
              previousLine = "";
          }
          line = reader.readLine();
      }
      // The file ended inside an unterminated quoted cell: keep what was read
      // instead of silently dropping it.
      if(intendLineInCell){
          values.add(previousLine);
      }
  }catch(IOException ie){
      ie.printStackTrace();
  }finally{
      // Closing the BufferedReader also closes the wrapped FileReader, so
      // only the outermost stream that was successfully created is closed.
      try {
          if(reader != null){
              reader.close();
          } else if(fileReader != null){
              fileReader.close();
          }
      } catch (IOException e) {
          e.printStackTrace();
      }
  }
  return values;

}

/**
 * Counts the non-overlapping occurrences of a literal character sequence in
 * a text.
 *
 * The sequence is matched literally (not as a regular expression), so
 * characters such as "." or "(" are counted as themselves.
 *
 * @param characterSequence the literal sequence to look for
 * @param text the text to search
 * @return the number of non-overlapping occurrences; 0 when the sequence is empty
 */
int getTotalNumberOfCharacterSequenceOccurrenceInString(String characterSequence, String text){
  // An empty sequence matches at every position and would never terminate.
  if(characterSequence.length() == 0){
      return 0;
  }
  int count = 0;
  int from = text.indexOf(characterSequence);
  while(from != -1){
      count++;
      // Resume after the current match so occurrences do not overlap.
      from = text.indexOf(characterSequence, from + characterSequence.length());
  }
  return count;

}

, csv , 4- ( ).

( csv, , 2 ).

dinesh,kumar,24,"23 
tambaram india",green

"23
tambaram india"

( ") (" ).

( ") , (" ), , .

, ( ") . , java.util.List, (" ) . , , .

0

* Open the csv file in Notepad++ and press Ctrl + H, then use replace to remove the "newline" characters.

-2

Source: https://habr.com/ru/post/1617948/


All Articles