Delete all data from HBase table according to time range?

I am trying to delete all data from an HBase table that has a timestamp older than a specified timestamp. This applies to all rows and all column families.

Can this be done using the shell as well as the Java API?

+4
source share
2 answers

HBase has no concept of range delete markers. If you need to delete multiple cells, you have to place a delete marker for each cell, which in turn means that every row has to be scanned, either on the client side or on the server side. That leaves you with two options:

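  • BulkDeleteProtocol: a server-side option. It is implemented as a coprocessor endpoint, so the scan and the deletes run on the region servers; the endpoint has to be enabled in HBase before it can be used.
  • Scan and delete: a client-side option. Scan the table with the time range set, retrieve only the row keys, and send the deletes back in batches, as in the code below.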

    // Deletes every cell older than STOP_TS: scan for the row keys that have
    // such cells, then send one timestamped row Delete per row, in batches.
    Scan scan = new Scan();
    // The scan's time range is [0, STOP_TS): the upper bound is exclusive.
    scan.setTimeRange(0, STOP_TS);  // STOP_TS: The timestamp in question
    // Crucial optimization: Make sure you process multiple rows together
    scan.setCaching(1000);
    // Crucial optimization: Retrieve only row keys
    FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL,
        new FirstKeyOnlyFilter(), new KeyOnlyFilter());
    scan.setFilter(filters);
    List<Delete> deletes = new ArrayList<>(1000);
    // try-with-resources so the scanner is closed even if a delete fails
    try (ResultScanner scanner = table.getScanner(scan)) {
      // We set caching to 1000 above
      // make full use of it and get next 1000 rows in one go
      Result[] rr = scanner.next(1000);
      while (rr.length > 0) {
        for (Result r : rr) {
          // Delete(row, ts) removes cells with timestamp <= ts, whereas the
          // scan above only matched timestamps < STOP_TS. Use STOP_TS - 1 so
          // that cells stamped exactly STOP_TS are kept.
          deletes.add(new Delete(r.getRow(), STOP_TS - 1));
        }
        table.delete(deletes);
        deletes.clear();
        rr = scanner.next(1000);
      }
    }
    
+4
Yes, this can be done easily by setting a time range on the scan and then deleting the rows it returns.

    public class BulkDeleteDriver {
    //Added colum family and column to lessen the scan I/O
    private static final byte[] COL_FAM = Bytes.toBytes("<column family>");
    private static final byte[] COL = Bytes.toBytes("column");
    final byte[] TEST_TABLE = Bytes.toBytes("<TableName>");

    public static void main(final String[] args) throws IOException,
    InterruptedException {
    //Create connection to Hbase
    Configuration conf = null;
    Connection conn = null;

    try {
    conf = HBaseConfiguration.create();
    //Path to HBase-site.xml
    conf.addResource(new Path(hbasepath));
    //Get the connection
    conn = ConnectionFactory.createConnection(conf);
    logger.info("Connection created successfully");
    } 
    catch (Exception e) {
    logger.error(e + "Connection Unsuccessful");
    }

    //Get the table instance
    Table table = conn.getTable(TableName.valueOf(TEST_TABLE));
    List<Delete> listOfBatchDeletes = new ArrayList<Delete>();
    long recordCount = 0;
    // Set scanCache if required
    logger.info("Got The Table : " + table.getName());

    //Get calendar instance and get proper start and end timestamps
    Calendar calStart = Calendar.getInstance();
    calStart.add(Calendar.DAY_OF_MONTH, day);
    Calendar calEnd = Calendar.getInstance();
    calEnd.add(Calendar.HOUR, hour);

    //Get timestamps
    long starTS = calStart.getTimeInMillis();
    long endTS = calEnd.getTimeInMillis();

    //Set all scan related properties
    Scan scan = new Scan();
    //Most important part of code set it properly!
    //here my purpose it to delete everthing Present Time - 6 hours
    scan.setTimeRange(starTS, endTS);
    scan.setCaching(scanCache);
    scan.addColumn(COL_FAM, COL);

    //Scan the table and get the row keys
    ResultScanner resultScanner = table.getScanner(scan);
    for (Result scanResult : resultScanner) {
    Delete delete = new Delete(scanResult.getRow());

    //Create batches of Bult Delete
    listOfBatchDeletes.add(delete);
    recordCount++;
    if (listOfBatchDeletes.size() == //give any suitable batch size here) {
    System.out.println("Firing Batch Delete Now......");
    table.delete(listOfBatchDeletes);
    //don't forget to clear the array list
    listOfBatchDeletes.clear();
    }}
    System.out.println("Firing Final Batch of Deletes.....");
    table.delete(listOfBatchDeletes);
    System.out.println("Total Records Deleted are.... " + recordCount);
    try {
    table.close();
    } catch (Exception e) {
    e.printStackTrace();
    logger.error("ERROR", e);
    }}}
0

Source: https://habr.com/ru/post/1656332/


All Articles