You can use MapReduce for this. In this case, you do not need to install custom libraries on HBase, as you would with a coprocessor. Below is the code for creating the MapReduce job.
Job setup
Job job = Job.getInstance(config); job.setJobName("Distinct columns"); Scan scan = new Scan(); scan.setBatch(500); scan.addFamily(YOU_COLUMN_FAMILY_NAME); scan.setFilter(new KeyOnlyFilter()); //scan only key part of KeyValue (raw, column family, column) scan.setCacheBlocks(false); // don't set to true for MR jobs TableMapReduceUtil.initTableMapperJob( YOU_TABLE_NAME, scan, OnlyColumnNameMapper.class, // mapper Text.class, // mapper output key Text.class, // mapper output value job); job.setNumReduceTasks(1); job.setReducerClass(OnlyColumnNameReducer.class); job.setReducerClass(OnlyColumnNameReducer.class);
Mapper
/**
 * Mapper that emits the column qualifier of every cell in a scanned row.
 *
 * <p>Each qualifier is written as the output key, paired with an empty {@link Text} value.
 */
public class OnlyColumnNameMapper extends TableMapper<Text, Text> {

    /**
     * Walks the cells of one {@link Result} and writes each cell's qualifier bytes as a key.
     *
     * @param key     unused by this mapper
     * @param value   the result whose cells are inspected
     * @param context receives one (qualifier, empty) pair per cell
     * @throws IOException          propagated from the cell scanner or the context
     * @throws InterruptedException propagated from the context
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, final Context context)
            throws IOException, InterruptedException {
        for (CellScanner cells = value.cellScanner(); cells.advance(); ) {
            final Cell current = cells.current();
            // The qualifier is addressed as (array, offset, length); copy out just that slice.
            final byte[] qualifier = Bytes.copy(
                    current.getQualifierArray(),
                    current.getQualifierOffset(),
                    current.getQualifierLength());
            final Text columnName = new Text(qualifier);
            final Text emptyValue = new Text();
            context.write(columnName, emptyValue);
        }
    }
}
Reducer
/**
 * Reducer that writes each key it receives exactly once, with an empty {@link Text} value.
 */
public class OnlyColumnNameReducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Emits a copy of the key; the grouped values are never read.
     *
     * @param key     the key to emit
     * @param values  values grouped under {@code key}; not iterated
     * @param context receives the single (key, empty) pair
     * @throws IOException          propagated from the context
     * @throws InterruptedException propagated from the context
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        final Text columnName = new Text(key);
        final Text emptyValue = new Text();
        context.write(columnName, emptyValue);
    }
}
source share