, jkff, , TextIO.write.to(FilenamePolicy).
:
, :
lines.apply(TextIO.write() ( "///file.txt" ).);
, . Google, local/s3.
public class MinimalWordCountJava8 {
public static void main(String[] args) {
PipelineOptions options = PipelineOptionsFactory.create();
Pipeline p = Pipeline.create(options);
p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
.apply(FlatMapElements
.into(TypeDescriptors.strings())
.via((String word) -> Arrays.asList(word.split("[^\\p{L}]+"))))
.apply(Filter.by((String word) -> !word.isEmpty()))
.apply(Count.<String>perElement())
.apply(MapElements
.into(TypeDescriptors.strings())
.via((KV<String, Long> wordCount) -> wordCount.getKey() + ": " + wordCount.getValue()))
.apply(TextIO.write().to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
p.run().waitUntilFinish();
}
}
:
protected static class PerWindowFiles extends FilenamePolicy {
private final ResourceId prefix;
public PerWindowFiles(ResourceId prefix) {
this.prefix = prefix;
}
public String filenamePrefixForWindow(IntervalWindow window) {
String filePrefix = prefix.isDirectory() ? "" : prefix.getFilename();
return String.format(
"%s-%s-%s", filePrefix, formatter.print(window.start()), formatter.print(window.end()));
}
@Override
public ResourceId windowedFilename(int shardNumber,
int numShards,
BoundedWindow window,
PaneInfo paneInfo,
OutputFileHints outputFileHints) {
IntervalWindow intervalWindow = (IntervalWindow) window;
String filename =
String.format(
"%s-%s-of-%s%s",
filenamePrefixForWindow(intervalWindow),
shardNumber,
numShards,
outputFileHints.getSuggestedFilenameSuffix());
return prefix.getCurrentDirectory().resolve(filename, StandardResolveOptions.RESOLVE_FILE);
}
@Override
public ResourceId unwindowedFilename(
int shardNumber, int numShards, OutputFileHints outputFileHints) {
throw new UnsupportedOperationException("Unsupported.");
}
}
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
if (windowed) {
teamAndScore
.apply("ConvertToRow", ParDo.of(new BuildRowFn()))
.apply(new WriteToText.WriteOneFilePerWindow(filenamePrefix));
} else {
teamAndScore
.apply("ConvertToRow", ParDo.of(new BuildRowFn()))
.apply(TextIO.write().to(filenamePrefix));
}
return PDone.in(teamAndScore.getPipeline());
}