Combine many results/files into one (Map-Reduce)
A beginner-friendly map-reduce pattern for combining many outputs into one file.
from pathlib import Path
from burla import remote_parallel_map
def write_part_file(
    number,
    parts_dir="/workspace/shared/map-reduce-demo/parts",
):
    """Map step: write ``number`` to its own part file and return the path.

    Args:
        number: Value to store. It is written as a single line and is also
            embedded in the file name (``number-{number}.txt``).
        parts_dir: Directory that receives the part file. Defaults to the
            demo's parts folder, so calling with only ``number`` behaves
            exactly as before.

    Returns:
        The part file's path as a string.
    """
    part_file_path = f"{parts_dir}/number-{number}.txt"
    # Create the folder on demand; exist_ok=True makes this a no-op when the
    # folder is already there (e.g. created by an earlier call).
    Path(part_file_path).parent.mkdir(parents=True, exist_ok=True)
    Path(part_file_path).write_text(f"{number}\n")
    return part_file_path
# Map step: call write_part_file once per input; the result holds the path
# returned by each call.
inputs = list(range(5))
part_file_paths = remote_parallel_map(write_part_file, inputs)
print(part_file_paths)

# Imports for the reduce step (repeated from the top of the file).
from pathlib import Path
from burla import remote_parallel_map
def combine_part_files(
    part_paths,
    output_file_path="/workspace/shared/map-reduce-demo/final/total.txt",
):
    """Reduce step: sum the integers stored in the part files into one file.

    Args:
        part_paths: Iterable of paths to part files, each containing a single
            integer (surrounding whitespace is ignored).
        output_file_path: Where to write the total. Defaults to the demo's
            final output file, so calling with only ``part_paths`` behaves
            exactly as before.

    Returns:
        ``output_file_path``, after the total has been written to it as a
        single line.

    Raises:
        FileNotFoundError: If a part file does not exist.
        ValueError: If a part file's content is not an integer.
    """
    # An empty ``part_paths`` yields a total of 0.
    total = sum(int(Path(path).read_text().strip()) for path in part_paths)
    Path(output_file_path).parent.mkdir(parents=True, exist_ok=True)
    Path(output_file_path).write_text(f"{total}\n")
    return output_file_path
# Reduce step: the input list has exactly one item (the full list of part
# paths), so combine_part_files is called once with every path.
reduce_inputs = [part_file_paths]
output_file_paths = remote_parallel_map(
    combine_part_files,
    reduce_inputs,
    # Resource request for the call; presumably CPUs and GB of RAM per
    # function call -- confirm against the Burla documentation.
    func_cpu=8,
    func_ram=32,
)
# One input produced one result: the path of the combined file.
print(output_file_paths[0])