From bda465355e7d11f6cfc9ce4affe6bc55a4de8de4 Mon Sep 17 00:00:00 2001 From: JohnHBauer Date: Wed, 30 Aug 2017 15:07:54 -0500 Subject: [PATCH 1/2] Use DictWriter from Python csv module (untested at the moment) --- plots/json2csv.py | 77 +++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/plots/json2csv.py b/plots/json2csv.py index 0de1306..f3b42e4 100644 --- a/plots/json2csv.py +++ b/plots/json2csv.py @@ -14,6 +14,7 @@ # val_loss param1 param2 ... import logging +import csv # Set PYTHONPATH=$PWD from plottools import * @@ -32,47 +33,37 @@ logging.info("Found %i directories." % len(rundirs)) # The CSV file -fp_out = open(output_file, "w") - -# Write CSV header -fp_out.write("val_loss,") -header = ",".join(selected) -fp_out.write(header) -fp_out.write("\n") - -def write_values(fp, val_loss, D, selected): - # I think we have to do this for consistent ordering - L = [ str(val_loss) ] + [ D[param] for param in selected ] - # print(L) - fp.write(",".join(L)) - fp.write("\n") - -for rundir in rundirs: - - Js = get_jsons(rundir) - if len(Js) == 0: - continue - - # Get parameters from the first JSON file - record_start = Js[0][0] - params = record_start["parameters"] - D = {} - for entry in params: - tokens = entry.split(":") - param = tokens[0] - if param in selected: - value = tokens[1].strip() - D[param] = value - - # Get minimum val_loss in the directory - val_losses = [] - for J in Js: - record_count = len(J) - record_penult = J[record_count-2] - val_losses.append(record_penult["validation_loss"]["set"]) - val_loss = min(val_losses) - - write_values(fp_out, val_loss, D, selected) - -fp_out.close() +with open(output_file, "w") as fp_out: + fieldnames = ["val_loss"] + selected + writer = csv.DictWriter(fp_out, fieldnames=fieldnames) + writer.writeheader() + + for rundir in rundirs: + + Js = get_jsons(rundir) + if len(Js) == 0: + continue + + # Get parameters from the first JSON file + record_start = Js[0][0] + params = record_start["parameters"] + D = {} + for entry in params: + tokens = entry.split(":") + param = tokens[0] + if param in selected: + value = tokens[1].strip() + D[param] = value + + # Get minimum val_loss in the directory + val_losses = [] + for J in Js: + record_count = len(J) + record_penult = J[record_count-2] + val_losses.append(record_penult["validation_loss"]["set"]) + val_loss = min(val_losses) + D["val_loss"] = val_loss + + writer.writerow(D) + logging.info("Wrote %s ." % output_file) From f8733de5a1a7fdea5ab33ac772b4dc9de9e1b931 Mon Sep 17 00:00:00 2001 From: JohnHBauer Date: Thu, 31 Aug 2017 10:40:23 -0500 Subject: [PATCH 2/2] Update json2csv.py Note that in Python lists item[-1] is the last, item[-2] is next-to-last, etc. Also fixed data_url parameter value truncation. --- plots/json2csv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plots/json2csv.py b/plots/json2csv.py index f3b42e4..25fc9fc 100644 --- a/plots/json2csv.py +++ b/plots/json2csv.py @@ -52,14 +52,14 @@ tokens = entry.split(":") param = tokens[0] if param in selected: - value = tokens[1].strip() + # re-join tail e.g. ['data_url', 'ftp', '//ftp.mcs...'] + value = ":".join(tokens[1:]).strip() D[param] = value # Get minimum val_loss in the directory val_losses = [] for J in Js: - record_count = len(J) - record_penult = J[record_count-2] + record_penult = J[-2] val_losses.append(record_penult["validation_loss"]["set"]) val_loss = min(val_losses) D["val_loss"] = val_loss