import csv
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
from trait_ids import Trait
def load_csv(csv_filename):
    """Load a csv file as an ordered list of per-row dictionaries.

    The first non-empty row is treated as the header row; every following
    non-empty row becomes a dictionary mapping column header -> cell text.
    Rows are returned in the order they appear in the file and completely
    empty lines are skipped.

    Notes:
        * Fields are split on ',' and '|' is used as the quote character.
        * Cell values are stored as the strings read from the file; no
          numeric conversion happens here.
        * Column headers should be unique: if a header is repeated, the
          right-most column with that header wins.
        * A row with fewer cells than the header simply lacks the trailing
          keys.

    Args:
        csv_filename: path of the csv file to load.

    Returns:
        list of dict, one per data row (empty if the file has no data rows).

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if a data row has more cells than the header row.
    """
    # ordered column headers; None until the header row has been read
    column_headers = None
    # ordered list of row key/value dictionaries
    row_dicts = []
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(csv_filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            # blank lines come back as empty lists; ignore them
            if not row:
                continue
            # the first non-empty row supplies the dictionary keys
            if column_headers is None:
                column_headers = list(row)
                continue
            # a row wider than the header has cells with no key to store them under
            if len(row) > len(column_headers):
                raise IndexError(
                    "csv line {}: row has {} values but only {} column headers".format(
                        reader.line_num, len(row), len(column_headers)))
            # pair each cell with its header (short rows yield fewer keys)
            row_dicts.append(dict(zip(column_headers, row)))
    return row_dicts
def get_trait_info(row_data, experiment_id, tray_number, plt_id, column_header, input_date_format="%Y-%m-%d"):
    """Return date-ordered attribute values for one experiment/tray/plot.

    Rows are kept when their experiment reference equals ``experiment_id``,
    their tray number equals ``tray_number`` (both compared as floats) and,
    unless ``plt_id`` is None, their pot id equals ``plt_id``.

    Args:
        row_data: ordered list of row dictionaries (as returned by load_csv).
        experiment_id: the experiment id to select (genotype/treatment).
        tray_number: the tray number to select.
        plt_id: the plot (pot) id to select, or None to accept every pot.
        column_header: csv column header of the trait attribute to extract;
            its values must be convertible to float.
        input_date_format: strptime format of the dates in the csv file.

    Returns:
        (date_list, value_list): two parallel lists. Dates are
        'YYYY-MM-DD' strings in ascending order; values sharing a date keep
        the order in which their rows appeared.
    """
    # date string -> attribute values recorded on that date (in row order)
    values_by_date = dict()
    for record in row_data:
        # skip rows belonging to another experiment or tray
        if record[Trait.EXPERIMENT_REF] != experiment_id:
            continue
        if float(record[Trait.TRAY_NO]) != float(tray_number):
            continue
        # skip rows for other pots (only when a specific pot was requested)
        if plt_id is not None and record[Trait.POT_ID] != plt_id:
            continue
        # normalise the date to y/m/d text so that string ordering is date ordering
        parsed_date = datetime.strptime(record[Trait.EXP_DATE], input_date_format)
        day_key = parsed_date.strftime('%Y-%m-%d')
        # attribute value as floating point
        reading = float(record[column_header])
        # group the value under its date, creating the group on first use
        values_by_date.setdefault(day_key, []).append(reading)
    # flatten the groups, earliest date first, into two parallel lists
    date_list = []
    value_list = []
    for day_key in sorted(values_by_date):
        readings = values_by_date[day_key]
        date_list.extend([day_key] * len(readings))
        value_list.extend(readings)
    return date_list, value_list
def generate_plot(csv_filename, experiment_id, tray_number, col_header):
    """Plot one trait attribute against time and return the rendered image.

    One line is drawn per pot id found in the csv file (a single unlabelled
    line when the file has no pot id column, e.g. for wheat data). The x
    axis is the number of days since the earliest date of any plotted pot,
    counted from 1.

    Note: the global matplotlib font settings are changed via matplotlib.rc.

    Args:
        csv_filename: the csv file to plot.
        experiment_id: the experiment id to plot (genotype/treatment).
        tray_number: the tray number to plot.
        col_header: the trait attribute id to plot (csv column header).

    Returns:
        numpy uint8 array of shape (height, width, 3) holding the RGB pixels
        of the figure, or None if anything goes wrong (the error is printed).
    """
    fig = None
    try:
        # set font ('sans-serif' is a valid generic family; the previous
        # value 'normal' is not a family name and only triggered fallback warnings)
        font = {'family': 'sans-serif',
                'weight': 'normal',
                'size': 16}
        matplotlib.rc('font', **font)
        # load the csv file as a list of dictionaries for each row
        row_data = load_csv(csv_filename)
        # collect the set of pot ids; None stands for "no pot id column",
        # which happens when processing wheat images
        pot_ids = set()
        for row in row_data:
            if row:
                # NOTE(review): ids are normalised through int(), so an id
                # written as e.g. "01" will not match its rows below - confirm
                # pot ids are always plain integers in the csv files.
                pot_ids.add(int(row[Trait.POT_ID]) if Trait.POT_ID in row else None)
        # sort so that pots appear in the legend in a sensible order
        sorted_pot_ids = sorted(pot_ids)
        # fetch the trait data once per pot: (pot key, date strings, values)
        series = []
        for pot_id in sorted_pot_ids:
            pot_key = None if pot_id is None else str(pot_id)
            dates_strs, values = get_trait_info(row_data, experiment_id, tray_number, pot_key, col_header)
            series.append((pot_key, dates_strs, values))
        # earliest date of any pot; dates are 'YYYY-MM-DD' strings so min() is chronological
        all_dates = [date_str for _, dates_strs, _ in series for date_str in dates_strs]
        if not all_dates:
            raise ValueError("no data found for experiment '{}', tray {}".format(experiment_id, tray_number))
        first_date_obj = datetime.strptime(min(all_dates), "%Y-%m-%d")
        # create the figure
        fig, axis = plt.subplots(1, 1, figsize=(15, 10), dpi=300)
        # plot each pot as a different line
        for pot_key, dates_strs, values in series:
            # days since the first date, plus 1 so that the first day is day 1
            days = [(datetime.strptime(date_str, "%Y-%m-%d") - first_date_obj).days + 1
                    for date_str in dates_strs]
            # only label the line when there is a pot id to show in the legend
            if pot_key is not None:
                axis.plot(days, values, ls='-', label="Pot " + pot_key, marker='o')
            else:
                axis.plot(days, values, ls='-', marker='o')
        # set-up x axis label
        axis.set_xlabel("Days")
        # y axis label is the column header with underscores replaced by spaces
        axis.set_ylabel(col_header.replace("_", " "), fontsize=16)
        axis.minorticks_on()
        # set-up legend (skipped when no line carries a label)
        handles, labels = axis.get_legend_handles_labels()
        if handles:
            axis.legend(handles, labels)
        # set layout options
        fig.tight_layout()
        # force rendering so that the pixel buffer is populated
        fig.canvas.draw()
        # grab the RGBA buffer and drop the alpha channel; buffer_rgba() replaces
        # the deprecated tostring_rgb()/binary np.fromstring combination.
        # copy() detaches the result from the canvas memory before the figure is closed.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        return rgba[..., :3].copy()
    except Exception as ex:
        print("Error: Could not generate plot:", ex)
        return None
    finally:
        # always release the figure, including when an error occurred
        if fig is not None:
            plt.close(fig)
class Trait:
    """Csv column identifiers plus text-friendly descriptions for a subset of them."""

    # --- csv column names (plain string constants used as ids) ---
    IMAGE_NAME = "Img_name"
    EXP_DATE = "EXP_Date"
    EXPERIMENT_REF = "Genotype_Treatment"
    TRAY_NO = "Tray_No"
    POT_ID = "Pot_ID"
    POT_X = "Pot_X"
    POT_Y = "Pot_Y"
    PROJECTED_LEAF_AREA = "Projected_LeafArea(mm^2)"
    LEAF_PERIMETER = "Leaf_perimeter(mm)"
    CANOPY_LENGTH = "Canopy_Length(mm)"
    CANOPY_WIDTH = "Canopy_Width(mm)"
    STOCKINESS = "Stockiness(%)"
    LEAF_STOCKINESS = "Leaf_Stockiness"
    LEAF_CANOPY_SIZE = "Leaf_CanopySize(mm^2)"
    LEAF_COMPACTNESS = "Leaf_Compactness(%)"
    LARGE_LEAF_NO = "Large_Leaf_No"
    LEAF_TOTAL_NO = "Leaf_TotalNo"
    GREENNESS = "Greenness(0-255)"
    PIX2MM_RATIO = "pix2mm2_ratio"

    # column id -> text-friendly description (e.g. for GUI table columns);
    # insertion order is kept as declared here
    TRAIT_IDS = {
        PROJECTED_LEAF_AREA: "Projected Leaf Area (mm^2)",
        LEAF_PERIMETER: "Leaf Perimeter (mm)",
        CANOPY_LENGTH: "Canopy Length (mm)",
        STOCKINESS: "Stockiness (%)",
        LEAF_STOCKINESS: "Leaf Stockiness",
        LEAF_CANOPY_SIZE: "Leaf Canopy Size (mm^2)",
        LEAF_COMPACTNESS: "Compactness (%)",
        LEAF_TOTAL_NO: "Total No. Leaves",
    }

    # column ids of the attributes plotted when analysing arabidopsis datasets
    ARABIDOPSIS_PLOT_IDS = [
        PROJECTED_LEAF_AREA,
        LEAF_PERIMETER,
        CANOPY_LENGTH,
        STOCKINESS,
        LEAF_CANOPY_SIZE,
        LEAF_COMPACTNESS,
        LEAF_TOTAL_NO,
    ]

    # column ids of the attributes plotted when analysing wheat datasets
    WHEAT_PLOT_IDS = [
        PROJECTED_LEAF_AREA,
        CANOPY_LENGTH,
        LEAF_STOCKINESS,
        LEAF_COMPACTNESS,
    ]

    # column ids of the attributes plotted when no experimental data is available
    NO_EXPERIMENTAL_DATA_PLOT_IDS = []