% Script to compare lake areas delineated from NDWI masks using thresholds % specified in the main script to determine the errors on the areas. % Current NDWI threshold for Landsat-8 was 0.25 and for Sentinel-2 was 0.36 % Script outputs figure to show the lake areas defined from the two methods % compared to one another, including the correlation between them and the % RMSE associated with their calculations. Error bars can then be included % within the main script. % Andrew Williamson, September 2017 %% Specify directories & dates for analysis clear all; clc % specify image dates date1 = '20160701'; date2 = '20160731'; pix_size = 0.01; % in km filter_size = 50; % number of pixels to filter lakes out at if wanted % specify directories main_dir = '/Users/Andrew/Documents/MATLAB/PhD_GrIS_SGLs_S2/'; addpath(main_dir); L8_folder = '/Users/Andrew/Documents/MATLAB/PhD_GrIS_SGLs_S2/outputs_LS8'; L8_NDWI_folder = [L8_folder '/NDWI_10m_masks']; S2_folder = '/Users/Andrew/Documents/MATLAB/PhD_GrIS_SGLs_S2/outputs_S2'; S2_NDWI_folder = [S2_folder '/NDWI_masks']; mask_folder = [main_dir '/masks']; out_dir = [main_dir 'output_figs/']; % specify filenames to load L8_NDWI_Jul01_name = ['L8_' date1 '_NDWI_10m_SGL_mask.mat']; L8_NDWI_Jul31_name = ['L8_' date2 '_NDWI_10m_SGL_mask.mat']; S2_NDWI_Jul01_name = ['S2_' date1 '_NDWI_SGL_mask.mat']; S2_NDWI_Jul31_name = ['S2_' date2 '_NDWI_SGL_mask.mat']; %% Load data and mask ice-marginal areas % GrIS mask cd(mask_folder); GrIS_mask_10m = imread('S2_mask_10m.tif'); radius = 200; % amount erode ice edge by; a scalar to be multiplied by 10 m e = strel('disk',radius); % to get rid of the issues at the ice edge GrIS_mask_10m = imerode(GrIS_mask_10m,e); % erode mask by radius value % NDWI masks cd(L8_NDWI_folder); load(L8_NDWI_Jul01_name) Jul01_L8_SGLs = L8_lmask_10m; Jul01_L8_SGLs(GrIS_mask_10m == 0) = 0; Jul01_L8_SGLs = SGL_mask_filter(Jul01_L8_SGLs,filter_size,2); clear L8_lmask_10m; load(L8_NDWI_Jul31_name) Jul31_L8_SGLs = L8_lmask_10m; Jul31_L8_SGLs(GrIS_mask_10m == 0) = 0; Jul31_L8_SGLs = SGL_mask_filter(Jul31_L8_SGLs,filter_size,2); clear L8_lmask_10m; cd(S2_NDWI_folder); load(S2_NDWI_Jul01_name); Jul01_S2_SGLs = NDWI_defined_SGLs; Jul01_S2_SGLs(GrIS_mask_10m == 0) = 0; Jul01_S2_SGLs = SGL_mask_filter(Jul01_S2_SGLs,filter_size,2); clear NDWI_defined_SGLs; load(S2_NDWI_Jul31_name); Jul31_S2_SGLs = NDWI_defined_SGLs; Jul31_S2_SGLs(GrIS_mask_10m == 0) = 0; Jul31_S2_SGLs = SGL_mask_filter(Jul31_S2_SGLs,filter_size,2); clear NDWI_defined_SGLs; cd(main_dir) %% Compare masks qualitatively with figure figure(1) subplot(2,2,1) imagesc(Jul01_L8_SGLs) subplot(2,2,2) imagesc(Jul01_S2_SGLs) subplot(2,2,3) imagesc(Jul31_L8_SGLs) subplot(2,2,4) imagesc(Jul31_S2_SGLs) %% Calculate total lake areas for each day for each image type % set up blank arrays areas_Jul01 = zeros(1,2); areas_Jul31 = zeros(1,2); L8_totarea_Jul01 = nansum(nansum(Jul01_L8_SGLs)); S2_totarea_Jul01 = nansum(nansum(Jul01_S2_SGLs)); areas_Jul01(1,1) = L8_totarea_Jul01; areas_Jul01(1,2) = S2_totarea_Jul01; L8_totarea_Jul31 = nansum(nansum(Jul31_L8_SGLs)); S2_totarea_Jul31 = nansum(nansum(Jul31_S2_SGLs)); areas_Jul31(1,1) = L8_totarea_Jul31; areas_Jul31(1,2) = S2_totarea_Jul31; areas_Jul01 = areas_Jul01 * (pix_size^2); % scale by pixel size areas_Jul31 = areas_Jul31 * (pix_size^2); % scale by pixel size disp(['On 1 July, Landsat-8 total lake area is ' num2str(areas_Jul01(1,1)) ' km^2.']); disp(['On 1 July, Sentinel-2 total lake area is ' num2str(areas_Jul01(1,2)) ' km^2.']); disp(['On 31 July, Landsat-8 total lake area is ' num2str(areas_Jul31(1,1)) ' km^2.']); disp(['On 31 July, Sentinel-2 total lake area is ' num2str(areas_Jul31(1,2)) ' km^2.']); %% Calculate number of lakes from each method numbers_Jul01 = zeros(1,2); numbers_Jul31 = zeros(1,2); [lab_01Jul_L8,lnum_01_Jul_L8] = bwlabel(Jul01_L8_SGLs); [lab_01Jul_S2,lnum_01_Jul_S2] = bwlabel(Jul01_S2_SGLs); [lab_31Jul_L8,lnum_31_Jul_L8] = bwlabel(Jul31_L8_SGLs); [lab_31Jul_S2,lnum_31_Jul_S2] = bwlabel(Jul31_S2_SGLs); numbers_Jul01(1,1) = lnum_01_Jul_L8; numbers_Jul01(1,2) = lnum_01_Jul_S2; numbers_Jul31(1,1) = lnum_31_Jul_L8; numbers_Jul31(1,2) = lnum_31_Jul_S2; disp(['On 1 July, Landsat-8 predicts ' num2str(numbers_Jul01(1,1)) ' lakes.']); disp(['On 1 July, Sentinel-2 predicts ' num2str(numbers_Jul01(1,2)) ' lakes.']); disp(['On 31 July, Landsat-8 predicts ' num2str(numbers_Jul31(1,1)) ' lakes.']); disp(['On 31 July, Sentinel-2 predicts ' num2str(numbers_Jul31(1,2)) ' lakes.']); %% Create mask for overlapping lakes lakemask_Jul01 = Jul01_L8_SGLs + Jul01_S2_SGLs; lakemask_Jul31 = Jul31_L8_SGLs + Jul31_S2_SGLs; binary_mask_Jul01 = zeros(size(lakemask_Jul01)); binary_mask_Jul01(lakemask_Jul01 >= 1) = 1; binary_mask_Jul31 = zeros(size(lakemask_Jul31)); binary_mask_Jul31(lakemask_Jul31 >= 1) = 1; [lab_max_01Jul,lnum_max_01Jul] = bwlabel(binary_mask_Jul01); [lab_max_31Jul,lnum_max_31Jul] = bwlabel(binary_mask_Jul31); %% 1st July area comparisons area_comps_Jul01 = zeros(1,2); row = 1; disp('Date is 1 July.') % For July 1st first for i = 1:lnum_max_01Jul curr_lmask = zeros(size(lab_max_01Jul)); curr_lmask(lab_max_01Jul == i) = 1; disp(['Analysis step number ' num2str(i) ' of ' num2str(lnum_max_01Jul)... ' total steps.']) L8_extract = Jul01_L8_SGLs(curr_lmask == 1); L8_total = sum(L8_extract); L8_area = L8_total * pix_size^2; area_comps_Jul01(row,1) = L8_area; S2_extract = Jul01_S2_SGLs(curr_lmask == 1); S2_total = sum(S2_extract); S2_area = S2_total * pix_size^2; area_comps_Jul01(row,2) = S2_area; row = row + 1; end %% 31st July area comparisons area_comps_Jul31 = zeros(1,2); row = 1; disp('Date is 31 July.') % For July 1st first for i = 1:lnum_max_31Jul curr_lmask = zeros(size(lab_max_31Jul)); curr_lmask(lab_max_31Jul == i) = 1; disp(['Analysis step number ' num2str(i) ' of ' num2str(lnum_max_31Jul)... ' total steps.']) L8_extract = Jul31_L8_SGLs(curr_lmask == 1); L8_total = sum(L8_extract); L8_area = L8_total * pix_size^2; area_comps_Jul31(row,1) = L8_area; S2_extract = Jul31_S2_SGLs(curr_lmask == 1); S2_total = sum(S2_extract); S2_area = S2_total * pix_size^2; area_comps_Jul31(row,2) = S2_area; row = row + 1; end %% Put the two datasets into one big one to get the regression all_area_comps = [area_comps_Jul01;area_comps_Jul31]; %% Pearson linear correlation stats [r,p] = corr(all_area_comps(:,1),all_area_comps(:,2),'Type','Pearson'); r2 = r*r; disp(['Pearson r value is ' num2str(r) '.']) disp(['Probability indicator is ' num2str(p) '.']); disp(['R-square value is ' num2str(r2) '.']); % calculate RMSE errors = all_area_comps(:,1) - all_area_comps(:,2); square_errors = errors .^2; MSE = mean(square_errors); RMSE = sqrt(MSE); % get the degrees of freedom data df = size(square_errors,1); %% Make scatter graph to show each day in different colours (red and blue) figure(2); clf; hold on data2 = scatter(area_comps_Jul31(:,1),area_comps_Jul31(:,2),[],'b',... 'Marker','x'); data1 = scatter(area_comps_Jul01(:,1),area_comps_Jul01(:,2),[],'r',... 'Marker','x'); % hline = refline(1,0); % hline.Color = 'k'; % hline.LineWidth = 1; % hline.LineStyle = '--'; % data3 = hline; xlabel('Landsat 8 lake area (km^2)','FontSize',14); ylabel('Sentinel-2 lake area (km^2)','FontSize',14); set(gca,'XGrid','on','YGrid','on','FontSize',14) set(gca,'TickLength',[0.005 0.005]); box on % add polynomial (first-order, i.e. linear) trend line x = all_area_comps(:,1); y = all_area_comps(:,2); p = polyfit(x,y,1); a = p(1,1); b = p(1,2); xl = xlim; x_low = xl(1,1); % get the x-axis lower limit x_high = xl(1,2); % get the x-axis higher limit x_locs = linspace(x_low,x_high,515); % get the plotting locations for x y_pred = (a*x_locs) + b; % predict y values for these x locations data3 = plot(x_locs,y_pred,'Color','k','LineWidth',1,'LineStyle','--'); lngd = legend([data1,data2,data3],'01 July 2016','31 July 2016',... 'OLS regression','Location','northwest'); set(lngd,'fontsize',14); ylim([0 3]) b_pos = -b; line_str = ['y = ' num2str(a) 'x - ' num2str(b_pos)]; dim = [0.6 0.56 0.28 0.3]; % annotation('textbox',dim,'String',line_str,'FitBoxToText','on',... % 'BackgroundColor','none','FontSize',14,'LineStyle','none','Interpreter','latex'); % str = {['R^2 = ' num2str(r2,'%.3f')],['RMSE = ' num2str(RMSE,'%.3f') ' km^2'],... % ['df = ' num2str(df)]}; % dim2 = [0.66 0.00001 0.28 0.3]; % specify the dimensions and location of text box % annotation('textbox',dim2,'String',str,'FitBoxToText','on',... % 'BackgroundColor','none','FontSize',14,'LineStyle','none'); ax = gca; ax.YAxis.TickLabelFormat = '%,.1f'; ax.XAxis.TickLabelFormat = '%,.1f'; %% Save figure filename = 'S2_L8_area_regression_unannotated'; cd(out_dir); saveas(gcf,[filename '.pdf']); hold on str = {['R^2 = ' num2str(r2,'%.3f')],['RMSE = ' num2str(RMSE,'%.3f') ' km^2'],... ['df = ' num2str(df)]}; dim2 = [0.66 0.00001 0.28 0.3]; % specify the dimensions and location of text box annotation('textbox',dim2,'String',str,'FitBoxToText','on',... 'BackgroundColor','none','FontSize',14,'LineStyle','none'); filename2 = 'S2_L8_area_regression_annotated'; saveas(gcf,[filename2 '.pdf']); cd(main_dir)