Compare the excess mortality rates of European countries in 2020 with those of earlier time periods
Read cleaned data, calculate excess mortalities and compare them based on Wilcoxon's rank sum test
clear
Tbl = readtable('C:\Users\MSchneider\Documents\CoronaFacts\DataEurostat\demo_r_mwk_ts.xlsx', 'Sheet','DataCleanedV2', 'Range','A1:AM1082');
countryidx = 3:39; % column indizes of the contries
nc = length(countryidx);
alpha = 0.05; % significance level for CIs and hypothesis tests
y = table;
countries = Tbl.Properties.VariableNames(countryidx);
lr = 18; % look 'lr' indizes left and right to the peaks and look for excess mortality
for i = 1:nc
excess_2020 = NaN; % initialisation since some traces might not reach 2020
country = countries{i};
fprintf('Analyzing country "%s" (number %d of %d)\n', country, i, nc)
y = Tbl.(country);
xweeks = Tbl.TIME(~isnan(y));
y = y(~isnan(y));
%x = [1:height(Tbl)]';
x = [1:length(y)]';
cos52 = cos((2*pi/(365.25/7)) * x);
sin52 = sin((2*pi/(365.25/7)) * x);
sin26 = sin((4*pi/(365.25/7)) * x);
cos26 = cos((4*pi/(365.25/7)) * x);
%modelspec = [country, ' ~ 1+ cos52 + sin52 + cos26 + sin26 + index'];
modelspec = [country, ' ~ 1 + cos52 + sin52 + x'];
mytab = [table(y, 'VariableNames',{country}), table(cos52), table(sin52), table(cos26), table(sin26), table(x)];
% Model is quasi-Poisson (dispersion flag is true)
mdl = fitglm(mytab, modelspec, 'Distribution','poisson', 'DispersionFlag', true);
[ypred, y_ci] = predict(mdl,mytab,'Alpha',0.05,'Simultaneous',false);
yci_lower = y_ci(:,1);
yci_upper = y_ci(:,2);
% plot the results
figure('Name', country)
subplot(2,1,1)
hold on
plot(x, y)
plot(x, ypred, 'k')
plot(x, yci_upper, 'm--')
title(country)
ylabel('Weekly death counts')
% Set x-ticks manually
xtickrange = round(linspace(min(x), max(x), 10));
xticks(xtickrange)
xticklabels(xweeks(xtickrange,1)')
xtickangle(45)
hold off
% Detect maxima of the baseline 'ypred' and find find excess
% mortalities
excess_res = y - yci_upper;
[loc, ~]=peakseek(ypred,40,mean(ypred));
idx_low = loc - lr;
idx_hi = loc + lr;
idx_low(idx_low < 1) = 1;
idx_hi(idx_hi > length(ypred)) = length(ypred);
for j=1:numel(loc)
idx = idx_low(j):idx_hi(j);
%plot(x(idx), ypred(idx), 'x')
idxintbl = strfind(Tbl.(country)', y(idx)') + [0:length(y(idx))-1];
yearweekcell = cell2mat(cellfun(@(x) str2double(x(1:4)),...
Tbl.TIME(idxintbl),'un',0));
excess_res_sum = excess_res(idx);
% Plot those which are positive
idxpos = idx(excess_res_sum > 0);
curve1 = yci_upper;
x2 = x;
curve2 = y;
ix = curve2 < curve1;
curve2(ix) = curve1(ix);
%plot(x(idxpos), y(idxpos), 'rv', 'MarkerSize', 3) % highlight excess mort. data points with triangles
% Plot shaded peaks.
subplot(2,1,2)
[ph,msg] = jbfill(x2(:)',curve2(:)', curve1(:)');
ylabel('Weekly death counts')
xtickrange = round(linspace(min(x), max(x), 10));
xticks(xtickrange)
xticklabels(xweeks(xtickrange,1)')
xtickangle(45)
hold off
if yearweekcell(end) == 2020
% Count the positive (excess) residuals
excess_2020 = sum(y(idxpos) - yci_upper(idxpos));
else
excess_earlier(j) = sum(y(idxpos) - yci_upper(idxpos));
end
end
% Values equal to zero are no excess mortality per definitionem
excess_earlier = excess_earlier(excess_earlier > 0);
mdata.(country).Excess2020 = excess_2020;
mdata.(country).ExcessEarlier = excess_earlier;
% Perform Wilcoxon Rank Sum test to compare excess mortality of 2020
% with the excess mortality of previous years
if (length(excess_earlier) > 2 && ~isnan(excess_2020))
[p,h] = signrank(excess_earlier, excess_2020(end),'alpha', alpha,...
'tail', 'left');
% Rem.: Signficantly could also mean the excess mortality in 2020
% is significantly less than in earlier years -> so make a
% one-tailed test
mdata.(country).SignificantlyMoreIn2020 = h;
% make a qqplot if there is enough data from previous years
if length(excess_earlier) > 10
figqqplot = figure;
[mdata.(country).DataNormal, ~, ~] = swtest(excess_earlier, alpha);
qqplot(excess_earlier);
saveas(gcf, [country, '_excessmortality_qqplot.png'])
close(figqqplot)
end
else
mdata.(country).SignificantlyMoreIn2020 = NaN;
end
mdata.(country).Trace = y;
clear excess_earlier excess_2020
end
Print the key results to the command line
clc
for ii=1:nc
if isnan(mdata.(countries{ii}).SignificantlyMoreIn2020)
fprintf('The significance of the excess mortality in %s in 2020 could not be determined\n',...
countries{ii})
elseif mdata.(countries{ii}).SignificantlyMoreIn2020
fprintf('There was a significant excess mortality in %s in 2020 compared to previous years\n',...
countries{ii})
else
fprintf('There was no significant excess mortality in %s in 2020 compared to previous years\n',...
countries{ii})
end
end
Please note that I used some external MATLAB functions from MATLAB Central File Exchange:
- jbfill: John Bockstege (2020). Shade area between two curves (https://www.mathworks.com/matlabcentral/fileexchange/13188-shade-area-between-two-curves), MATLAB Central File Exchange. Retrieved November 16, 2020.
- swtest: Ahmed BenSaïda (2020). Shapiro-Wilk and Shapiro-Francia normality tests. (https://www.mathworks.com/matlabcentral/fileexchange/13964-shapiro-wilk-and-shapiro-francia-normality-tests), MATLAB Central File Exchange. Retrieved November 16, 2020.
- peakseek: Peter O'Connor (2020). PeakSeek (https://www.mathworks.com/matlabcentral/fileexchange/26581-peakseek), MATLAB Central File Exchange. Retrieved November 18, 2020
Please also note that I used some of the R-Code from EUROMOMO as sort of template for the analysis here. The corresponding R-package can be downloaded from the EUROMOMO website.