%% Compare the excess mortality rates of European countries in 2020 with those of earlier time periods % The data was downloaded from EuroStat on September 29th % % M. Schneider, Updated: 10/02/2021 %% Read cleaned data, calculate excess mortalities and compare them based on Wilcoxon's rank sum test clear Tbl = readtable('C:\Users\MSchneider\Documents\CoronaFacts\DataEurostat\demo_r_mwk_ts.xlsx', 'Sheet','DataCleanedV2', 'Range','A1:AM1082'); countryidx = 3:39; % column indizes of the contries nc = length(countryidx); alpha = 0.05; % significance level for CIs and hypothesis tests y = table; countries = Tbl.Properties.VariableNames(countryidx); lr = 18; % look 'lr' indizes left and right to the peaks and look for excess mortality for i = 1:nc excess_2020 = NaN; % initialisation since some traces might not reach 2020 country = countries{i}; fprintf('Analyzing country "%s" (number %d of %d)\n', country, i, nc) y = Tbl.(country); xweeks = Tbl.TIME(~isnan(y)); y = y(~isnan(y)); %x = [1:height(Tbl)]'; x = [1:length(y)]'; cos52 = cos((2*pi/(365.25/7)) * x); sin52 = sin((2*pi/(365.25/7)) * x); sin26 = sin((4*pi/(365.25/7)) * x); cos26 = cos((4*pi/(365.25/7)) * x); %modelspec = [country, ' ~ 1+ cos52 + sin52 + cos26 + sin26 + index']; modelspec = [country, ' ~ 1 + cos52 + sin52 + x']; mytab = [table(y, 'VariableNames',{country}), table(cos52), table(sin52), table(cos26), table(sin26), table(x)]; % Model is quasi-Poisson (dispersion flag is true) mdl = fitglm(mytab, modelspec, 'Distribution','poisson', 'DispersionFlag', true); [ypred, y_ci] = predict(mdl,mytab,'Alpha',0.05,'Simultaneous',false); yci_lower = y_ci(:,1); yci_upper = y_ci(:,2); % plot the results figure('Name', country) subplot(2,1,1) hold on plot(x, y) plot(x, ypred, 'k') plot(x, yci_upper, 'm--') title(country) ylabel('Weekly death counts') % Set x-ticks manually xtickrange = round(linspace(min(x), max(x), 10)); xticks(xtickrange) xticklabels(xweeks(xtickrange,1)') xtickangle(45) hold off % Detect maxima of the baseline 'ypred' and find find excess % mortalities excess_res = y - yci_upper; [loc, ~]=peakseek(ypred,40,mean(ypred)); idx_low = loc - lr; idx_hi = loc + lr; idx_low(idx_low < 1) = 1; idx_hi(idx_hi > length(ypred)) = length(ypred); for j=1:numel(loc) idx = idx_low(j):idx_hi(j); %plot(x(idx), ypred(idx), 'x') idxintbl = strfind(Tbl.(country)', y(idx)') + [0:length(y(idx))-1]; yearweekcell = cell2mat(cellfun(@(x) str2double(x(1:4)),... Tbl.TIME(idxintbl),'un',0)); excess_res_sum = excess_res(idx); % Plot those which are positive idxpos = idx(excess_res_sum > 0); curve1 = yci_upper; x2 = x; curve2 = y; ix = curve2 < curve1; curve2(ix) = curve1(ix); %plot(x(idxpos), y(idxpos), 'rv', 'MarkerSize', 3) % highlight excess mort. data points with triangles % Plot shaded peaks. subplot(2,1,2) [ph,msg] = jbfill(x2(:)',curve2(:)', curve1(:)'); ylabel('Weekly death counts') xtickrange = round(linspace(min(x), max(x), 10)); xticks(xtickrange) xticklabels(xweeks(xtickrange,1)') xtickangle(45) hold off if yearweekcell(end) == 2020 % Count the positive (excess) residuals excess_2020 = sum(y(idxpos) - yci_upper(idxpos)); else excess_earlier(j) = sum(y(idxpos) - yci_upper(idxpos)); end end % Values equal to zero are no excess mortality per definitionem excess_earlier = excess_earlier(excess_earlier > 0); mdata.(country).Excess2020 = excess_2020; mdata.(country).ExcessEarlier = excess_earlier; % Perform Wilcoxon Rank Sum test to compare excess mortality of 2020 % with the excess mortality of previous years if (length(excess_earlier) > 4 && ~isnan(excess_2020)) [p,h] = signrank(excess_earlier, excess_2020(end),'alpha', alpha,... 'tail', 'left'); % Rem.: Signficantly could also mean the excess mortality in 2020 % is significantly less than in earlier years -> so make a % one-tailed test mdata.(country).SignificantlyMoreIn2020 = h; % make a qqplot if there is enough data from previous years if length(excess_earlier) > 10 figqqplot = figure; [mdata.(country).DataNormal, ~, ~] = swtest(excess_earlier, alpha); qqplot(excess_earlier); saveas(gcf, [country, '_excessmortality_qqplot.png']) close(figqqplot) end else mdata.(country).SignificantlyMoreIn2020 = NaN; end mdata.(country).Trace = y; clear excess_earlier excess_2020 end %% Print the key results to the command line %% clc for ii=1:nc if isnan(mdata.(countries{ii}).SignificantlyMoreIn2020) fprintf('The significance of the excess mortality in %s in 2020 could not be determined\n',... countries{ii}) elseif mdata.(countries{ii}).SignificantlyMoreIn2020 fprintf('There was a significant excess mortality in %s in 2020 compared to previous years\n',... countries{ii}) else fprintf('There was no significant excess mortality in %s in 2020 compared to previous years\n',... countries{ii}) end end %% % *Please note that I used some external MATLAB functions from MATLAB Central % File Exchange:* % % * _jbfill_: John Bockstege (2020). Shade area between two curves (https://www.mathworks.com/matlabcentral/fileexchange/13188-shade-area-between-two-curves), % MATLAB Central File Exchange. Retrieved November 16, 2020. % * _swtest_: Ahmed BenSaïda (2020). Shapiro-Wilk and Shapiro-Francia normality % tests. (https://www.mathworks.com/matlabcentral/fileexchange/13964-shapiro-wilk-and-shapiro-francia-normality-tests), % MATLAB Central File Exchange. Retrieved November 16, 2020. % * _peakseek_: Peter O'Connor (2020). PeakSeek (https://www.mathworks.com/matlabcentral/fileexchange/26581-peakseek), % MATLAB Central File Exchange. Retrieved November 18, 2020 % % Please also note that I used some of the R-Code from EUROMOMO as sort of % template for the analysis here. The corresponding R-package can be downloaded % from the EUROMOMO .