use strict;
use warnings;
use XML::LibXML;
use XML::LibXML::Reader;
use LWP::UserAgent;
use Data::Dumper;
use HTTP::Request;
use Scalar::Util qw(looks_like_number);
my $debug = 0;
my $use_restricted = 0;
my $noheader = 0;
my $sendfile;
my $query_type;
my $death_type;
my $session_id;
my $year_sel;
my $state_sel;
#
# Years in the FINAL database
#
my @YSF1 = ("1999","2000","2001","2002","2003","2004");
my @YSF2 = ("2005","2006","2007","2008","2009","2010");
my @YSF3 = ("2011","2012","2013","2014","2015","2016");
my @YSF4 = ("2017","2018","2019","2020");
#
# Years in the PROVISIONAL database
#
my @YSP1 = ("2021","2022","2023","2024");
my $YEARS;
#
# We can't ask WONDER to return too many rows (max 150K) so for big queries (i.e. MONTH ly) we have to break it up into State Sets (SS)
# and do it one group at a time
#
my @SSF1 = ("Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado");
my @SSF2 = ("Connecticut", "Delaware", "District of Columbia", "Florida", "Georgia", "Hawaii", "Idaho");
my @SSF3 = ("Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana");
my @SSF4 = ("Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi");
my @SSF5 = ("Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey");
my @SSF6 = ("New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma");
my @SSF7 = ("Oregon", "Pennsylvania", "Rhode Island","South Carolina", "South Dakota", "Tennessee");
my @SSF8 = ("Texas", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming");
#
# State Sets in the PROVISIONAL database
#
my @SSP1 = (@SSF1,@SSF2,@SSF3,@SSF4);
my @SSP2 = (@SSF5,@SSF6,@SSF7,@SSF8);
my $STATES;
#
# HHS regions
#
my @HSF1 = ("HHS1","HHS2","HHS3","HHS4","HHS5");
my @HSF2 = ("HHS6","HHS7","HHS8","HHS9","HHS10");
my $HHR;
#
#
if(scalar(@ARGV) > 0) {
while($ARGV[0] =~ /--debug|--send|--sid|--noheader|--use_restricted/) {
my $opt = $ARGV[0];
shift(@ARGV);
if($opt eq "--debug") {
$debug = 1;
}
if($opt eq "--use_restricted") {
$use_restricted = 1;
}
if($opt eq "--send") {
if(scalar(@ARGV) < 1) {
die "Usage: wonder_query --send ";
}
$sendfile = $ARGV[0];
shift(@ARGV);
}
if($opt eq "--sid") {
if(scalar(@ARGV) < 1) {
die "Usage: wonder_query --sid ";
}
$session_id = $ARGV[0];
shift(@ARGV);
}
if($opt eq "--noheader") {
$noheader = 1;
}
}
if (scalar(@ARGV) != 4) {
die "Usage: wonder_query [--debug|--send |--sid |--noheader] [QUERY_TYPE] [DEATH_TYPE] [YEAR_SELECTOR] [LOCATION]\n";
}
$query_type = $ARGV[0];
$death_type = $ARGV[1];
$year_sel = $ARGV[2];
$state_sel = $ARGV[3];
$state_sel =~ s/_/ /g;
} else {
die "No arguments";
}
my ($age_group,$database_type,$date_type) = parse_options($query_type);
if(($death_type eq "999_ucd") && ($database_type eq "FINAL")) {
write_csv_header($query_type);
print("#Code 999 not available in the FINAL database\n");
exit;
}
#print "$debug/$noheader/$use_restricted/$sendfile/$query_type/$death_type/$state_sel/$session_id/$year_sel\n";
if(! (
(($database_type eq "FINAL") && ($year_sel =~ /YSF1|YSF2|YSF3|YSF4|ALL|All/)) |
(($database_type eq "PROVISIONAL") && ($year_sel =~ /YSP1|ALL|All/))
) ) {
die "wonder_query: Year selector $year_sel wrong for |$database_type|";
}
if($year_sel =~ "All|ALL") {
$YEARS=["*All*"];
}
if($year_sel eq "YSF1") {
$YEARS = \@YSF1;
} elsif($year_sel eq "YSF2") {
$YEARS = \@YSF2;
} elsif($year_sel eq "YSF3") {
$YEARS = \@YSF3;
} elsif($year_sel eq "YSF4") {
$YEARS = \@YSF4;
} elsif($year_sel eq "YSP1") {
$YEARS = \@YSP1;
}
if(! (
(($database_type eq "FINAL") && ($state_sel =~ /SSF1|SSF2|SSF3|SSF4|SSF5|SSF6|SSF7|SSF8|USA|HHS1|HHS2|ALL|All/)) |
(($database_type eq "PROVISIONAL") && ($state_sel =~ /SSP1|SSP2|USA|HHS1|HHS2|ALL|All/))
) ) {
die "wonder_query: State selector $state_sel wrong";
}
if($state_sel eq "SSF1") {
$STATES = \@SSF1;
} elsif($state_sel eq "SSF2") {
$STATES = \@SSF2;
} elsif($state_sel eq "SSF3") {
$STATES = \@SSF3;
} elsif($state_sel eq "SSF4") {
$STATES = \@SSF4;
} elsif($state_sel eq "SSF5") {
$STATES = \@SSF5;
} elsif($state_sel eq "SSF6") {
$STATES = \@SSF6;
} elsif($state_sel eq "SSF7") {
$STATES = \@SSF7;
} elsif($state_sel eq "SSF8") {
$STATES = \@SSF8;
} elsif($state_sel eq "SSP1") {
$STATES = \@SSP1;
} elsif($state_sel eq "SSP2") {
$STATES = \@SSP2;
} elsif($state_sel eq "HHS1") {
$HHR = \@HSF1;
} elsif($state_sel eq "HHS2") {
$HHR = \@HSF2;
} elsif($state_sel =~ /USA|ALL|All/i) {
# DO NOTHING
}
my %state_to_code = ("USA" => '*All*',
"Alabama" => "01","Alaska" => "02","Arizona" => "04","Arkansas" => "05","California" => "06","Colorado" => "08","Connecticut" => "09","Delaware" => "10",
"District of Columbia" => 11,"Florida" => 12,"Georgia" => 13,"Hawaii" => 15,"Idaho" => 16,"Illinois" => 17,
"Indiana" => 18,"Iowa" => 19,"Kansas" => 20,"Kentucky" => 21,"Louisiana" => 22,"Maine" => 23,"Maryland" => 24,"Massachusetts" => 25,
"Michigan" => 26,"Minnesota" => 27,"Mississippi" => 28,"Missouri" => 29,"Montana" => 30,"Nebraska" => 31,"Nevada" => 32,"New Hampshire" => 33,
"New Jersey" => 34,"New Mexico" => 35,"New York" => 36,"North Carolina" => 37,"North Dakota" => 38,"Ohio" => 39,"Oklahoma" => 40,
"Oregon" => 41,"Pennsylvania" => 42,"Rhode Island" => 44,"South Carolina" => 45,"South Dakota" => 46,"Tennessee" => 47,"Texas" => 48,
"Utah" => 49,"Vermont" => 50,"Virginia" => 51,"Washington" => 53,"West Virginia" => 54,"Wisconsin" => 55,"Wyoming" => 56);
#
#
#
# See: https://github.com/alipphardt/cdc-wonder-api
#
#
#
# Query Parameters
#
#
#
#
my %datause_parameters = (
"accept_datause_restrictions","true"
);
# "by-variables" or those parameters selected in the "Group Results By" and the "And By" drop-down lists
# in the "Request Form." These "by-variables" are the cross-tabulations, stratifications or indexes
# to the query results. Expect the results data table to show a row for each category in the by-variables,
# and a column for each measure. For example, if you wish to compare data by sex, then "group results by" gender,
# to get a row for females and a row for males in the output.
# M_ are measures to return, the default measures plus any optional measures.
# V1-level1 - year
# V1-level2 - month
# V2 ICD - Chapter (level1 & level 2)
# V3 is ????
# V4 - ICD-10 113 Cause List
# V5 - Age Groups (i.e. Ten Year Age Groups)
# V6 is ???
# V7 - Gender
# V8 - Race
# V9-level1 - state
# V9-level2 - county
# V10-level1 - census region
# V10-level2 - census division
# V11 - 2006 urbanization
# V12 - ICD-10 130 Cause List
# V13 is MCD codes (used in advanced (V_) (level1 & 2)
# V14 is ?????
# V15 is ICD-10 113 Cause List (MCD)
# V16 is ICD-10 130 Cause List (Infants) (MCD)
# V17 - Hispanic Origin
# V18 is ??????
# V19 - 2006 Urbanization
# V20 - Autopsy
# V21 - Place of death
# V22 - Injury Intent
# V23 - Injury Mechanism & All Other Leading Causes
# V24 - Weekday
# V25 - Drug/Alcohol Induced Causes UCD (level 1 & 2)
# V26 - Drug/Alcohol Induced Causes MCD (level 1 & 2)
# V27-level1 is HHS Region
#
# By Parameter control what is found in each row
#
my %b_parameters = (
"B_1" => "*None*",
"B_2" => "*None*", # B_2 is second
"B_3" => "*None*", # B_3 is third
"B_4" => "*None*", # B_4 is fourth
"B_5" => "*None*" # B_5 is fifth
);
#
# Measures to return, the default measures plus any optional measures
#
my %m_parameters = (
"M_1" => "{-DATABASE-}.M1", # Deaths, must be included
"M_2" => "{-DATABASE-}.M2", # Population, must be included
"M_3" => "{-DATABASE-}.M3" # Crude rate, must be included
#"M_31" => "{-DATABASE-}.M31", # Standard error (crude rate)
#"M_32" => "{-DATABASE-}.M32" # 95% confidence interval (crude rate)
#"M_41" => "{-DATABASE-}.M41", # Standard error (age-adjusted rate)
#"M_42" => "{-DATABASE-}.M42" # 95% confidence interval (age-adjusted rate)
);
# values highlighted in a "Finder" control for hierarchical lists,
# such as the "Regions/Divisions/States/Counties hierarchical" list.
# For this example, include all years, months, census regions, hhs regions, states. Only include ICD-10 K00-K92
# for disease of the digestive system
#
#
# F parameters are "regular" paramters vs. V parameters which are "advanced." The O_V??fmode settings control where the
# server gets its info for each
my %f_parameters = (
"F_{-DATABASE-}.V1" => ["*All*"], # year/month
"F_{-DATABASE-}.V2" => ["*All*"], # ICD-10 Codes for UCD
"F_{-DATABASE-}.V9" => ["*All*"], # State County - dont change
"F_{-DATABASE-}.V10" => ["*All*"], # Census Regions - dont change
"F_{-DATABASE-}.V13" => ["*All*"], # ICD-10 CODES for AND part of MCD
"F_{-DATABASE-}.V25" => ["*All*"], # States (?) - dont change
"F_{-DATABASE-}.V26" => ["*All*"], # States (?) - dont change
"F_{-DATABASE-}.V27" => ["*All*"], # HHS Regions - dont change
# Below don't seem to work for D71
"F_{-DATABASE-}.V77" => undef, # ????
"F_{-DATABASE-}.V79" => undef, # ????
"F_{-DATABASE-}.V80" => undef, # ????
# "F_{-DATABASE-}.V81" => undef, # ????
"F_{-DATABASE-}.V100" => undef, # ????
);
# contents of the "Currently selected" information areas next to "Finder" controls in the "Request Form."
# For this example, include all dates, census regions, hhs regions, and states.
# These are like F parameters. Are they needed?
my %i_parameters = (
"I_{-DATABASE-}.V1" => "*All* (All Dates)", # year/month
"I_{-DATABASE-}.V2" => "*All* (All Causes of Death)", # ICD-10 Codes in UCD Finder
"I_{-DATABASE-}.V9" => "*All* (The United States)", # State County - dont change
"I_{-DATABASE-}.V10" => "*All* (The United States)", # Census Regions - dont change
# "I_{-DATABASE-}.V13" => "*All*", # ???
# "I_{-DATABASE-}.V15" => "*All*", # ???
# "I_{-DATABASE-}.V16" => "*All*", # ???
"I_{-DATABASE-}.V25" => "*All*", # ???
# "I_{-DATABASE-}.V26" => "*All*", # ???
"I_{-DATABASE-}.V27" => "*All* (The United States)", # HHS Regions - dont change
"I_{-DATABASE-}.V77" => "*All* (The United States)", # ????
"I_{-DATABASE-}.V79" => "*All* (The United States)", # ????
"I_{-DATABASE-}.V80" => "*All* (The United States)", # ????
"I_{-DATABASE-}.V100" => "*All* (The United States)", # ?????
);
my %l_parameters = (
"L_{-DATABASE-}.V15" => "*All*", # ????
"L_{-DATABASE-}.V16" => "*All*", # ????
);
# variable values to limit in the "where" clause of the query, found in multiple select
# list boxes and advanced finder text entry boxes in the "Request Form."
# For this example, we want to include ten-year age groups for ages 15-44.
# For all other categories, include all values
my %v_parameters = (
"V_{-DATABASE-}.V1" => "", # Year/Month
"V_{-DATABASE-}.V2" => "*All*", # ICD-10 UCD Codes List
"V_{-DATABASE-}.V4" => "*All*", # ICD-10 113 Cause List
"V_{-DATABASE-}.V5" => "*All*", # Ten-Year Age Groups
"V_{-DATABASE-}.V6" => "00", # Infant Age Groups
"V_{-DATABASE-}.V7" => "*All*", # Gender
"V_{-DATABASE-}.V8" => undef, # Race
"V_{-DATABASE-}.V9" => undef, # State/County
"V_{-DATABASE-}.V10" => "", # Census Regions
"V_{-DATABASE-}.V11" => "*All*", # 2006 Urbanization
"V_{-DATABASE-}.V12" => "*All*", # ICD-10 130 Cause List (Infants)
"V_{-DATABASE-}.V13" => "*All*", # ICD-10 MCD Codes LIST
"V_{-DATABASE-}.V13_AND" => "*All*", # ICD-10 MCD Codes AND LIST
"V_{-DATABASE-}.V15" => "", # ICD-10 Cause List for MCD
"V_{-DATABASE-}.V15_AND" => "", # ICD-10 Cause List for MCD
"V_{-DATABASE-}.V16" => "", # ICD-10 Infant Cause List for MCD
"V_{-DATABASE-}.V16_AND" => "", # ICD-10 Infant Cause List for MCD
"V_{-DATABASE-}.V17" => "*All*", # Hispanic Origin
"V_{-DATABASE-}.V19" => "*All*", # 2013 Urbanization
"V_{-DATABASE-}.V20" => "*All*", # Autopsy
"V_{-DATABASE-}.V21" => "*All*", # Place of Death
"V_{-DATABASE-}.V22" => "*All*", # Injury Intent
"V_{-DATABASE-}.V23" => "*All*", # Injury Mechanism and All Other Leading Causes
"V_{-DATABASE-}.V24" => undef, # Weekday
"V_{-DATABASE-}.V25" => "", # Drug/Alcohol Induced Causes UCD
"V_{-DATABASE-}.V26" => "", # Drug/Alcogol Induced Causes MCD
"V_{-DATABASE-}.V26_AND" => "", # Drug/Alcogol Induced Causes MCD
"V_{-DATABASE-}.V27" => undef, # HHS Regions (was "")
#V42-44 are rejected by D176
"V_{-DATABASE-}.V42" => undef, # Single Race 6
"V_{-DATABASE-}.V43" => undef, # Single Race 15
"V_{-DATABASE-}.V44" => undef, # Single/Multi Race 31
#
"V_{-DATABASE-}.V51" => "*All*", # Five-Year Age Groups
"V_{-DATABASE-}.V52" => "*All*", # Single-Year Ages
# Below don't seem to work for D176 (rejected
"V_{-DATABASE-}.V77" => undef, # Death occurrence HHS region
"V_{-DATABASE-}.V79" => undef, # Death occurrence State
"V_{-DATABASE-}.V80" => undef, # Death occurrence census region/division
"V_{-DATABASE-}.V81" => undef, # Death occurrence 2006 Urbanization
"V_{-DATABASE-}.V89" => undef, # Death occurrence 2013 Urbanization
"V_{-DATABASE-}.V100" => undef, # MMWR Year/Week (level1 level2)
);
my %o_parameters = (
"O_age" => "{-DATABASE-}.V5", # select age-group (e.g. ten-year (V5), five-year (V51), single-year (V52), infant groups (V6)
"O_ucd" => "{-DATABASE-}.V2", # select underlying cause of death category
"O_mcd" => "{-DATABASE-}.V13", # select underlying cause of death category
"O_urban" => "{-DATABASE-}.V19", # select urbanization category
"O_location" => "{-DATABASE-}.V9", # select location variable to use (e.g. state/county, census, hhs regions)
#
"O_V1_fmode" => "freg", # Use regular finder and ignore v parameter value for year/month
"O_V2_fmode" => "freg", # Use regular finder and ignore v parameter value for UCD ICD code
"O_V9_fmode" => "freg", # Use regular finder and ignore v parameter value for State/County
"O_V10_fmode" => "freg", # Use regular finder and ignore v parameter value for Census Region
# "O_V13_fmode" => "fadv", # Use advanced finder and ignore f parameter value for MCD ICD code
"O_V13_fmode" => "freg", # Use advanced finder and ignore f parameter value for MCD ICD code
"O_V15_fmode" => "fadv", # Use advanced finder and ignore f parameter for MCD ICD-10 Cause List
"O_V16_fmode" => "fadv", # Use advanced finder and ignore f parameter for MCD Infant Cause List
"O_V25_fmode" => "freg", # Use regular finder and ignore v parameter value for UCD Drug/Alcohol Causes
"O_V26_fmode" => "fadv", # Use advanced finder and ignore f paramter for MCD Drug/Alcohol Causes
"O_V27_fmode" => "freg", # Use regular finder and ignore v parameter value for HHS regions
# New fmode shit below
"O_V77_fmode" => "freg",
"O_V79_fmode" => "freg",
"O_V80_fmode" => "freg",
"O_V100_fmode" => "freg",
#
"O_aar" => "aar_none", # age-adjusted rates
"O_aar_pop" => "0000", # population selection for age-adjusted rates
"O_javascript" => "on", # Set to off by default (I don't do javascript in my Perl)
"O_precision" => "1", # decimal places
"O_rate_per" => "100000", # rates calculated per X persons
"O_show_totals" => "false", # Show totals
# "O_change_action-Send-Export Results" => "false", # Export Results
"O_show_zeros" => "true", # Show zero rows
"O_show_suppressed" => "true", # Show suppressed rows
"O_timeout" => "1200",
"O_title" => "", # title for data run
# New shit below
"O_title" => "",
"O_MMWR" => "false",
"O_dates" => "YEAR",
"O_race" => "{-DATABASE-}.V42",
"O_death_location" => "{-DATABASE-}.V79",
"O_death_urban" => "{-DATABASE-}.V89",
"O_oc-sect1-request" => "close",
);
# values for non-standard age adjusted rates (see mortality online databases).
# For this example, these parameters are ignored as standard age adjusted rates are used
#
# I don't use this and it is ignored
# Year - VM_D76.M6_D76.V1_S
# All | list of years from 1999-2015 (inclusive)
# Gender - VM_D76.M6_D76.V7
# F - Female
# M - Male
#Hispanic Origin - VM_D76.M6_D76.V17
# *All* - All Origins
# 2135-2 - Hispanic or Latino
# 2186-2 - Not Hispanic or Latino
# NS - Not Stated
#Race - VM_D76.M6_D76.V8
# *All* - All Races
# 1002-5 - American Indian or Alaska Native
# A-PI - Asian or Pacific Islander
# 2054-5 - Black or African American
# 2106-3 - White
#Location - VM_D76.M6_D76.V10
my %vm_parameters = (
"VM_{-DATABASE-}.M6_{-DATABASE-}.V1_S" => "*All*", # Year
"VM_{-DATABASE-}.M6_{-DATABASE-}.V7" => "*All*", # Gender
# "VM_{-DATABASE-}.M6_{-DATABASE-}.V8" => "*All*", # Race
"VM_{-DATABASE-}.M6_{-DATABASE-}.V42" => "*All*", # Race
"VM_{-DATABASE-}.M6_{-DATABASE-}.V10" => "", # Location
"VM_{-DATABASE-}.M6_{-DATABASE-}.V17" => "*All*" # Hispanic-Origin
);
# Miscellaneous hidden inputs/parameters usually passed by web form. These do not change.
my %misc_parameters = (
"action-Send" => "Send",
"dataset_code" => "{-DATABASE-}",
"dataset_label" => "Provisional Mortality Statistics",
"dataset_vintage" => "As of today",
"finder-stage-{-DATABASE-}.V1" => "codeset",
"finder-stage-{-DATABASE-}.V2" => "codeset",
"finder-stage-{-DATABASE-}.V9" => "codeset",
"finder-stage-{-DATABASE-}.V10" => "codeset",
"finder-stage-{-DATABASE-}.V13" => "codeset",
"finder-stage-{-DATABASE-}.V15" => "codeset",
"finder-stage-{-DATABASE-}.V16" => "codeset",
"finder-stage-{-DATABASE-}.V25" => "codeset",
"finder-stage-{-DATABASE-}.V26" => "codeset",
"finder-stage-{-DATABASE-}.V27" => "codeset",
"finder-stage-{-DATABASE-}.V77" => "codeset",
"finder-stage-{-DATABASE-}.V79" => "codeset",
"finder-stage-{-DATABASE-}.V80" => "codeset",
"finder-stage-{-DATABASE-}.V100" => "codeset",
# "saved_id" => "",
"stage" => "request"
);
my %mon2num = qw(
Jan 01 Feb 02 Mar 03 Apr 04 May 05 Jun 06
Jul 07 Aug 08 Sep 09 Oct 10 Nov 11 Dec 12
);
sub write_csv_header {
my ($query_type) = @_;
print "\"Notes\",";
print "\"Month\",\"Month Code\"," if($date_type =~ /MONTH/);
print "\"Year\",\"Year Code\"," if($date_type =~ /YEAR/);
print "\"Ten-Year Age Groups\"," if($age_group =~ /TENYEAR/);
if($state_sel ne "USA") {
if($state_sel =~ /HHS/) {
print "\"HHS_Region\",";
} else {
print "\"State\",";
}
}
print "\"Deaths\",\"Population\",\"Crude Rate\"";
print "\n";
}
sub is_a_wonder_date {
my ($label) = @_;
if($label =~ /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.*, \d\d\d\d/) { # It's a date
return(1);
}
if($label =~ /^(\d\d\d\d).*$/) { # It's a date
return(1);
}
return(0);
}
sub make_a_date {
my ($label) = @_;
if($label =~ /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.*, \d\d\d\d/) { # It's a date
my ($mbrev,$year) = $label =~ /(...)\.*, (\d\d\d\d)/;
my $mnum = $mon2num{$mbrev};
return("$year/$mnum");
}
if($label =~ /^(\d\d\d\d).*$/) { # It's a date
my ($year) = $label =~ /(\d\d\d\d).*$/;
return("$year");
}
}
sub write_output {
my ($response,$query_type) = @_;
print "Response: \n$response\n" if $debug > 0;
my $wonder_dom = XML::LibXML->load_xml(string => $response);
my $wonder_reader = XML::LibXML::Reader->new(string => $response);
my $repeat_label;
my $repeat_label_count;
write_csv_header($query_type);
foreach my $data_rows ($wonder_dom->findnodes("//data-table/r")) {
my $doneOne = 0;
print ","; # NOTES Column is first column
if($repeat_label) {
print "\"$repeat_label\",";
if(is_a_wonder_date($repeat_label)) { # The repeat label is a fucked up date actually. Now make a nice date
my $p = make_a_date($repeat_label);
print "\"$p\",";
}
$repeat_label_count--;
if($repeat_label_count <= 0) {
undef $repeat_label;
}
}
foreach my $columns ($data_rows->findnodes("./c")) {
if($doneOne) {
print ",";
} else {
$doneOne=1;
}
my $label = $columns->{"l"};
my $value = $columns->{"v"};
my $c = $columns->{"c"};
my $dt = $columns->{"dt"};
if(defined($label)) {
print "\"$label\"";
if($columns->{"r"}) { # Repeat_count
$repeat_label = $label;
$repeat_label_count = $columns->{"r"}-1;
}
if(is_a_wonder_date($label)) {
my $p = make_a_date($label);
print ",\"$p\"";
}
}
if(defined($value)) {
$value =~ s/,//g;
if(looks_like_number($value)) {
print $value;
} else {
print "\"$value\"";
}
}
if(defined($c)) {
# print $c;
}
if(defined($dt)) {
$dt =~ s/,//g;
# print $dt;
}
}
print "\n";
}
}
sub create_ParameterList {
my %pL = %{$_[0]}; # Creates a copy
my $database = $_[1];
my $pS = "";
foreach my $key (sort keys %pL) {
if(defined($pL{$key})) {
$pS .= "\t\n";
my $name = $key;
$name =~ s/\{-DATABASE-\}/$database/g;
$pS .= "\t\t" . $name . "\n";
if($pL{$key} eq "") {
$pS .= "\t\t\n";
} else {
my @values;
if(ref($pL{$key}) ne 'ARRAY') {
$values[0] = $pL{$key};
} else {
@values = @{$pL{$key}};
}
my $doneOne = 0;
foreach my $value (@values) {
$value =~ s/\{-DATABASE-\}/$database/g;
# if($doneOne > 0) {
# $pS .= "\n";
# } else {
# $doneOne = 1;
# }
$pS .= "\t\t" . $value . "\n";
}
}
$pS .= "\t\n";
}
}
return($pS);
}
sub set_param {
# my %pL = %{$_[0]}; # Creates a copy
my $pL = $_[0];
my $whatSet = $_[1];
my $toSet = $_[2];
$pL->{$whatSet} = $toSet;
}
sub parse_options {
my ($query_type) = @_;
my $age_group;
my $database_type;
my $date_type;
if($query_type =~ /TENYEAR/i) {
$age_group = "TENYEAR";
} elsif ($query_type =~ /SCHOOL/) {
$age_group = "5-17";
} elsif ($query_type =~ /CHILD/) {
$age_group = "0-17";
} elsif ($query_type =~ /YOUNGADULT/) {
$age_group = "18-24";
} else {
die "Unknown age group $query_type";
}
if($query_type =~ /FINAL/) {
$database_type = "FINAL";
} elsif ($query_type =~ /PROVISIONAL/) {
$database_type = "PROVISIONAL";
} else {
die "Unknown database type";
}
if($query_type =~ /ANNUAL/) {
$date_type = "YEAR";
} elsif ($query_type =~ /MONTHLY/) {
$date_type = "MONTH";
} else {
die "Unknown date type";
}
return($age_group,$database_type,$date_type);
}
sub do_query {
my ($query_type) = @_;
my $database;
my %bs = %b_parameters;
my %ms = %m_parameters;
my %fs = %f_parameters;
my %os = %o_parameters;
my %vs = %v_parameters;
my %is = %i_parameters;
my %ls = %l_parameters;
my %miscs = %misc_parameters;
my %vms = %vm_parameters;
my $xml_request;
#
# First decision -- which "database" to use and which year range for the database
#
# D77 is -- "Multiple Cause of Death (Final) 1999-2020"
# D176 is -- "Multiple Cause of Death (Provisional) 2018-Last Week"
#
#
# Set which states to select (if using the restricted dataset)
if($use_restricted > 0) {
my @values;
foreach my $key (@$STATES) {
my $value = $state_to_code{$key};
push @values, $value if defined $value;
}
# print "Values is ",@values;
if(@values) {
set_param(\%fs,"F_{-DATABASE-}.V9",\@values); # Set states
set_param(\%os,"O_location","{-DATABASE-}.V9");
}
}
#
# Set which HHS Regions to select (if using the restricted dataset)
#
if($use_restricted > 0) {
my @values;
foreach my $value (@$HHR) {
push @values, $value if defined $value;
}
# print "Values is ",@values;
if(@values) {
set_param(\%fs,"F_{-DATABASE-}.V27",\@values); # Set HHR regions
set_param(\%os,"O_location","{-DATABASE-}.V27");
}
}
#
# And which years
set_param(\%fs,"F_{-DATABASE-}.V1",$YEARS); # Years
#
#
if($database_type eq "FINAL") {
$database = "D77";
set_param(\%vs,"V_{-DATABASE-}.V8","*All*");
set_param(\%vs,"V_{-DATABASE-}.V24","*All*");
} else {
$database = "D176";
set_param(\%fs,"F_{-DATABASE-}.V77","*All*");
set_param(\%fs,"F_{-DATABASE-}.V79","*All*");
set_param(\%fs,"F_{-DATABASE-}.V80","*All*");
# set_param(\%fs,"F_{-DATABASE-}.V81","*All*");
set_param(\%fs,"F_{-DATABASE-}.V100","*All*");
set_param(\%vs,"V_{-DATABASE-}.V9","");
set_param(\%vs,"V_{-DATABASE-}.V42","*All*");
set_param(\%vs,"V_{-DATABASE-}.V43","*All*");
set_param(\%vs,"V_{-DATABASE-}.V44","*All*");
set_param(\%vs,"V_{-DATABASE-}.V100","");
set_param(\%vs,"V_{-DATABASE-}.V77","");
set_param(\%vs,"V_{-DATABASE-}.V79","");
set_param(\%vs,"V_{-DATABASE-}.V80","");
set_param(\%vs,"V_{-DATABASE-}.V81","*All*");
set_param(\%vs,"V_{-DATABASE-}.V89","*All*");
set_param(\%vs,"V_{-DATABASE-}.V100","");
}
#
# Second decision -- what is the aggregation -- By year or by month?
#
if($date_type eq "YEAR") {
set_param(\%bs,"B_1","{-DATABASE-}.V1-level1"); # Group by years
} else {
set_param(\%bs,"B_1","{-DATABASE-}.V1-level2"); # Group by months
}
#
# Third decision -- Ten year age groups or a specific range of ages?
#
if($age_group eq "TENYEAR") {
set_param(\%vs,"V_{-DATABASE-}.V52","*All*"); # Single Year Ages
set_param(\%os,"O_age","{-DATABASE-}.V5"); # Ten Year Ages
set_param(\%bs, "B_2","{-DATABASE-}.V5"); # Group by Ten year ages
if(($state_sel ne "USA") and ($use_restricted > 0)) {
if($state_sel =~ /HHS/) {
set_param(\%bs, "B_3","{-DATABASE-}.V27-level1"); # Group by HHS region
} else {
set_param(\%bs, "B_3","{-DATABASE-}.V9-level1"); # Group by state
}
}
} else {
set_param(\%os,"O_age","{-DATABASE-}.V52"); # Single year ages
if(($state_sel ne "USA") and ($use_restricted > 0)) {
if($state_sel =~ /HHS/) {
set_param(\%bs, "B_2","{-DATABASE-}.V27-level1"); # Group by HHS region
} else {
set_param(\%bs, "B_2","{-DATABASE-}.V9-level1"); # Group by state
}
}
if($age_group eq "0-17") {
set_param(\%vs,"V_{-DATABASE-}.V52",["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"]); # Single Year Ages
} elsif ($age_group eq "5-17") {
set_param(\%vs,"V_{-DATABASE-}.V52",["5","6","7","8","9","10","11","12","13","14","15","16","17"]); # Single Year Ages
} elsif($age_group eq "18-24") {
set_param(\%vs,"V_{-DATABASE-}.V52",["18","19","20","21","22","23","24"]); # Single Year Ages
} else {
die "Unknown age group $age_group";
}
}
#
# Fourth decision -- What kind of death?
#
my @ucd_list;
my @mcd_list;
@ucd_list = ("*All*");
@mcd_list = ("*All*");
if($death_type eq "All") {
# Don't need to restrict at all
# Do nothing
} elsif ($death_type eq "drugalcoholod_ucd") {
@ucd_list = ["X40","X41","X42","X43","X44","X45","X65","Y15"];
} elsif ($death_type eq "drugod_ucd") {
@ucd_list = ["X40","X41","X42","X43","X44","Y10","Y11","Y12","Y13","Y14"];
} elsif ($death_type eq "alcoholod_ucd") {
@ucd_list = ["X45","X65","Y15"];
} elsif ($death_type eq "suicide_ucd") {
@ucd_list = ["X60-X84","Y87.0"];
} elsif ($death_type eq "r99_ucd") {
@ucd_list = ["R99"];
} elsif ($death_type eq "999_ucd") {
@ucd_list = ["999"];
} elsif ($death_type eq "covid_mcd") {
if($database_type =~ /FINAL/) {
@mcd_list = ["U07.1"];
} else {
@mcd_list = ["U07.1","U09.9"];
}
} elsif ($death_type eq "disease_mcd") {
@mcd_list = ["A00-B99","C00-D48","D50-D89","E00-E89","F01-F99","G00-G98","H00-H59",
"H60-H95","I00-I99","J00-J98","K00-K92","L00-L98","M00-M99","N00-N99",
"U07.1",
"R00-R09","R10-R19","R20-R23","R25-R29","R30-R39","R40-R46","R47-R49",
"R50-R68","R70-R79","R80-R82","R83-R89","R90-R94"];
} elsif ($death_type eq "disease_ucd") {
@ucd_list = ["A00-B99","C00-D48","D50-D89","E00-E88","F01-F99","G00-G98","H00-H57",
"H60-H93","I00-I99","J00-J98","K00-K92","L00-L98","M00-M99","N00-N98",
"U07.1",
"R00-R09","R10-R19","R20-R23","R25-R29","R30-R39","R40-R46","R47-R49",
"R50-R68","R70-R79","R80-R82","R83-R89","R90-R94"];
} elsif ($death_type eq "influenza_mcd") {
@mcd_list = ["J09","J10","J11"];
} elsif ($death_type eq "nondisease_mcd") {
@mcd_list = ["S00-T98","V01-Y89"];
} elsif ($death_type eq "nondisease_ucd") {
@ucd_list = ["V01-Y89"];
} elsif ($death_type eq "cancer_ucd") {
@ucd_list = ["C00-D48"];
} elsif ($death_type eq "circulatory_ucd") {
@ucd_list = ["I00-I99"];
} elsif ($death_type eq "respiratory_ucd") {
@ucd_list = ["J00-J98"];
} elsif ($death_type eq "infectparasite_ucd") {
@ucd_list = ["A00-B99"];
} elsif ($death_type eq "blood_ucd") {
@ucd_list = ["D50-D89"];
} else {
die "$death_type unrecognized type of death";
}
set_param(\%fs,"F_{-DATABASE-}.V2",@ucd_list); # List of UCD Codes
set_param(\%fs,"F_{-DATABASE-}.V13",@mcd_list); # List of MCD Codes
#
#
#
$xml_request .= "";
$xml_request .= "\n";
#
# These are the specific entries given to me by the WONDER team for restricted data access
#
if($use_restricted > 0) {
$xml_request .= 'accept_datause_restrictionstrue';
$xml_request .= 'apix_projectXXXXXXXXXXXXXXXXX';
$xml_request .= 'apix_tokenXXXXXXXXXXXXXXXXXXXX';
$xml_request .= '';
}
#
#
#
# $xml_request .= create_ParameterList(\%datause_parameters,$database);
$xml_request .= create_ParameterList(\%bs,$database); ## Needed
$xml_request .= create_ParameterList(\%fs,$database); ## Needed
$xml_request .= create_ParameterList(\%is,$database); ## Not Needed.
$xml_request .= create_ParameterList(\%ls,$database); ## Not Needed.
$xml_request .= create_ParameterList(\%ms,$database); ## Needed
$xml_request .= create_ParameterList(\%os,$database); ## Needed
$xml_request .= create_ParameterList(\%vms,$database); ## Not Needed now as I am not screwing with age-adjusted rates
$xml_request .= create_ParameterList(\%vs,$database); ## Needed
$xml_request .= create_ParameterList(\%miscs,$database);
$xml_request .= "\n";
my $response = send_request($xml_request,$database);
return($response);
}
sub send_request {
my ($xml,$dataset) = @_;
my $ua = LWP::UserAgent->new(timeout => 1200);
my $url = "https://wonder.cdc.gov/controller/datarequest/";
my %form;
if($debug) {
# print "XML is:\n$xml\n";
print "$xml\n";
}
$form{'request_xml'} = $xml;
$form{'accept_datause_restrictions'} = "true";
$url .= $dataset;
if($session_id) {
$url .= ";jsessionid=" . $session_id;
}
if($debug) {
print "URL is: $url\n";
}
my $response = $ua->post($url, \%form);
if($response->is_success) {
return($response->decoded_content);
} else {
my $resp = $response->status_line;
die "POST to $url failed. Response: $resp\n";
}
}
my $response;
if($sendfile) {
if($debug) {
print "Splitting $sendfile...";
}
my ($database,$tfile) = split(/:/,$sendfile);
open my $fh, '<', $tfile or die "Can't open file $!";
my $xml_request = "";
$xml_request = do { local $/; <$fh> };
$response = send_request($xml_request,$database);
} else {
$response = do_query($query_type); # Do_query figures out which database to use
}
if($response) {
#print $response;
write_output($response,$query_type);
}