#----------------------------------------------------------------------
#
# Filename: btdata_filter.r
# Purpose: Prepare data for Marvier et al. (2007) analyses
#
# Brief overview:
#
# To be included in the analysis, a study had to: (i) occur in the field
# or be semi-field; (ii) report abundance as a response variable for a
# nontarget group; (iii) include a comparison to a non-transgenic
# control (either no treatment control, control sprayed with
# insecticide, or both control and treatment sprayed with insecticide);
# and (iv) present treatment and control means, accompanied by non-zero
# standard deviations (s) and sample sizes (n) (or the author directly
# provided these values to us).
#
# We then limited our meta-analyses to three categories of Bt crops:
#   1. lepidopteran resistant Cry1Ac cotton
#   2. lepidopteran resistant Cry1Ab maize
#   3. coleopteran resistant Cry3Bb maize
#
# These additional initial filters were applied:
#   - For maize, insecticide_chemical_class must be 'pyrethroid' (or
#     'na', corresponding to cases where there was no insecticide
#     treatment)
#   - For replicate_data_issues, exclude rows with the word 'lumped'
#   - Exclude 'Orchesella zebra (yellow morph)' nontarget_species
#
# Next, in cases where there are multiple entries for a species and
# functional group within an experiment, apply the following rules in
# order:
#   1. prefer 'seasonal mean' over 'peak day'
#   2. take largest control mean
#   3. if tied, take smallest control standard deviation
#   4.
#      if still tied, take more mature stage
#
# Author: Jim Regetz, NCEAS
# Date created: 22 Mar 2007
# Last modified: 19 Apr 2007
#
#----------------------------------------------------------------------

# Read in the full dataset, and subset rows meeting the initial criteria.
# Empty CSV fields are read as NA (na.strings=""). subset() keeps only
# rows whose condition is TRUE, so a row whose combined condition
# evaluates to NA is dropped.
bt.suitable <- subset(
  read.csv("btAll_Marvier2007.csv", na.strings="",
    stringsAsFactors=FALSE),
  study_type %in% c("Field", "Semi-field") &
  nontarget_species != "Orchesella zebra (yellow morph)" &
  (
    # Maize: both Bt proteins, pyrethroid (or no) insecticide only
    (crop_species == "maize" & pip %in% c("Cry1Ab", "Cry3Bb") &
      insecticide_chemical_class %in% c("pyrethroid", "na")) |
    # Cotton: Cry1Ac only
    (crop_species == "cotton" & pip == "Cry1Ac")
  ) &
  comparison_type %in% c("non-transgenic control",
    "control w/ insecticides", "insecticides on Bt & control") &
  response_variable_abbrev == "abundance" &
  # Exclude rows flagged as 'lumped'; grepl() returns FALSE for NA, so
  # rows without any replicate_data_issues entry are kept
  !grepl("lumped", replicate_data_issues) &
  # Ensure sufficient information for meta-analysis
  # (-99 appears to be the dataset's missing-value code -- TODO confirm)
  control_mean != -99 &
  expmtl_mean != -99 &
  control_std_dev != -99 &
  expmtl_std_dev != -99 &
  true_control_sample_size > 0 &
  true_expmtl_sample_size > 0 &
  # Ensure valid evaluation of Hedges' d: positive degrees of freedom
  # and a non-zero pooled sum of squares
  (true_control_sample_size + true_expmtl_sample_size > 2) &
  ((true_control_sample_size - 1) * control_std_dev^2 +
    (true_expmtl_sample_size - 1) * expmtl_std_dev^2) > 0
)

# Roughly sort stages from young to old; this ordering will be used to
# break ties later when selecting data
stage.levels <- c("not specified", "all stages",
  "natural population of variable ages", "egg", "egg and larva",
  "immature", "larva", "larvae", "nymph", "larva and pupa",
  "larva & pupa", "pupae", "immature and adult", "juvenile and adult",
  "adult and nymph", "nymph & adult", "larva & adult",
  "adult and larva", "adult")

# factor() silently converts any label that is not listed in
# stage.levels to NA; warn so that new stage labels in the data do not
# go unnoticed
stage.unmapped <- setdiff(bt.suitable$nontarget_final_age_or_stage,
  c(stage.levels, NA))
if (length(stage.unmapped) > 0) {
  warning("Unrecognized nontarget_final_age_or_stage value(s) set to NA: ",
    paste(stage.unmapped, collapse=", "), call.=FALSE)
}

bt.suitable$nontarget_final_age_or_stage <-
  factor(bt.suitable$nontarget_final_age_or_stage,
    levels=stage.levels, ordered=TRUE)

# Separate data for the two crop species
cotton <- subset(bt.suitable, crop_species == "cotton")
maize <- subset(bt.suitable, crop_species == "maize")

# Define function that implements rules for ensuring
# sufficient independence of the data to be analyzed.
#
# btfilter(df) partitions the rows of df by article_id, expmt_num,
# nontarget_finest_grouping and nontarget_functional_group, and within
# each partition narrows the candidate rows by applying, in order:
#   1. drop "peak day" records unless every record is "peak day"
#   2. keep the record(s) with the largest control_mean
#   3. if still tied, keep the record(s) with the smallest
#      control_std_dev
#   4. if still tied, keep the record(s) with the highest-ranked
#      nontarget_final_age_or_stage
# If several records survive all four rules, all of them are kept and a
# message identifying them is emitted.
#
# Args:
#   df: data frame with the columns named above plus seasonal_or_peak
#       and id (id is only used in the message).
# Returns:
#   The selected rows of df, in their original row order.
#
# Notes:
#   - split() drops rows that have NA in any grouping column, so such
#     rows never appear in the result.
#   - control_mean and control_std_dev are assumed to contain no NA
#     (the initial filter in this script drops such rows).
btfilter <- function(df) {
  # An empty data frame has nothing to select from
  if (nrow(df) == 0L) {
    return(df)
  }

  groups <- split(seq_len(nrow(df)),
    df[c("article_id", "expmt_num", "nontarget_finest_grouping",
      "nontarget_functional_group")],
    drop=TRUE)

  pick.rows <- function(rows) {
    # Rule 1: %in% never yields NA, so a missing seasonal_or_peak value
    # is treated as "not peak day" instead of breaking the if() test
    is.peak <- df$seasonal_or_peak[rows] %in% "peak day"
    if (!all(is.peak)) {
      rows <- rows[!is.peak]
    }
    # Rule 2: largest control mean
    if (length(rows) > 1) {
      ctl.mean <- df$control_mean[rows]
      rows <- rows[ctl.mean == max(ctl.mean)]
    }
    # Rule 3: smallest control standard deviation
    if (length(rows) > 1) {
      ctl.sd <- df$control_std_dev[rows]
      rows <- rows[ctl.sd == min(ctl.sd)]
    }
    # Rule 4: most mature stage. Records with an NA stage rank below
    # every known stage; if all stages are NA the tie is left unbroken
    # (previously an NA stage caused the whole group to be dropped).
    if (length(rows) > 1) {
      stage <- df$nontarget_final_age_or_stage[rows]
      known <- !is.na(stage)
      if (any(known)) {
        rows <- rows[known]
        rnk <- rank(stage[known])
        rows <- rows[rnk == max(rnk)]
      }
    }
    if (length(rows) > 1) {
      message("Note: Multiple records remain for article ",
        df$article_id[rows[1]], ", id ", df$expmt_num[rows[1]],
        ", rows [", paste(df$id[rows], collapse=", "), "]\n")
    }
    rows
  }

  inds <- unlist(lapply(groups, pick.rows), use.names=FALSE)
  # as.integer() turns a NULL result (no groups at all) into integer(0)
  df[sort(as.integer(inds)), ]
}

# Apply function defined above
cotton.final <- btfilter(cotton)
maize.final <- btfilter(maize)

# Write the filtered Cotton and Maize data to CSV files
write.csv(cotton.final, file="btCotton_Marvier2007.csv", na="",
  row.names=FALSE)
write.csv(maize.final, file="btMaize_Marvier2007.csv", na="",
  row.names=FALSE)