hd_qc_summary()
summarizes the quality control results of the input data and metadata.
It returns general information about the datasets, missing value information,
protein-protein correlations, and metadata summary visualizations.
Usage
hd_qc_summary(
dat,
metadata = NULL,
variable,
palette = NULL,
unique_threshold = 5,
cor_threshold = 0.8,
cor_method = "pearson",
verbose = TRUE
)
Arguments
- dat
An HDAnalyzeR object or a dataset in wide format with the sample ID as its first column.
- metadata
A dataset containing the metadata information with the sample ID as the first column. If an HDAnalyzeR object is provided, this parameter is not needed.
- variable
The name of the metadata variable (column) containing the different classes (for example, the column that contains your case and control groups).
- palette
A list of color palettes for the plots. The names of the list should match the column names in the metadata. Default is NULL.
- unique_threshold
The threshold on the number of unique values used to decide whether a numeric variable is treated as categorical. Default is 5.
- cor_threshold
The threshold to consider a protein-protein correlation as high. Default is 0.8.
- cor_method
The method to calculate the correlation. Default is "pearson". Other options are "spearman" and "kendall".
- verbose
Whether to print the summary. Default is TRUE.
Examples
# Create the HDAnalyzeR object providing the data and metadata
hd_object <- hd_initialize(example_data,
example_metadata |> dplyr::select(-Sample))
# Run the quality control summary
qc_res <- hd_qc_summary(hd_object,
variable = "Disease",
palette = list(Disease = "cancers12", Sex = "sex"),
cor_threshold = 0.7,
verbose = TRUE)
#> Summary:
#> Note: In case of long output, only the first 10 rows are shown. To see the rest display the object with view()
#> Number of samples: 586
#> Number of variables: 101
#> --------------------------------------
#> categorical:1
#> continuous:100
#> --------------------------------------
#> NA percentage in each column:
#> c("ACE2", "ACTA2", "ACTN4", "ADAM15", "ADAMTS16", "ADH4", "AKR1C4", "AMBN", "AMN", "AOC1", "ARNT", "ARTN", "AGR2", "AKT1S1", "AMFR", "ANXA10", "ANXA5", "ARID4B", "ATF2", "ATP5PO", "ATP6V1F", "ATXN10", "AARSD1", "ABL1", "ADAMTS15", "ADGRG1", "AIFM1", "ALPP", "AMIGO2", "APBB1IP", "AREG", "ARSB", "ATP6V1D", "ACAA1", "ADCYAP1R1", "AGR3", "AIF1", "AKR1B1", "AKT3", "ANKRD54", "ARHGAP1", "ARHGAP25", "ATG4A", "ATP6AP2", "ALDH3A1", "ANXA11", "ARHGEF12", "AXIN1", "ADA2", "ADAMTS8", "ANG", "ANGPTL3", "ANPEP",
#> "AOC3", "APOM", "ART3", "ATOX1", "AXL", "ACAN", "ACOX1", "ACY1", "ADGRG2", "AGXT", "AHCY", "AK1", "ANGPTL1", "ANXA4", "APLP1", "ADGRE2", "AGER", "AGRN", "AHSP", "ALDH1A1", "ANGPT1", "ANGPTL2", "ANGPTL4", "APOH", "APP", "ARSA", "ATP5IF1", "B4GALT1", "ACP6", "ADM", "AMBP", "ANGPT2", "ANGPTL7", "APEX1", "ARG1", "ADA", "ADAM23", "AGRP")c(6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.8, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 5.3, 4.9, 4.9, 4.9, 4.9, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 4.1, 3.9, 3.9, 3.9, 3.9, 3.9, 3.9, 3.9, 3.9, 3.9, 3.9, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 2.2, 2.2, 2.2, 2.2, 2.2, 2.2, 2.2, 1.4, 1.4, 1.4)
#> --------------------------------------
#> NA percentage in each row:
#> c("DA00450", "DA00482", "DA00542", "DA00003", "DA00463", "DA00116", "DA00475", "DA00578", "DA00443", "DA00476", "DA00510", "DA00408", "DA00085", "DA00106", "DA00209", "DA00341", "DA00394", "DA00461", "DA00520", "DA00115", "DA00056", "DA00299", "DA00468", "DA00479", "DA00548", "DA00218", "DA00278", "DA00421", "DA00422", "DA00525", "DA00564", "DA00025", "DA00076", "DA00079", "DA00090", "DA00125", "DA00208", "DA00338", "DA00423", "DA00458", "DA00568", "DA00389", "DA00563", "DA00318", "DA00537", "DA00573",
#> "DA00027", "DA00584", "DA00293", "DA00574", "DA00104", "DA00121", "DA00123", "DA00357", "DA00456", "DA00050", "DA00136", "DA00226", "DA00236", "DA00307", "DA00343", "DA00062", "DA00088", "DA00112", "DA00141", "DA00165", "DA00199", "DA00234", "DA00262", "DA00259", "DA00413", "DA00119", "DA00034", "DA00139", "DA00403", "DA00462", "DA00007", "DA00015", "DA00040", "DA00124", "DA00162", "DA00217", "DA00221", "DA00232", "DA00267", "DA00335", "DA00362", "DA00363", "DA00424", "DA00465", "DA00484", "DA00498",
#> "DA00533", "DA00540", "DA00004", "DA00028", "DA00143", "DA00164", "DA00177", "DA00202", "DA00327", "DA00334", "DA00349", "DA00375", "DA00397", "DA00399", "DA00405", "DA00433", "DA00464", "DA00490", "DA00508", "DA00528", "DA00213", "DA00308", "DA00347", "DA00514", "DA00188", "DA00231", "DA00361", "DA00439", "DA00489", "DA00006", "DA00137", "DA00194", "DA00227", "DA00235", "DA00281", "DA00331", "DA00367", "DA00391", "DA00435", "DA00499", "DA00507", "DA00519", "DA00523", "DA00531", "DA00551", "DA00586",
#> "DA00069", "DA00130", "DA00240", "DA00478", "DA00554", "DA00344")c(57.4, 53.5, 53.5, 50.5, 46.5, 43.6, 42.6, 42.6, 41.6, 35.6, 34.7, 33.7, 32.7, 32.7, 32.7, 32.7, 32.7, 32.7, 32.7, 29.7, 26.7, 26.7, 26.7, 26.7, 26.7, 24.8, 24.8, 24.8, 24.8, 24.8, 24.8, 22.8, 22.8, 22.8, 22.8, 22.8, 22.8, 22.8, 22.8, 22.8, 22.8, 21.8, 21.8, 20.8, 20.8, 20.8, 19.8, 18.8, 17.8, 17.8, 16.8, 16.8, 16.8, 16.8, 16.8, 15.8, 15.8, 15.8, 15.8, 15.8, 15.8, 14.9, 14.9, 14.9, 14.9, 14.9, 14.9, 14.9, 14.9, 13.9, 13.9, 12.9, 11.9, 11.9, 11.9, 11.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9,
#> 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 10.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 9.9, 7.9, 7.9, 7.9, 7.9, 6.9, 6.9, 6.9, 6.9, 6.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 5.9, 4, 4, 4, 4, 4, 2)
#> --------------------------------------
#> Protein-protein correlations above 0.7:
#> c("ATP5IF1", "AXIN1", "AIFM1", "ARHGEF12", "ARHGEF12", "AIFM1")c("AIFM1", "ARHGEF12", "ATP5IF1", "AXIN1", "AIFM1", "ARHGEF12")c(0.76, 0.76, 0.76, 0.76, 0.71, 0.71)
#> --------------------------------------
#> Summary:
#> Note: In case of long output, only the first 10 rows are shown. To see the rest display the object with view()
#> Number of samples: 586
#> Number of variables: 8
#> --------------------------------------
#> categorical:6
#> continuous:2
#> --------------------------------------
#> NA percentage in each column:
#> Grade91.5
#> --------------------------------------
#> NA percentage in each row:
#> c("DA00001", "DA00002", "DA00003", "DA00004", "DA00005", "DA00006", "DA00007", "DA00008", "DA00009", "DA00010", "DA00011", "DA00012", "DA00013", "DA00014", "DA00015", "DA00016", "DA00017", "DA00018", "DA00019", "DA00020", "DA00021", "DA00022", "DA00023", "DA00024", "DA00025", "DA00026", "DA00027", "DA00028", "DA00029", "DA00030", "DA00031", "DA00032", "DA00033", "DA00034", "DA00035", "DA00036", "DA00037", "DA00038", "DA00039", "DA00040", "DA00041", "DA00042", "DA00043", "DA00044", "DA00045", "DA00046",
#> "DA00047", "DA00048", "DA00049", "DA00050", "DA00051", "DA00052", "DA00053", "DA00054", "DA00055", "DA00056", "DA00057", "DA00058", "DA00059", "DA00060", "DA00061", "DA00062", "DA00063", "DA00064", "DA00065", "DA00066", "DA00067", "DA00068", "DA00069", "DA00070", "DA00071", "DA00072", "DA00073", "DA00074", "DA00075", "DA00076", "DA00077", "DA00078", "DA00079", "DA00080", "DA00081", "DA00082", "DA00083", "DA00084", "DA00085", "DA00086", "DA00087", "DA00088", "DA00089", "DA00090", "DA00091", "DA00092",
#> "DA00093", "DA00094", "DA00095", "DA00096", "DA00097", "DA00098", "DA00099", "DA00100", "DA00101", "DA00102", "DA00103", "DA00104", "DA00105", "DA00106", "DA00107", "DA00108", "DA00109", "DA00110", "DA00111", "DA00112", "DA00113", "DA00114", "DA00115", "DA00116", "DA00117", "DA00118", "DA00119", "DA00120", "DA00121", "DA00122", "DA00123", "DA00124", "DA00125", "DA00126", "DA00127", "DA00128", "DA00129", "DA00130", "DA00131", "DA00132", "DA00133", "DA00134", "DA00135", "DA00136", "DA00137", "DA00138",
#> "DA00139", "DA00140", "DA00141", "DA00142", "DA00143", "DA00144", "DA00145", "DA00146", "DA00147", "DA00148", "DA00149", "DA00150", "DA00151", "DA00152", "DA00153", "DA00154", "DA00155", "DA00156", "DA00157", "DA00158", "DA00159", "DA00160", "DA00161", "DA00162", "DA00163", "DA00164", "DA00165", "DA00166", "DA00167", "DA00168", "DA00169", "DA00170", "DA00171", "DA00172", "DA00173", "DA00174", "DA00175", "DA00176", "DA00177", "DA00178", "DA00179", "DA00180", "DA00181", "DA00182", "DA00183", "DA00184",
#> "DA00185", "DA00186", "DA00187", "DA00188", "DA00189", "DA00190", "DA00191", "DA00192", "DA00193", "DA00194", "DA00195", "DA00196", "DA00197", "DA00198", "DA00199", "DA00200", "DA00201", "DA00202", "DA00203", "DA00204", "DA00205", "DA00206", "DA00207", "DA00208", "DA00209", "DA00210", "DA00211", "DA00212", "DA00213", "DA00214", "DA00215", "DA00216", "DA00217", "DA00218", "DA00219", "DA00220", "DA00221", "DA00222", "DA00223", "DA00224", "DA00225", "DA00226", "DA00227", "DA00228", "DA00229", "DA00230",
#> "DA00231", "DA00232", "DA00233", "DA00234", "DA00235", "DA00236", "DA00237", "DA00238", "DA00239", "DA00240", "DA00241", "DA00242", "DA00243", "DA00244", "DA00245", "DA00246", "DA00247", "DA00248", "DA00249", "DA00250", "DA00251", "DA00252", "DA00253", "DA00254", "DA00255", "DA00256", "DA00257", "DA00258", "DA00259", "DA00260", "DA00261", "DA00262", "DA00263", "DA00264", "DA00265", "DA00266", "DA00267", "DA00268", "DA00269", "DA00270", "DA00271", "DA00272", "DA00273", "DA00274", "DA00275", "DA00276",
#> "DA00277", "DA00278", "DA00279", "DA00280", "DA00281", "DA00282", "DA00283", "DA00284", "DA00285", "DA00286", "DA00287", "DA00288", "DA00289", "DA00290", "DA00291", "DA00292", "DA00293", "DA00294", "DA00295", "DA00296", "DA00297", "DA00298", "DA00299", "DA00300", "DA00301", "DA00302", "DA00303", "DA00304", "DA00305", "DA00306", "DA00307", "DA00308", "DA00309", "DA00310", "DA00311", "DA00312", "DA00313", "DA00314", "DA00315", "DA00316", "DA00317", "DA00318", "DA00319", "DA00320", "DA00321", "DA00322",
#> "DA00323", "DA00324", "DA00325", "DA00326", "DA00327", "DA00328", "DA00329", "DA00330", "DA00331", "DA00332", "DA00333", "DA00334", "DA00335", "DA00336", "DA00337", "DA00338", "DA00339", "DA00340", "DA00341", "DA00342", "DA00343", "DA00344", "DA00345", "DA00346", "DA00347", "DA00348", "DA00349", "DA00350", "DA00351", "DA00352", "DA00353", "DA00354", "DA00355", "DA00356", "DA00357", "DA00358", "DA00359", "DA00360", "DA00361", "DA00362", "DA00363", "DA00364", "DA00365", "DA00366", "DA00367", "DA00368",
#> "DA00369", "DA00370", "DA00371", "DA00372", "DA00373", "DA00374", "DA00375", "DA00376", "DA00377", "DA00378", "DA00379", "DA00380", "DA00381", "DA00382", "DA00383", "DA00384", "DA00385", "DA00386", "DA00387", "DA00388", "DA00389", "DA00390", "DA00391", "DA00392", "DA00393", "DA00394", "DA00395", "DA00396", "DA00397", "DA00398", "DA00399", "DA00400", "DA00401", "DA00402", "DA00403", "DA00404", "DA00405", "DA00406", "DA00407", "DA00408", "DA00409", "DA00410", "DA00411", "DA00412", "DA00413", "DA00414",
#> "DA00415", "DA00416", "DA00417", "DA00418", "DA00419", "DA00420", "DA00421", "DA00422", "DA00423", "DA00424", "DA00425", "DA00426", "DA00427", "DA00428", "DA00429", "DA00430", "DA00431", "DA00432", "DA00433", "DA00434", "DA00435", "DA00436", "DA00437", "DA00438", "DA00439", "DA00440", "DA00441", "DA00442", "DA00443", "DA00444", "DA00445", "DA00446", "DA00447", "DA00448", "DA00449", "DA00450", "DA00451", "DA00452", "DA00453", "DA00454", "DA00455", "DA00456", "DA00457", "DA00458", "DA00459", "DA00460",
#> "DA00461", "DA00462", "DA00463", "DA00464", "DA00465", "DA00466", "DA00467", "DA00468", "DA00469", "DA00470", "DA00471", "DA00472", "DA00473", "DA00474", "DA00475", "DA00476", "DA00477", "DA00478", "DA00479", "DA00480", "DA00481", "DA00482", "DA00483", "DA00484", "DA00485", "DA00486", "DA00487", "DA00488", "DA00489", "DA00490", "DA00491", "DA00492", "DA00493", "DA00494", "DA00495", "DA00496", "DA00497", "DA00498", "DA00499", "DA00500", "DA00501", "DA00502", "DA00503", "DA00504", "DA00505", "DA00506",
#> "DA00507", "DA00508", "DA00509", "DA00510", "DA00511", "DA00512", "DA00513", "DA00514", "DA00515", "DA00516", "DA00517", "DA00518", "DA00519", "DA00520", "DA00521", "DA00522", "DA00523", "DA00524", "DA00525", "DA00526", "DA00527", "DA00528", "DA00529", "DA00530", "DA00531", "DA00532", "DA00533", "DA00534", "DA00535", "DA00536")c(12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
#> 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5)
#> --------------------------------------
# Data summary -------------------------------------------------------------
qc_res$data_summary$na_col_hist
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
qc_res$data_summary$na_row_hist
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
qc_res$data_summary$cor_results
#> Protein1 Protein2 Correlation
#> 1 ATP5IF1 AIFM1 0.76
#> 2 AXIN1 ARHGEF12 0.76
#> 3 AIFM1 ATP5IF1 0.76
#> 4 ARHGEF12 AXIN1 0.76
#> 5 ARHGEF12 AIFM1 0.71
#> 6 AIFM1 ARHGEF12 0.71
qc_res$data_summary$cor_heatmap
# Metadata summary ---------------------------------------------------------
qc_res$metadata_summary$na_col_hist
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
qc_res$metadata_summary$na_row_hist
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
qc_res$metadata_summary$Age
#> Picking joint bandwidth of 6.06
qc_res$metadata_summary$Sex
qc_res$metadata_summary$BMI
#> Picking joint bandwidth of 1.77
qc_res$metadata_summary$Stage
qc_res$metadata_summary$Grade
qc_res$metadata_summary$Cohort