Box plot by group with a custom scatter plot overlay (markers for a subset of values)

When working with laboratory data, I want to overlay a subset of the data points on a box plot that is grouped by treatment and blocked by time point. Combining all of these elements is not straightforward in SAS and requires a clever approach that I can neither think of nor find :)

The beauty of the desired plot is that it displays two different types of outliers:

  • The box plots show statistical outliers as square markers (beyond 1.5 × IQR).
  • The overlaid markers then show “normal range” outliers, a clinical definition specific to each laboratory test.

This is difficult when the data are grouped (for example, by treatment) and then blocked or categorized by another variable (for example, the time point). SAS determines the spacing between the grouped boxes internally, so that spacing is hard to imitate for the overlaid normal-range markers. A general solution along these lines would be an unreliable kludge.

I demonstrate this kludge below, manually simulating the group separation for the overlaid markers, just to give an idea of the intent. As expected, the normal-range outliers do not line up with the box plot groups. In addition, data points that meet both outlier criteria (statistical and clinical) appear as two separate points rather than as a single point with both markers overlaid. My annotations are in green:

SGPLOT-overlay-fail

Is there a straightforward way to do this in SAS?

:

/* Build the seed table LABSTRUCT: one row per treatment group and visit,
   holding the seed value and the normal range for a single lab test. */
proc sql;
  create table labstruct
    (  mygroup         char(3) label='Treatment Group'
     , myvisitnum      num     label='Visit number'
     , myvisitname     char(8) label='Visit name'
     , labtestname     char(8) label='Name of lab test'
     , labseed         num     label='Lab measurement seed'
     , lablow          num     label='Low end of normal range'
     , labhigh         num     label='High end of normal range'
    )
  ;
  /* Name the target columns explicitly so the VALUES rows do not silently
     depend on the column order of the CREATE TABLE statement. */
  insert into labstruct
    (mygroup, myvisitnum, myvisitname, labtestname, labseed, lablow, labhigh)
    values('A', 1,  'Day 1',  'Test XYZ', 48, 40, 60)
    values('A', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('A', 10, 'Week 2', 'Test XYZ', 52, 40, 60)
    values('B', 1,  'Day 1',  'Test XYZ', 52, 40, 60)
    values('B', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('B', 10, 'Week 2', 'Test XYZ', 48, 40, 60)
  ;
quit;

/* Expand every seed row of LABSTRUCT into 20 simulated lab measurements and
   keep a second copy of the value for normal-range outliers only. */
data labdata (drop=rep labseed);
  set labstruct;

  /* X position of the overlay on the 2nd axis: shift each group one unit to
     either side of the visit number to imitate the group separation. */
  if mygroup = 'A' then scatternum = myvisitnum - 1;
  else if mygroup = 'B' then scatternum = myvisitnum + 1;

  label labvalue = 'Lab measurement';

  /* Generate 20 observations per seed row. */
  do rep = 1 to 20;
    labvalue = labseed + 6*rannor(3297);

    /* SCATTERVALUE is missing inside the normal range, so the scatter plot
       shows ONLY the normal-range outliers. */
    if lablow <= labvalue <= labhigh then scattervalue = .;
    else scattervalue = labvalue;

    output;
  end;
run;

/* Grouped box plots per visit with the normal-range outliers overlaid as a
   scatter plot that is positioned on a hidden second X axis. */
proc sgplot data=labdata;
  /* Unfilled blocks with light gray outlines, labelled with the visit name. */
  block x=myvisitnum block=myvisitname / lineattrs=(color=lightgray) nofill;

  /* One box per treatment group within each visit; square outlier markers. */
  vbox labvalue / group=mygroup category=myvisitnum outlierattrs=(symbol=square);

  /* Normal-range outliers at the manually shifted X positions. */
  scatter x=scatternum y=scattervalue / x2axis group=mygroup jitter;

  /* The second X axis only positions the overlay, so it is not displayed. */
  x2axis display=none;
  keylegend / type=marker position=bottom;
run;
+4
2

- , , , . , boxplot.

, , , , , boxplot . , , . , .

, , discreteoffset, - . , group , SAS, , ; , a b, .

discreteoffset , , - ; , , ( , ).

, , ( group visnum, a_1 b_1 ..); , ( 0 ). ; , IQR, , , ( "" ).

, , , , , , ( ). 3 , , (1 0, , , +/- 0,25). , , , , SAS , .

/* Build the seed table LABSTRUCT: one row per treatment group and visit,
   holding the seed value and the normal range for a single lab test. */
proc sql;
  create table labstruct
    (  mygroup         char(3) label='Treatment Group'
     , myvisitnum      num     label='Visit number'
     , myvisitname     char(8) label='Visit name'
     , labtestname     char(8) label='Name of lab test'
     , labseed         num     label='Lab measurement seed'
     , lablow          num     label='Low end of normal range'
     , labhigh         num     label='High end of normal range'
    )
  ;
  /* Name the target columns explicitly so the VALUES rows do not silently
     depend on the column order of the CREATE TABLE statement. */
  insert into labstruct
    (mygroup, myvisitnum, myvisitname, labtestname, labseed, lablow, labhigh)
    values('A', 1,  'Day 1',  'Test XYZ', 48, 40, 60)
    values('A', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('A', 10, 'Week 2', 'Test XYZ', 52, 40, 60)
    values('B', 1,  'Day 1',  'Test XYZ', 52, 40, 60)
    values('B', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('B', 10, 'Week 2', 'Test XYZ', 48, 40, 60)
  ;
quit;

/* Expand every seed row of LABSTRUCT into 20 simulated lab measurements and
   keep a second copy of the value for normal-range outliers only. */
data labdata (drop=rep labseed);
  set labstruct;

  /* One X variable per treatment group, each holding the unshifted visit
     number (no +/- 1); the other group's variable stays missing. */
  if mygroup = 'A' then a_scatternum = myvisitnum;
  else if mygroup = 'B' then b_scatternum = myvisitnum;

  label labvalue = 'Lab measurement';

  /* Generate 20 observations per seed row. */
  do rep = 1 to 20;
    labvalue = labseed + 6*rannor(3297);

    /* SCATTERVALUE is missing inside the normal range, so the scatter plots
       show ONLY the normal-range outliers. */
    if lablow <= labvalue <= labhigh then scattervalue = .;
    else scattervalue = labvalue;

    output;
  end;
run;

/* Grouped box plots per visit with one scatter overlay per treatment group,
   each shifted by a fixed DISCRETEOFFSET instead of using a second X axis.
   NOAUTOLEGEND turns off the automatic legend; KEYLEGEND below builds one. */
proc sgplot data=labdata noautolegend;
  /* Unfilled blocks with light gray outlines, labelled with the visit name. */
  block x=myvisitnum block=myvisitname / lineattrs=(color=lightgray) nofill;

  /* The box plot is named so that the legend can refer to it alone. */
  vbox labvalue / name="boxplot" group=mygroup category=myvisitnum
       outlierattrs=(symbol=square);

  /* Group A overlay, offset to the left of the category position. */
  scatter x=a_scatternum y=scattervalue / discreteoffset=-0.175 group=mygroup jitter;

  /* Group B overlay, offset to the right of the category position. */
  scatter x=b_scatternum y=scattervalue / discreteoffset=0.175 group=mygroup jitter;

  /* Legend restricted to the box plot, so the two scatter plots add no
     entries of their own. */
  keylegend "boxplot" / type=marker position=bottom;
run;
+3

! boxplot . , SAS 9.4 "", vbox, SAS x, . SAS 9.4 ( , :).

SAS ( ):

enter image description here

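The trick: the "category" variable of VBOX must be the same as the "x" variable of SCATTER. Then, so that the VBOX and SCATTER groups line up, give both statements the same cluster width (the defaults, 0.7 and 0.85, differ), like this: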

/* Build the seed table LABSTRUCT: one row per treatment group and visit,
   holding the seed value and the normal range for a single lab test. */
proc sql;
  create table labstruct
    (  mygroup         char(3) label='Treatment Group'
     , myvisitnum      num     label='Visit number'
     , myvisitname     char(8) label='Visit name'
     , labtestname     char(8) label='Name of lab test'
     , labseed         num     label='Lab measurement seed'
     , lablow          num     label='Low end of normal range'
     , labhigh         num     label='High end of normal range'
    )
  ;
  /* Name the target columns explicitly so the VALUES rows do not silently
     depend on the column order of the CREATE TABLE statement. */
  insert into labstruct
    (mygroup, myvisitnum, myvisitname, labtestname, labseed, lablow, labhigh)
    values('A', 1,  'Day 1',  'Test XYZ', 48, 40, 60)
    values('A', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('A', 10, 'Week 2', 'Test XYZ', 52, 40, 60)
    values('B', 1,  'Day 1',  'Test XYZ', 52, 40, 60)
    values('B', 5,  'Week 1', 'Test XYZ', 50, 40, 60)
    values('B', 10, 'Week 2', 'Test XYZ', 48, 40, 60)
  ;
quit;

/* Expand every seed row of LABSTRUCT into 20 simulated lab measurements and
   keep a second copy of the value for normal-range outliers only. */
data labdata (drop=rep labseed);
  set labstruct;

  label labvalue = 'Lab measurement';

  /* Generate 20 observations per seed row. */
  do rep = 1 to 20;
    labvalue = labseed + 6*rannor(3297);

    /* SCATTERVALUE is missing inside the normal range, so the scatter plot
       shows ONLY the normal-range outliers. */
    if lablow <= labvalue <= labhigh then scattervalue = .;
    else scattervalue = labvalue;

    output;
  end;
run;

/* Grouped box plots per visit with the normal-range outliers overlaid as a
   clustered scatter plot on the same X variable. VBOX and SCATTER use the
   same GROUP=, GROUPDISPLAY= and CLUSTERWIDTH= settings. */
proc sgplot data=labdata;
  /* Unfilled blocks with light gray outlines, labelled with the visit name. */
  block x=myvisitnum block=myvisitname / lineattrs=(color=lightgray) nofill;

  /* One box per treatment group within each visit; square outlier markers. */
  vbox labvalue / category=myvisitnum group=mygroup
       clusterwidth=0.7 groupdisplay=cluster
       outlierattrs=(symbol=square);

  /* X is the same variable as the VBOX category; cluster settings match. */
  scatter x=myvisitnum y=scattervalue / group=mygroup
       clusterwidth=0.7 groupdisplay=cluster
       jitter;

  keylegend / type=marker position=bottom;
run;

, , , !

+1
source

Source: https://habr.com/ru/post/1621123/


All Articles