/* gender_order - a LifeLines gender order statistics program by Jim Eggert (eggertj@ll.mit.edu) Version 1, 5 August 1993 listsort code by John Chandler (JCHBN@CUVMB.CC.COLUMBIA.EDU) Version 2, 10 August 1993 added family examples, modified output format slightly Version 3, 26 March 1995 changed listsort to quicksort This program computes gender order statistics for children in all the families in a database. Genders are considered to be ternary: male (M), female (F), or unknown (U). Children in a family form a pattern of genders by birth order, e.g. MFFM for a family consisting of a boy, two girls, then a boy. The frequency of these patterns is calculated. In addition to the complete gender pattern for a family, initial gender patterns are computed. For the above example, the initial patterns are . (no children), M, MF, MFF, and MFFM. The frequency of these initial patterns can be used to answer questions such as how many families with a boy then two girls go on to have another boy. For example, suppose you want to know what fraction of families with a child gender pattern P (e.g., P=MFFM) have no more children, have a boy next (PM), have a girl next (PF), and have a child of unknown (to the database!) gender next (PU). You can find these fractions as #complete(P)/#initial(P), #initial(PM)/#initial(P), #initial(PF)/#initial(P), and #initial(PU)/#initial(P), respectively. Note that these fractions should add up to 1. Also note that the pattern "." denotes no children at all. As a initial pattern it gives the total number of families in the database, as a complete pattern the number of childless families in the database. You can use either of two compare functions to sort the results differently. Rename the one you want to use as compare, the other one something else (like compare1). */ /* This compare procedure sorts purely alphabetically */ func compare1(astring,bstring) { return(strcmp(astring,bstring)) } /* This compare procedure sorts by length and alphabetically within groups of equal length */ func compare(astring,bstring) { set(alen,strlen(astring)) set(blen,strlen(bstring)) if (lt(alen,blen)) { return(neg(1)) } if (gt(alen,blen)) { return(1) } return(strcmp(astring,bstring)) } /* quicksort: Sort an input list by generating a permuted index list Input: alist - list to be sorted Output: ilist - list of index pointers into "alist" in sorted order Needed: compare- external function of two arguments to return -1,0,+1 according to relative order of the two arguments */ proc quicksort(alist,ilist) { set(len,length(alist)) set(index,len) while(index) { setel(ilist,index,index) decr(index) } call qsort(alist,ilist,1,len) } /* recursive core of quicksort */ proc qsort(alist,ilist,left,right) { if(pcur,getpivot(alist,ilist,left,right)) { set(pivot,getel(alist,getel(ilist,pcur))) set(mid,partition(alist,ilist,left,right,pivot)) call qsort(alist,ilist,left,sub(mid,1)) call qsort(alist,ilist,mid,right) } } /* partition around pivot */ func partition(alist,ilist,left,right,pivot) { while(1) { set(tmp,getel(ilist,left)) setel(ilist,left,getel(ilist,right)) setel(ilist,right,tmp) while(lt(compare(getel(alist,getel(ilist,left)),pivot),0)) { incr(left) } while(ge(compare(getel(alist,getel(ilist,right)),pivot),0)) { decr(right) } if(gt(left,right)) { break() } } return(left) } /* choose pivot */ func getpivot(alist,ilist,left,right) { set(pivot,getel(alist,getel(ilist,left))) set(left0,left) incr(left) while(le(left,right)) { set(rel,compare(getel(alist,getel(ilist,left)),pivot)) if (gt(rel,0)) { return(left) } if (lt(rel,0)) { return(left0) } incr(left) } return(0) } proc main() { list(patterns) list(initial_counts) list(complete_counts) table(indices) list(sorted_indices) list(complete_examples) /* accumulate gender order statistics, even unknown genders */ enqueue(initial_counts,0) enqueue(complete_counts,0) enqueue(patterns,".") enqueue(complete_examples,"no example") insert(indices,".",1) set(max_index,1) set(max_nc,0) set(nextfam,0) print("Processing families ") forfam(family,fnum) { setel(initial_counts,1,fnum) set(pattern,"") set(index,1) if (nc,nchildren(family)) { if (gt(nc,max_nc)) { set(max_nc,nc) } children(family,child,cnum) { if (not(strcmp(sex(child),"F"))) { set(pattern,save(concat(pattern,"F"))) } elsif (not(strcmp(sex(child),"M"))) { set(pattern,save(concat(pattern,"M"))) } else { set(pattern,save(concat(pattern,"U"))) } set(index,lookup(indices,pattern)) if (index) { setel(initial_counts,index, add(getel(initial_counts,index),1)) } else { set(max_index,add(max_index,1)) set(index,max_index) insert(indices,pattern,index) enqueue(patterns,save(pattern)) enqueue(initial_counts,1) enqueue(complete_counts,0) } } } else { if(not(strcmp(getel(complete_examples,1),"no example"))) { setel(complete_examples,1,save(key(family))) } } if (not(getel(complete_examples,index))) { setel(complete_examples,index,save(key(family))) } setel(complete_counts,index,add(getel(complete_counts,index),1)) if (ge(fnum,nextfam)) { print(d(fnum)) print(" ") set(nextfam,add(nextfam,100)) } } print("\nSorting results...") call quicksort(patterns,sorted_indices) /* print out gender order statistics sorted alphabetically */ print("done\nPrinting results...") set(initialcol,add(max_nc,16)) set(completecol,add(initialcol,12)) set(examplecol,add(completecol,12)) "Gender pattern" col(sub(initialcol,7)) "initial" col(sub(completecol,8)) "complete" col(examplecol) "example\n" forlist(sorted_indices,index,inum) { getel(patterns,index) set(initial,getel(initial_counts,index)) col(sub(initialcol,strlen(d(initial)))) d(initial) set(complete,getel(complete_counts,index)) col(sub(completecol,strlen(d(complete)))) d(complete) col(examplecol) getel(complete_examples,index) "\n" } print("done") }