To treat Xx and xX as the same, we can use a strsplit/sort approach, applied to the factor levels rather than to every element for the sake of efficiency. This assumes diploid, biallelic loci, i.e. exactly two characters per locus.
#' Canonicalize the allele order inside each locus of genotype strings.
#'
#' Every genotype is read as consecutive two-character loci (e.g. "aAbB" is
#' "aA" followed by "bB"). The two characters of each locus are sorted so that
#' spellings such as "aA" and "Aa" collapse to a single one.
#'
#' @param x Genotype strings; a character vector, or anything `unlist()` can
#'   flatten to one (e.g. a data frame of genotypes). All strings must have the
#'   same, even, number of characters.
#' @param decreasing. Passed to `sort.int()` as `decreasing`. Which of "aA" or
#'   "Aa" results depends on how the session's collation locale orders upper
#'   and lower case.
#' @return An unnamed character vector with one canonical genotype per element
#'   of the flattened `x`; `character(0)` for empty input.
canonicalize_genotype <- \(x, decreasing.=TRUE) {
  # Sort the characters within each two-character locus code. This runs on the
  # (few) distinct factor levels, not on every element of x.
  base_levels <- \(A) {
    strsplit(A, '') |>
      lapply(sort.int, decreasing=decreasing.) |>
      vapply(paste, character(1), collapse='')
  }
  # as.character() also covers factor input and the NULL that unlist() returns
  # for an empty list.
  x <- as.character(unlist(x))
  if (length(x) == 0L) {
    return(character(0))
  }
  # unique() rather than var(): var() is NA for a single genotype, which made
  # the original `if` fail on length-one input.
  len <- unique(nchar(x))
  if (length(len) != 1L) {
    stop('lengths ambiguous.')
  }
  if (is.na(len) || len == 0L || len %% 2L != 0L) {
    # An odd length would otherwise silently drop the trailing character.
    stop('genotype strings must have a positive, even number of characters.')
  }
  n_loci <- len %/% 2L
  # One column per locus: characters (1,2), (3,4), ... of every genotype.
  a <- vapply(seq_len(n_loci) - 1, \(i) {
    substr(x, 1 + 2*i, 2 + 2*i)
  }, FUN.VALUE=character(length(x))) |>
    as.factor()
  # Assigning duplicated level names merges the corresponding levels.
  levels(a) <- base_levels(levels(a))
  # Back to one column per locus, then paste the columns together row-wise.
  matrix(a, ncol=n_loci) |>
    as.data.frame() |>
    Reduce(f=paste0)
}
Gives:
> canonicalize_genotype(offspring) |>
+ table()
aabb aaBb aaBB Aabb AaBb AaBB AAbb AABb AABB
1 2 1 2 4 2 1 2 1
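Notice that the table displays Xx rather than xX, which the OP seems to prefer. To get xX, use canonicalize_genotype(., decreasing.=FALSE). (The order comes from sorting the two characters, so it depends on the collation locale; in the C locale, where upper case sorts before lower case, the two settings are swapped.)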
I wrote it so it generalizes to more than two loci [[aa, aA, Aa, AA], [bb, bB, Bb, BB], [cc, cC, Cc, CC], ...]:
> offspring3 |> canonicalize_genotype() |> table()
aabbcc aabbCc aabbCC aaBbcc aaBbCc aaBbCC aaBBcc aaBBCc aaBBCC Aabbcc AabbCc AabbCC AaBbcc AaBbCc AaBbCC
1 2 1 2 4 2 1 2 1 2 4 2 4 8 4
AaBBcc AaBBCc AaBBCC AAbbcc AAbbCc AAbbCC AABbcc AABbCc AABbCC AABBcc AABBCc AABBCC
2 4 2 1 2 1 2 4 2 1 2 1
Data:
# Build an m-by-m character matrix of random genotypes with n loci each.
# Locus k contributes two characters, each drawn with replacement from the
# k-th lower-case and k-th upper-case letter.
mk_mat <- \(m, n) {
  # Two random alleles for locus k, pasted into one two-character string.
  one_locus <- \(k) {
    pool <- c(letters[k], LETTERS[k])
    paste(sample(pool, replace=TRUE), collapse='')
  }
  # All n loci of one genotype, drawn in locus order and concatenated.
  one_genotype <- \() {
    paste(lapply(seq_len(n), one_locus), collapse='')
  }
  genotypes <- sapply(seq_len(m*m), \(cell) one_genotype())
  matrix(genotypes, m, m)
}
# Fix the RNG seed so the random example data are reproducible, then build a
# 4 x 4 data frame of three-locus genotypes.
set.seed(42)
df <- as.data.frame(mk_mat(4, 3))
#' Build the Punnett square for a cross with `nloci` biallelic loci.
#'
#' Gametes are every combination of upper/lower case for the first `nloci`
#' letters, ordered from all upper case ("ABC") to all lower case ("abc").
#' Each cell interleaves the row gamete and the column gamete locus by locus,
#' e.g. row "ABc" and column "aBC" give "AaBBcC".
#'
#' @param nloci Number of loci; a single number between 1 and 26.
#' @return A data frame with 2^nloci rows and columns, whose row and column
#'   names are the gametes and whose cells are the offspring genotypes.
mk_punnet <- \(nloci) {
  stopifnot(length(nloci) == 1L, nloci >= 1, nloci <= length(LETTERS))
  n_gametes <- 2^nloci
  # Row i holds the 32 bits of i - 1, least significant bit first.
  m <- matrix(as.logical(intToBits(0:(n_gametes - 1))), ncol=32, byrow=TRUE)
  # Keep the low nloci bits, most significant first. drop=FALSE keeps this a
  # matrix when nloci == 1.
  is_lower <- m[, nloci:1, drop=FALSE]
  # One row per gamete, one column per locus. Built with matrix() because
  # t(replicate(...)) collapses to a 1 x 2 matrix when nloci == 1.
  loci <- matrix(LETTERS[seq_len(nloci)], nrow=n_gametes, ncol=nloci,
                 byrow=TRUE)
  loci[is_lower] <- tolower(loci[is_lower])
  aloci <- asplit(loci, 1)
  gametes <- apply(loci, 1, paste, collapse='')
  # paste0() pairs the alleles locus by locus; collapse joins the loci.
  outer(aloci, aloci, Vectorize(\(a, b) paste0(a, b, collapse=''))) |>
    `dimnames<-`(list(gametes, gametes)) |>
    as.data.frame()
}
# Three-locus Punnett square used in the examples.
offspring3 <- mk_punnet(3)
Giving:
> offspring3
ABC ABc AbC Abc aBC aBc abC abc
ABC AABBCC AABBCc AABbCC AABbCc AaBBCC AaBBCc AaBbCC AaBbCc
ABc AABBcC AABBcc AABbcC AABbcc AaBBcC AaBBcc AaBbcC AaBbcc
AbC AAbBCC AAbBCc AAbbCC AAbbCc AabBCC AabBCc AabbCC AabbCc
Abc AAbBcC AAbBcc AAbbcC AAbbcc AabBcC AabBcc AabbcC Aabbcc
aBC aABBCC aABBCc aABbCC aABbCc aaBBCC aaBBCc aaBbCC aaBbCc
aBc aABBcC aABBcc aABbcC aABbcc aaBBcC aaBBcc aaBbcC aaBbcc
abC aAbBCC aAbBCc aAbbCC aAbbCc aabBCC aabBCc aabbCC aabbCc
abc aAbBcC aAbBcc aAbbcC aAbbcc aabBcC aabBcc aabbcC aabbcc