package org.seqcode.tools.sequence;
import java.util.Date;
import java.util.Random;
/**
* uShuffle: a useful tool for shuffling biological sequences while preserving the k-let counts
*
*/
public class UShuffle {
public static String title = "uShuffle: a useful tool for shuffling biological sequences while preserving the k-let counts";
public static void main(String[] argv) {
String string = null;
int n = 1, k = 2;
long seed = new Date().getTime();
try {
for (int i = 0; i < argv.length; i++) {
if (argv[i].compareTo("-s") == 0) {
if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-')
string = argv[++i];
else
print_help_and_exit();
} else if (argv[i].compareTo("-n") == 0) {
if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-')
n = Integer.parseInt(argv[++i]);
else
print_help_and_exit();
} else if (argv[i].compareTo("-k") == 0) {
if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-')
k = Integer.parseInt(argv[++i]);
else
print_help_and_exit();
} else if (argv[i].compareTo("-seed") == 0) {
if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-')
seed = Integer.parseInt(argv[++i]);
else
print_help_and_exit();
}
}
} catch (NumberFormatException nfe) {
print_help_and_exit();
}
if (n <= 0 || string == null)
print_help_and_exit();
UShuffle us = new UShuffle();
us.set_randfunc(new Random(seed));
char[] s = string.toCharArray();
char[] t = new char[s.length];
us.shuffle1(s, s.length, k);
for (int i = 0; i < n; i++) {
us.shuffle2(t);
System.out.println(new String(t));
}
}
private static void print_help_and_exit() {
System.out.println(title + "\nOptions:\n" +
" -s <string> specifies the sequence\n" +
" -n <number> specifies the number of random sequences to generate\n" +
" -k <number> specifies the let size\n" +
" -seed <number> specifies the seed for random number generator\n");
System.exit(0);
}
/* set random function */
private Random rand = new Random();
public void set_randfunc(Random rand) {
this.rand = rand;
}
/* global variables for the Euler algorithm */
private char[] s_;
private int l_;
private int k_;
static class vertex {
int[] indices;
int n_indices;
int i_indices;
boolean intree;
int next;
int i_sequence;
};
private vertex[] vertices;
private int n_vertices;
private int root;
/* hashtable utility */
static class hentry {
hentry next;
int i_sequence;
int i_vertices;
};
private hentry[] entries;
private hentry[] htable;
private int htablesize;
private double hmagic;
private int hcode(int i_sequence) {
double f = 0.0;
for (int i = 0; i < k_ - 1; i++) {
f += s_[i_sequence + i];
f *= hmagic;
}
if (f < 0.0)
f = -f;
return (int) (htablesize * f) % htablesize;
}
private void hinit(int size) {
entries = new hentry[size];
for (int i = 0; i < size; i++)
entries[i] = new hentry();
htable = new hentry[size];
htablesize = size;
hmagic = (Math.sqrt(5.0) - 1.0) / 2.0;
}
private void hcleanup() {
entries = null;
htable = null;
htablesize = 0;
}
private int strncmp(int i, int j, int n) {
for (int k = 0; k < n; k++) {
char ci = s_[i + k];
char cj = s_[j + k];
if (ci != cj)
return ci - cj;
}
return 0;
}
private void hinsert(int i_sequence) {
int code = hcode(i_sequence);
hentry e, e2 = entries[i_sequence];
for (e = htable[code]; e != null; e = e.next)
if (strncmp(e.i_sequence, i_sequence, k_ - 1) == 0) {
e2.i_sequence = e.i_sequence;
e2.i_vertices = e.i_vertices;
return;
}
e2.i_sequence = i_sequence;
e2.i_vertices = n_vertices++;
e2.next = htable[code];
htable[code] = e2;
}
/* the Euler algorithm */
public void shuffle1(char[] s, int l, int k) {
int i, j, n_lets;
s_ = s;
l_ = l;
k_ = k;
if (k_ >= l_ || k_ <= 1) /* two special cases */
return;
/* use hashtable to find distinct vertices */
n_lets = l_ - k_ + 2; /* number of (k-1)-lets */
n_vertices = 0;
hinit(n_lets);
for (i = 0; i < n_lets; i++)
hinsert(i);
root = entries[n_lets - 1].i_vertices; /* the last let */
vertices = new vertex[n_vertices];
for (i = 0; i < n_vertices; i++)
vertices[i] = new vertex();
/* set i_sequence and n_indices for each vertex */
for (i = 0; i < n_lets; i++) { /* for each let */
hentry ev = entries[i];
vertex v = vertices[ev.i_vertices];
v.i_sequence = ev.i_sequence;
if (i < n_lets - 1) /* not the last let */
v.n_indices++;
}
/* allocate indices for each vertex */
for (i = 0; i < n_vertices; i++) { /* for each vertex */
vertex v = vertices[i];
v.indices = new int[v.n_indices];
}
/* populate indices for each vertex */
for (i = 0; i < n_lets - 1; i++) { /* for each edge */
hentry eu = entries[i];
hentry ev = entries[i + 1];
vertex u = vertices[eu.i_vertices];
u.indices[u.i_indices++] = ev.i_vertices;
}
hcleanup();
}
private void permute(char[] t, int n) {
for (int i = n - 1; i > 0; i--) {
int j = rand.nextInt(i + 1);
char tmp = t[i]; t[i] = t[j]; t[j] = tmp; /* swap */
}
}
private void permute(int[] t, int n) {
for (int i = n - 1; i > 0; i--) {
int j = rand.nextInt(i + 1);
int tmp = t[i]; t[i] = t[j]; t[j] = tmp; /* swap */
}
}
private void strncpy(char[] t, char[] s, int n) {
for (int i = 0; i < n; i++)
t[i] = s[i];
}
public void shuffle2(char[] t) {
vertex u, v;
int i, j;
/* exact copy case */
if (k_ >= l_) {
strncpy(t, s_, l_);
return;
}
/* simple permutation case */
if (k_ <= 1) {
strncpy(t, s_, l_);
permute(t, l_);
return;
}
/* the Wilson algorithm for random arborescence */
for (i = 0; i < n_vertices; i++)
vertices[i].intree = false;
vertices[root].intree = true;
for (i = 0; i < n_vertices; i++) {
u = vertices[i];
while (!u.intree) {
u.next = rand.nextInt(u.n_indices);
u = vertices[u.indices[u.next]];
}
u = vertices[i];
while (!u.intree) {
u.intree = true;
u = vertices[u.indices[u.next]];
}
}
/* shuffle indices to prepare for walk */
for (i = 0; i < n_vertices; i++) {
u = vertices[i];
if (i != root) {
j = u.indices[u.n_indices - 1]; /* swap the last one */
u.indices[u.n_indices - 1] = u.indices[u.next];
u.indices[u.next] = j;
permute(u.indices, u.n_indices - 1); /* permute the rest */
} else
permute(u.indices, u.n_indices);
u.i_indices = 0; /* reset to zero before walk */
}
/* walk the graph */
strncpy(t, s_, k_ - 1); /* the first let remains the same */
u = vertices[0];
i = k_ - 1;
while (u.i_indices < u.n_indices) {
v = vertices[u.indices[u.i_indices]];
j = v.i_sequence + k_ - 2;
t[i++] = s_[j];
u.i_indices++;
u = v;
}
}
public void shuffle(char[] s, char[] t, int l, int k) {
shuffle1(s, l, k);
shuffle2(t);
}
}