BitPAl
bit-parallel alignment
HOME
Main Page
About
BITPAL
BitPAl Single Word
BitPAl Multi Word
HELP
Manual
FAQ
Edit Distance
Download Code
Test data
/** * Copyright (c) 2013, Laboratory for Biocomputing and Informatics, Boston University * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * + This source code may not be used in any program that is sold, any * derivative work must allow free distribution and modification. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE LABORATORY FOR BIOCOMPUTING AND INFORMATICS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * bitwise edit distance alignment multiple word.c * bitwise edit distance alignment * * Created by Gary Benson on 7/15/12. * Copyright 2012 Boston University. All rights reserved. * */ #include
#include
#include
#include
#define wordSize 64 #define wordSizeMinusOne 63 #define freeMultipleWords free(matchA);free(matchC);free(matchG);free(matchT);free(matchN);free(P_D);free(P_S_or_P_D); int EditDistanceMultipleWord(int match, int mismatch, int indel, char *stringN, char *stringM){ //the procedure computes fast bitwise edit distance //n is the length of string1 //m is the length of string2 //n and m are *not* determined by strlen because they //may be part of longer strings //this procedure does *not* switch strings to make the longer string as string1 //best if longer string is string1 //returns -1 if error int i,j, N, M; unsigned long long int *matchA; unsigned long long int *matchC; unsigned long long int *matchG; unsigned long long int *matchT; unsigned long long int *matchN; unsigned long long int *matchVector[256] = { [0 ... 255] = NULL }; //to hold one of matchA, matchC, etc. unsigned long long int bitmask; unsigned long long int matchString; unsigned long long int *P_D; unsigned long long int *P_S_or_P_D; unsigned long long int M_m; unsigned long long int R_IS; unsigned long long int not_M_I; unsigned long long int not_M_I_xor_P_D; unsigned long long int VC_0; unsigned long long int VC_plus_1; unsigned long long int VC_0_shift; unsigned long long int VC_plus_1_shift; unsigned long long int sum; unsigned long long int sum_and_R_IS; unsigned long long int highBitMask64 = 0x8000000000000000; unsigned long long int carryBitSum; unsigned long long int carryBitVC_plus_1_shift; unsigned long long int carryBitVC_0_shift; unsigned long long int oldCarryBitVC_plus_1_shift; unsigned long long int oldCarryBitVC_0_shift; unsigned long long int mask; unsigned long long int P_DErase; unsigned long long int P_IErase; unsigned long long int P_S_or_P_DErase; int PDScore; int PSorPDScore; int editDistanceScore; int countOneBits; int Score; int bestFinalRowScore; int remainingBits; unsigned long long int remainMask = 0xFFFFFFFFFFFFFFFF; int nWords; int integerPart; int NWords; char *iterate; //compute number of wordSize-bit words required. //All computation is done in wordSize-bit words. //First bit word can only hold wordSize-1 string positions //so there is space for the zero column N = strlen(stringN); #ifdef DEBUG printf("length of string N: %d\n", N); #endif if(N>wordSize-1){ integerPart = (N-wordSize+1)/wordSize; nWords = (N-wordSize+1-wordSize*integerPart)>0?integerPart+2:integerPart+1; } else return(BitwiseEdit(match, mismatch, indel, stringN, stringM)); //length of strings for edit distance is n and m //number of wordSize-bit words is nWords NWords = nWords; /* New Addition */ //Create mask for remaining bits of sequence in last word remainingBits = N - (NWords-1)*63; #ifdef DEBUG fprintf(stderr, "Remaining bits = %d\n", remainingBits); #endif remainMask >>= 64-remainingBits; #ifdef DEBUG fprintf(stderr, "Remaining bit mask = %s\n", convertToBitString64(remainMask)); #endif //storage allocation matchA = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); matchC = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); matchG = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); matchT = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); matchN = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); P_D = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); P_S_or_P_D = (unsigned long long int *)calloc(NWords,sizeof(unsigned long long int)); matchVector['A'] = matchA; matchVector['C'] = matchC; matchVector['G'] = matchG; matchVector['T'] = matchT; matchVector['N'] = matchN; //*************************encode match strings A C G T N for string1 //loop through stringN and store bits in matchA, matchC, etc. //column zero in the score matrix is ignored //so we start with i = 0 and bitmask = 1 in the first nWord only //**********************load complement of row zero in the score matrix which is all 1s //!replace with fast matchstring assignment for(j=0;j
>wordSizeMinusOne; carryBitVC_plus_1_shift = (VC_plus_1&highBitMask64)>>wordSizeMinusOne; //the vertical class shifts (with carry) VC_0_shift=(VC_0<<1)+oldCarryBitVC_0_shift; VC_plus_1_shift=(VC_plus_1<<1)+oldCarryBitVC_plus_1_shift; //reset first position in VC_plus_1_shift for boundary condition //this is done with oldCarryBitVC__plus_1_shift above //VC_plus_1_shift+=1;????? //new pair classes P_D[j] = M_m&VC_plus_1_shift; P_S_or_P_D[j] = VC_plus_1_shift|(M_m&VC_0_shift); } } //find edit distance score //time proportional to number of ones in bit strings PDScore=0; PSorPDScore=0; for(j=0;j
>=1; P_IErase>>=1; //debug /* * printf("\n\nP_DErase: %s",convertToBitString64(P_DErase)); * printf("\nP_IErase: %s",convertToBitString64(P_IErase)); */ } } freeMultipleWords; #ifdef DEBUG fprintf(stderr, "Inside func: score = %d, M = %d, N = %d, PDScore = %d, PSorPDScore = %d\n", editDistanceScore, M, N, PDScore, PSorPDScore ); #endif return(editDistanceScore); }