001package com.hfg.bio.seq.format.feature.genbank; 002 003import java.util.ArrayList; 004import java.util.List; 005import java.util.Stack; 006 007import com.hfg.bio.seq.NucleicAcid; 008import com.hfg.bio.seq.SeqLocation; 009import com.hfg.bio.seq.format.SeqIOException; 010import com.hfg.bio.seq.format.feature.SeqFeatureLocation; 011import com.hfg.math.Range; 012import com.hfg.util.CompareUtil; 013import com.hfg.util.StringBuilderPlus; 014import com.hfg.util.StringUtil; 015import com.hfg.util.collection.CollectionUtil; 016 017//------------------------------------------------------------------------------ 018/** 019 GenBank feature location. 020 Not all location expression types are currently supported for methods like eval(). 021 <div> 022 See <a href='http://www.insdc.org/documents/feature-table#3.4.3'>http://www.insdc.org/documents/feature-table#3.4.3</a> 023 </div> 024 @author J. Alex Taylor, hairyfatguy.com 025 */ 026//------------------------------------------------------------------------------ 027// com.hfg XML/HTML Coding Library 028// 029// This library is free software; you can redistribute it and/or 030// modify it under the terms of the GNU Lesser General Public 031// License as published by the Free Software Foundation; either 032// version 2.1 of the License, or (at your option) any later version. 033// 034// This library is distributed in the hope that it will be useful, 035// but WITHOUT ANY WARRANTY; without even the implied warranty of 036// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 037// Lesser General Public License for more details. 038// 039// You should have received a copy of the GNU Lesser General Public 040// License along with this library; if not, write to the Free Software 041// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 042// 043// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 044// jataylor@hairyfatguy.com 045//------------------------------------------------------------------------------ 046 047/* 048 Examples (Not all of these forms are currently supported by this class: 049 050 Location Description 051 052 467 Points to a single base in the presented sequence 053 054 340..565 Points to a continuous range of bases bounded by and 055 including the starting and ending bases 056 057 <345..500 Indicates that the exact lower boundary point of a feature 058 is unknown. The location begins at some base previous to 059 the first base specified (which need not be contained in 060 the presented sequence) and continues to and includes the 061 ending base 062 063 <1..888 The feature starts before the first sequenced base and 064 continues to and includes base 888 065 066 1..>888 The feature starts at the first sequenced base and 067 continues beyond base 888 068 069 102.110 Indicates that the exact location is unknown but that it is 070 one of the bases between bases 102 and 110, inclusive 071 072 123^124 Points to a site between bases 123 and 124 073 074 join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to form 075 one contiguous sequence 076 077 078 complement(34..126) Start at the base complementary to 126 and finish at the 079 base complementary to base 34 (the feature is on the strand 080 complementary to the presented strand) 081 082 083 complement(join(2691..4571,4918..5163)) 084 Joins regions 2691 to 4571 and 4918 to 5163, then 085 complements the joined segments (the feature is on the 086 strand complementary to the presented strand) 087 088 join(complement(4918..5163),complement(2691..4571)) 089 Complements regions 4918 to 5163 and 2691 to 4571, then 090 joins the complemented segments (the feature is on the 091 strand complementary to the presented strand) 092 093 J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in 094 this database) with primary accession number 'J00194' 095 096 join(1..100,J00194.1:100..202) 097 Joins region 1..100 of the existing entry with the region 098 100..202 of remote entry J00194 099 100 */ 101 102public class GenBankFeatureLocation implements SeqFeatureLocation 103{ 104 private String mStringValue; 105 private SeqOperation mSeqOperation; 106 private boolean mParsed; 107 private boolean mEvaluatable; 108 private Integer mSeqLength; 109 110 private enum SeqOperationType 111 { 112 complement, 113 join, 114 order 115 } 116 117 //########################################################################### 118 // CONSTRUCTORS 119 //########################################################################### 120 121 //--------------------------------------------------------------------------- 122 public GenBankFeatureLocation(String inLocationString) 123 { 124 mStringValue = inLocationString; 125 } 126 127 //########################################################################### 128 // PUBLIC METHODS 129 //########################################################################### 130 131 //-------------------------------------------------------------------------- 132 @Override 133 public GenBankFeatureLocation clone() 134 { 135 GenBankFeatureLocation copy; 136 try 137 { 138 copy = (GenBankFeatureLocation) super.clone(); 139 } 140 catch (CloneNotSupportedException e) 141 { 142 throw new RuntimeException("Coding problem! CloneNotSupportedException should not be possible when cloning a " 143 + this.getClass().getSimpleName() + " object!", e); 144 } 145 146 if (mSeqOperation != null) 147 { 148 copy.mSeqOperation = mSeqOperation.clone(); 149 } 150 151 return copy; 152 } 153 154 //--------------------------------------------------------------------------- 155 public Long length() 156 { 157 if (! mParsed) 158 { 159 parse(); 160 } 161 162 return (mEvaluatable && mSeqOperation != null ? mSeqOperation.length() : null); 163 } 164 165 166 //-------------------------------------------------------------------------- 167 public GenBankFeatureLocation setSeqLength(Integer inValue) 168 { 169 mSeqLength = inValue; 170 return this; 171 } 172 173 //-------------------------------------------------------------------------- 174 public Integer getSeqLength() 175 { 176 return mSeqLength; 177 } 178 179 180 //--------------------------------------------------------------------------- 181 public void append(String inAdditionalLocationData) 182 { 183 mStringValue = (StringUtil.isSet(mStringValue) ? mStringValue : "") + inAdditionalLocationData; 184 mParsed = false; 185 } 186 187 //--------------------------------------------------------------------------- 188 /** 189 Extends the last sequence range by the specified number of base pairs. 190 Supports addition and subtraction for both forward and reverse (ie. complement) location types. 191 * @param inAdditionalLength length to extend the last sequence range by 192 */ 193 public void extend3Prime(int inAdditionalLength) 194 { 195 if (! mParsed) 196 { 197 parse(); 198 } 199 200 int lengthToAdd = inAdditionalLength; 201 if (mSeqOperation != null) 202 { 203 if (mSeqOperation.mType != null) 204 { 205 if (mSeqOperation.mType.equals(SeqOperationType.complement)) 206 { 207 // Reverse strand. Extend the first range 3'. 208 209 SeqLocation revSeqLocationToExtend = null; 210 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 211 { 212 SeqOperation subOperation = mSeqOperation.mSubOperations.get(0); 213 if (subOperation.mType.equals(SeqOperationType.join) 214 && CollectionUtil.hasValues(subOperation.mLocations)) 215 { 216 if (lengthToAdd < 0) 217 { 218 // Asked to subtract instead of extend 219 for (int i = 0; i < subOperation.mLocations.size(); i++) 220 { 221 revSeqLocationToExtend = subOperation.mLocations.get(i); 222 if (revSeqLocationToExtend.length() > -lengthToAdd) 223 { 224 break; 225 } 226 else 227 { 228 // The size to remove is longer than the current location. Remove it completely 229 lengthToAdd += revSeqLocationToExtend.length(); 230 subOperation.mLocations.remove(i--); 231 } 232 } 233 } 234 else 235 { 236 revSeqLocationToExtend = subOperation.mLocations.get(0); 237 } 238 } 239 } 240 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 241 { 242 if (lengthToAdd < 0) 243 { 244 // Asked to subtract instead of extend 245 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 246 { 247 revSeqLocationToExtend = mSeqOperation.mLocations.get(i); 248 if (revSeqLocationToExtend.length() > -lengthToAdd) 249 { 250 break; 251 } 252 else 253 { 254 // The size to remove is longer than the current location. Remove it completely 255 lengthToAdd += revSeqLocationToExtend.length(); 256 mSeqOperation.mLocations.remove(i--); 257 } 258 } 259 } 260 else 261 { 262 revSeqLocationToExtend = mSeqOperation.mLocations.get(0); 263 } 264 } 265 266 if (revSeqLocationToExtend != null) 267 { 268 int newStart = revSeqLocationToExtend.getStart() - lengthToAdd; 269 if (newStart < 1) 270 { 271 // Can't extend 5' of the first nucleotide 272 newStart = 1; 273 } 274 275 revSeqLocationToExtend.setStart(newStart); 276 } 277 } 278 else 279 { 280 // Forward stand. Extend the last range to the right. 281 SeqLocation fwdSeqLocationToExtend = null; 282 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 283 { 284 SeqOperation subOperation = mSeqOperation.mSubOperations.get(0); 285 if (subOperation.mType.equals(SeqOperationType.join) 286 && CollectionUtil.hasValues(subOperation.mLocations)) 287 { 288 if (lengthToAdd < 0) 289 { 290 // Asked to subtract instead of extend 291 for (int i = subOperation.mLocations.size() - 1; i >= 0; i--) 292 { 293 fwdSeqLocationToExtend = subOperation.mLocations.get(i); 294 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 295 { 296 break; 297 } 298 else 299 { 300 // The size to remove is longer than the current location. Remove it completely 301 lengthToAdd += fwdSeqLocationToExtend.length(); 302 subOperation.mLocations.remove(i++); 303 } 304 } 305 } 306 else 307 { 308 fwdSeqLocationToExtend = subOperation.mLocations.get(subOperation.mLocations.size() - 1); 309 } 310 } 311 } 312 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 313 { 314 if (lengthToAdd < 0) 315 { 316 // Asked to subtract instead of extend 317 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 318 { 319 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(i); 320 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 321 { 322 break; 323 } 324 else 325 { 326 // The size to remove is longer than the current location. Remove it completely 327 lengthToAdd += fwdSeqLocationToExtend.length(); 328 mSeqOperation.mLocations.remove(i); 329 } 330 } 331 } 332 else 333 { 334 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(mSeqOperation.mLocations.size() - 1); 335 } 336 } 337 338 if (fwdSeqLocationToExtend != null) 339 { 340 int newEnd = fwdSeqLocationToExtend.getEnd() + lengthToAdd; 341 if (mSeqLength != null 342 && newEnd > mSeqLength) 343 { 344 // Can't extend 3' of the last nucleotide 345 newEnd = mSeqLength; 346 } 347 348 fwdSeqLocationToExtend.setEnd(newEnd); 349 } 350 } 351 } 352 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 353 { 354 if (lengthToAdd < 0) 355 { 356 // Asked to subtract instead of extend 357 SeqLocation fwdSeqLocationToExtend = null; 358 359 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 360 { 361 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(i); 362 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 363 { 364 break; 365 } 366 else 367 { 368 // The size to remove is longer than the current location. Remove it completely 369 lengthToAdd += fwdSeqLocationToExtend.length(); 370 mSeqOperation.mLocations.remove(i); 371 } 372 } 373 374 int newEnd = fwdSeqLocationToExtend.getEnd() + lengthToAdd; 375 if (mSeqLength != null 376 && newEnd > mSeqLength) 377 { 378 // Can't extend 3' of the last nucleotide 379 newEnd = mSeqLength; 380 } 381 382 fwdSeqLocationToExtend.setEnd(newEnd); 383 } 384 else 385 { 386 SeqLocation fwdSeqLocationToExtend = mSeqOperation.mLocations.get(mSeqOperation.mLocations.size() - 1); 387 388 int newEnd = fwdSeqLocationToExtend.getEnd() + lengthToAdd; 389 if (mSeqLength != null 390 && newEnd > mSeqLength) 391 { 392 // Can't extend 3' of the last nucleotide 393 newEnd = mSeqLength; 394 } 395 396 fwdSeqLocationToExtend.setEnd(newEnd); 397 } 398 } 399 } 400 401 mStringValue = null; 402 } 403 404 //--------------------------------------------------------------------------- 405 /** 406 Extends the start of the first sequence range by the specified number of base pairs. 407 Supports addition and subtraction for both forward and reverse (ie. complement) location types. 408 * @param inAdditionalLength length to extend the first sequence range by 409 */ 410 public void extend5Prime(int inAdditionalLength) 411 { 412 if (! mParsed) 413 { 414 parse(); 415 } 416 417 int lengthToAdd = inAdditionalLength; 418 if (mSeqOperation != null) 419 { 420 if (mSeqOperation.mType != null) 421 { 422 if (mSeqOperation.mType.equals(SeqOperationType.complement)) 423 { 424 // Reverse strand. Extend the last range 3'. 425 426 SeqLocation revSeqLocationToExtend = null; 427 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 428 { 429 SeqOperation subOperation = mSeqOperation.mSubOperations.get(0); 430 if (subOperation.mType.equals(SeqOperationType.join) 431 && CollectionUtil.hasValues(subOperation.mLocations)) 432 { 433 if (lengthToAdd < 0) 434 { 435 // Asked to subtract instead of extend 436 for (int i = subOperation.mLocations.size() - 1; i >= 0; i--) 437 { 438 revSeqLocationToExtend = subOperation.mLocations.get(i); 439 if (revSeqLocationToExtend.length() > -lengthToAdd) 440 { 441 break; 442 } 443 else 444 { 445 // The size to remove is longer than the current location. Remove it completely 446 lengthToAdd += revSeqLocationToExtend.length(); 447 subOperation.mLocations.remove(i); 448 } 449 } 450 } 451 else 452 { 453 revSeqLocationToExtend = subOperation.mLocations.get(subOperation.mLocations.size() - 1); 454 } 455 } 456 } 457 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 458 { 459 if (lengthToAdd < 0) 460 { 461 // Asked to subtract instead of extend 462 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 463 { 464 revSeqLocationToExtend = mSeqOperation.mLocations.get(i); 465 if (revSeqLocationToExtend.length() > -lengthToAdd) 466 { 467 break; 468 } 469 else 470 { 471 // The size to remove is longer than the current location. Remove it completely 472 lengthToAdd += revSeqLocationToExtend.length(); 473 mSeqOperation.mLocations.remove(i); 474 } 475 } 476 } 477 else 478 { 479 revSeqLocationToExtend = mSeqOperation.mLocations.get(mSeqOperation.mLocations.size() - 1); 480 } 481 } 482 483 if (revSeqLocationToExtend != null) 484 { 485 int newEnd = revSeqLocationToExtend.getEnd() + lengthToAdd; 486 if (mSeqLength != null 487 && newEnd > mSeqLength) 488 { 489 // Can't extend 3' of the last nucleotide 490 newEnd = mSeqLength; 491 } 492 493 revSeqLocationToExtend.setEnd(newEnd); 494 } 495 } 496 else 497 { 498 // Forward stand. Extend the first range 5'. 499 SeqLocation fwdSeqLocationToExtend = null; 500 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 501 { 502 SeqOperation subOperation = mSeqOperation.mSubOperations.get(0); 503 if (subOperation.mType.equals(SeqOperationType.join) 504 && CollectionUtil.hasValues(subOperation.mLocations)) 505 { 506 if (lengthToAdd < 0) 507 { 508 // Asked to subtract instead of extend 509 for (int i = 0; i < subOperation.mLocations.size(); i++) 510 { 511 fwdSeqLocationToExtend = subOperation.mLocations.get(i); 512 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 513 { 514 break; 515 } 516 else 517 { 518 // The size to remove is longer than the current location. Remove it completely 519 lengthToAdd += fwdSeqLocationToExtend.length(); 520 subOperation.mLocations.remove(i--); 521 } 522 } 523 } 524 else 525 { 526 fwdSeqLocationToExtend = subOperation.mLocations.get(0); 527 } 528 } 529 } 530 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 531 { 532 if (lengthToAdd < 0) 533 { 534 // Asked to subtract instead of extend 535 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 536 { 537 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(i); 538 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 539 { 540 break; 541 } 542 else 543 { 544 // The size to remove is longer than the current location. Remove it completely 545 lengthToAdd += fwdSeqLocationToExtend.length(); 546 mSeqOperation.mLocations.remove(i--); 547 } 548 } 549 } 550 else 551 { 552 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(0); 553 } 554 } 555 556 if (fwdSeqLocationToExtend != null) 557 { 558 int newStart = fwdSeqLocationToExtend.getStart() - lengthToAdd; 559 if (newStart < 1) 560 { 561 // Can't extend 5' of the first nucleotide 562 newStart = 1; 563 } 564 565 fwdSeqLocationToExtend.setStart(newStart); 566 } 567 } 568 } 569 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 570 { 571 if (lengthToAdd < 0) 572 { 573 // Asked to subtract instead of extend 574 SeqLocation fwdSeqLocationToExtend = null; 575 576 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 577 { 578 fwdSeqLocationToExtend = mSeqOperation.mLocations.get(i); 579 if (fwdSeqLocationToExtend.length() > -lengthToAdd) 580 { 581 break; 582 } 583 else 584 { 585 // The size to remove is longer than the current location. Remove it completely 586 lengthToAdd += fwdSeqLocationToExtend.length(); 587 mSeqOperation.mLocations.remove(i--); 588 } 589 } 590 591 int newStart = fwdSeqLocationToExtend.getStart() - lengthToAdd; 592 if (newStart < 1) 593 { 594 // Can't extend 5' of the first nucleotide 595 newStart = 1; 596 } 597 598 fwdSeqLocationToExtend.setStart(newStart); 599 } 600 else 601 { 602 SeqLocation fwdSeqLocationToExtend = mSeqOperation.mLocations.get(0); 603 604 int newStart = fwdSeqLocationToExtend.getStart() - lengthToAdd; 605 if (newStart < 1) 606 { 607 // Can't extend 5' of the first nucleotide 608 newStart = 1; 609 } 610 611 fwdSeqLocationToExtend.setStart(newStart); 612 } 613 } 614 } 615 616 mStringValue = null; 617 } 618 619 //--------------------------------------------------------------------------- 620 /** 621 Sets the 5' position of the location. 622 Supports both forward and reverse (ie. complement) location types. 623 @param inValue new 5' position 624 */ 625 public void set5Prime(int inValue) 626 { 627 if (! mParsed) 628 { 629 parse(); 630 } 631 632 if (inValue <= 0) 633 { 634 throw new RuntimeException("Values less than or equal to 0 are not valid!"); 635 } 636 637 if (mSeqOperation != null) 638 { 639 if (mSeqOperation.mType != null) 640 { 641 if (mSeqOperation.mType.equals(SeqOperationType.complement)) 642 { 643 // Reverse strand. 644 645 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 646 { 647 for (SeqOperation subOperation : mSeqOperation.mSubOperations) 648 { 649 if (subOperation.mType.equals(SeqOperationType.join) 650 && CollectionUtil.hasValues(subOperation.mLocations)) 651 { 652 for (int i = subOperation.mLocations.size() - 1; i >= 0; i--) 653 { 654 SeqLocation seqLocation = subOperation.mLocations.get(i); 655 if (seqLocation.getStart() > inValue) 656 { 657 subOperation.mLocations.remove(i); 658 } 659 else 660 { 661 seqLocation.setEnd(inValue); 662 break; 663 } 664 } 665 666 if (1 == subOperation.mLocations.size()) 667 { 668 // Only one segment left. join isn't necessary 669 subOperation.mType = null; 670 } 671 } 672 } 673 } 674 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 675 { 676 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 677 { 678 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 679 if (seqLocation.getStart() > inValue) 680 { 681 mSeqOperation.mLocations.remove(i); 682 } 683 else 684 { 685 seqLocation.setEnd(inValue); 686 break; 687 } 688 } 689 690 if (mSeqOperation.mType != null 691 && mSeqOperation.mType.equals(SeqOperationType.join) 692 && 1 == mSeqOperation.mLocations.size()) 693 { 694 // Only one segment left. join isn't necessary 695 mSeqOperation.mType = null; 696 } 697 } 698 } 699 else 700 { 701 // Forward stand. Extend the first range 5'. 702 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 703 { 704 for (SeqOperation subOperation : mSeqOperation.mSubOperations) 705 { 706 if (subOperation.mType.equals(SeqOperationType.join) 707 && CollectionUtil.hasValues(subOperation.mLocations)) 708 { 709 for (int i = 0; i < subOperation.mLocations.size(); i++) 710 { 711 SeqLocation seqLocation = subOperation.mLocations.get(i); 712 if (seqLocation.getEnd() < inValue) 713 { 714 subOperation.mLocations.remove(i--); 715 } 716 else 717 { 718 seqLocation.setStart(inValue); 719 break; 720 } 721 } 722 723 if (1 == subOperation.mLocations.size()) 724 { 725 // Only one segment left. join isn't necessary 726 subOperation.mType = null; 727 } 728 } 729 } 730 } 731 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 732 { 733 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 734 { 735 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 736 if (seqLocation.getEnd() < inValue) 737 { 738 mSeqOperation.mLocations.remove(i--); 739 } 740 else 741 { 742 seqLocation.setStart(inValue); 743 break; 744 } 745 } 746 747 // Did we remove all the locations? (Happens if the specified start is > the 3' of the last location) 748 if (0 == mSeqOperation.mLocations.size()) 749 { 750 mSeqOperation.mLocations.add(new SeqLocation(inValue, inValue)); 751 } 752 753 if (mSeqOperation.mType != null 754 && mSeqOperation.mType.equals(SeqOperationType.join) 755 && mSeqOperation.mLocations.size() < 2) 756 { 757 // Only one segment left. join isn't necessary 758 mSeqOperation.mType = null; 759 } 760 761 mStringValue = mSeqOperation.toString(); 762 } 763 } 764 } 765 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 766 { 767 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 768 { 769 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 770 if (seqLocation.getEnd() < inValue) 771 { 772 mSeqOperation.mLocations.remove(i--); 773 } 774 else 775 { 776 seqLocation.setStart(inValue); 777 break; 778 } 779 } 780 781 if (mSeqOperation.mType != null 782 && mSeqOperation.mType.equals(SeqOperationType.join) 783 && 1 == mSeqOperation.mLocations.size()) 784 { 785 // Only one segment left. join isn't necessary 786 mSeqOperation.mType = null; 787 } 788 } 789 } 790 791 mStringValue = null; 792 } 793 794 //--------------------------------------------------------------------------- 795 /** 796 Sets the 3' position of the location. 797 Supports both forward and reverse (ie. complement) location types. 798 @param inValue new 3' position 799 */ 800 public void set3Prime(int inValue) 801 { 802 if (! mParsed) 803 { 804 parse(); 805 } 806 807 if (inValue <= 0) 808 { 809 throw new RuntimeException("Values less than or equal to 0 are not valid!"); 810 } 811 812 if (mSeqOperation != null) 813 { 814 if (mSeqOperation.mType != null) 815 { 816 if (mSeqOperation.mType.equals(SeqOperationType.complement)) 817 { 818 // Reverse strand. 819 820 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 821 { 822 for (SeqOperation subOperation : mSeqOperation.mSubOperations) 823 { 824 if (subOperation.mType.equals(SeqOperationType.join) 825 && CollectionUtil.hasValues(subOperation.mLocations)) 826 { 827 for (int i = 0; i < subOperation.mLocations.size(); i++) 828 { 829 SeqLocation seqLocation = subOperation.mLocations.get(i); 830 if (seqLocation.getEnd() < inValue) 831 { 832 subOperation.mLocations.remove(i--); 833 } 834 else 835 { 836 seqLocation.setStart(inValue); 837 break; 838 } 839 } 840 841 if (1 == subOperation.mLocations.size()) 842 { 843 // Only one segment left. join isn't necessary 844 subOperation.mType = null; 845 } 846 } 847 } 848 } 849 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 850 { 851 for (int i = 0; i < mSeqOperation.mLocations.size(); i++) 852 { 853 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 854 if (seqLocation.getEnd() < inValue) 855 { 856 mSeqOperation.mLocations.remove(i--); 857 } 858 else 859 { 860 seqLocation.setStart(inValue); 861 break; 862 } 863 } 864 865 866 if (mSeqOperation.mType != null 867 && mSeqOperation.mType.equals(SeqOperationType.join) 868 && 1 == mSeqOperation.mLocations.size()) 869 { 870 // Only one segment left. join isn't necessary 871 mSeqOperation.mType = null; 872 } 873 } 874 } 875 else 876 { 877 // Forward stand. Extend the first range 5'. 878 if (CollectionUtil.hasValues(mSeqOperation.mSubOperations)) 879 { 880 for (SeqOperation subOperation : mSeqOperation.mSubOperations) 881 { 882 if (subOperation.mType.equals(SeqOperationType.join) 883 && CollectionUtil.hasValues(subOperation.mLocations)) 884 { 885 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 886 { 887 SeqLocation seqLocation = subOperation.mLocations.get(i); 888 if (seqLocation.getStart() > inValue) 889 { 890 subOperation.mLocations.remove(i); 891 } 892 else 893 { 894 seqLocation.setEnd(inValue); 895 break; 896 } 897 } 898 899 if (1 == subOperation.mLocations.size()) 900 { 901 // Only one segment left. join isn't necessary 902 subOperation.mType = null; 903 } 904 } 905 } 906 } 907 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 908 { 909 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 910 { 911 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 912 if (seqLocation.getStart() > inValue) 913 { 914 mSeqOperation.mLocations.remove(i); 915 } 916 else 917 { 918 seqLocation.setEnd(inValue); 919 break; 920 } 921 } 922 923 if (mSeqOperation.mType != null 924 && mSeqOperation.mType.equals(SeqOperationType.join) 925 && 1 == mSeqOperation.mLocations.size()) 926 { 927 // Only one segment left. join isn't necessary 928 mSeqOperation.mType = null; 929 } 930 } 931 } 932 } 933 else if (CollectionUtil.hasValues(mSeqOperation.mLocations)) 934 { 935 for (int i = mSeqOperation.mLocations.size() - 1; i >= 0; i--) 936 { 937 SeqLocation seqLocation = mSeqOperation.mLocations.get(i); 938 if (seqLocation.getStart() > inValue) 939 { 940 mSeqOperation.mLocations.remove(i); 941 } 942 else 943 { 944 seqLocation.setEnd(inValue); 945 break; 946 } 947 } 948 949 if (mSeqOperation.mType != null 950 && mSeqOperation.mType.equals(SeqOperationType.join) 951 && 1 == mSeqOperation.mLocations.size()) 952 { 953 // Only one segment left. join isn't necessary 954 mSeqOperation.mType = null; 955 } 956 } 957 } 958 959 mStringValue = null; 960 } 961 962 //--------------------------------------------------------------------------- 963 public String eval(String inSequence) 964 { 965 if (! mParsed) 966 { 967 parse(); 968 } 969 970 return (mEvaluatable ? mSeqOperation.eval(inSequence) : null); 971 } 972 973 //--------------------------------------------------------------------------- 974 public String eval(NucleicAcid inNucleicAcid) 975 { 976 if (! mParsed) 977 { 978 parse(); 979 } 980 981 return (mEvaluatable ? mSeqOperation.eval(inNucleicAcid) : null); 982 } 983 984 //-------------------------------------------------------------------------- 985 public List<SeqLocation> getIntrons() 986 { 987 if (! mParsed) 988 { 989 parse(); 990 } 991 992 List<SeqLocation> introns = null; 993 994 if (mSeqOperation != null) 995 { 996 introns = mSeqOperation.getIntrons(); 997 } 998 999 return introns; 1000 } 1001 1002 //--------------------------------------------------------------------------- 1003 @Override 1004 public String toString() 1005 { 1006 if (null == mStringValue) 1007 { 1008 String stringValue = null; 1009 if (mSeqOperation != null) 1010 { 1011 stringValue = mSeqOperation.toString(); 1012 } 1013 1014 mStringValue = stringValue; 1015 } 1016 1017 return mStringValue; 1018 } 1019 1020 //--------------------------------------------------------------------------- 1021 @Override 1022 public boolean equals(Object inObj2) 1023 { 1024 return (inObj2 != null 1025 && inObj2 instanceof SeqFeatureLocation 1026 && 0 == compareTo(inObj2)); 1027 } 1028 1029 //--------------------------------------------------------------------------- 1030 @Override 1031 public int hashCode() 1032 { 1033 return mStringValue.hashCode(); 1034 } 1035 1036 //--------------------------------------------------------------------------- 1037 @Override 1038 public int compareTo(Object inObj2) 1039 { 1040 int result = 1; 1041 1042 if (inObj2 != null 1043 && inObj2 instanceof SeqFeatureLocation) 1044 { 1045 SeqFeatureLocation seqLoc2 = (SeqFeatureLocation) inObj2; 1046 1047 result = CompareUtil.compare(toIntRange(), seqLoc2.toIntRange()); 1048 if (0 == result) 1049 { 1050 result = CompareUtil.compare(toString(), inObj2.toString()); 1051 } 1052 } 1053 1054 return result; 1055 } 1056 1057 //--------------------------------------------------------------------------- 1058 public Range<Integer> toIntRange() 1059 { 1060 if (! mParsed) 1061 { 1062 parse(); 1063 } 1064 1065 return mEvaluatable ? mSeqOperation.toIntRange() : null; 1066 } 1067 1068 //--------------------------------------------------------------------------- 1069 public Integer get5Prime() 1070 { 1071 if (! mParsed) 1072 { 1073 parse(); 1074 } 1075 1076 return mEvaluatable ? (mSeqOperation.mType != null && mSeqOperation.mType.equals(SeqOperationType.complement) ? mSeqOperation.toIntRange().getEnd() : mSeqOperation.toIntRange().getStart()) : null; 1077 } 1078 1079 //--------------------------------------------------------------------------- 1080 public Integer get3Prime() 1081 { 1082 if (! mParsed) 1083 { 1084 parse(); 1085 } 1086 1087 return mEvaluatable ? (mSeqOperation.mType != null && mSeqOperation.mType.equals(SeqOperationType.complement) ? mSeqOperation.toIntRange().getStart() : mSeqOperation.toIntRange().getEnd()) : null; 1088 } 1089 1090 //########################################################################### 1091 // PRIVATE METHODS 1092 //########################################################################### 1093 1094 //--------------------------------------------------------------------------- 1095 private void parse() 1096 { 1097 try 1098 { 1099 innerParse(); 1100 } 1101 catch (Exception e) 1102 { 1103 throw new SeqIOException("Problem parsing GenBank feature location: " + StringUtil.singleQuote(mStringValue) + "!", e); 1104 } 1105 } 1106 1107 //--------------------------------------------------------------------------- 1108 private void innerParse() 1109 { 1110 mSeqOperation = null; 1111 mEvaluatable = false; 1112 1113 Stack<SeqOperation> operationStack = new Stack<>(); 1114 SeqOperation currentOperation = null; 1115 1116 if (! StringUtil.isSet(mStringValue) 1117 || mStringValue.contains("^") 1118 || mStringValue.contains(":")) 1119 { 1120 mEvaluatable = false; 1121 } 1122 else 1123 { 1124 String[] pieces = mStringValue.split("[\\(\\)]"); 1125 for (String piece : pieces) 1126 { 1127 if (StringUtil.isSet(piece)) 1128 { 1129 piece = piece.trim(); 1130 1131 if (piece.startsWith(",")) // SeqOperation is on the same level as the previous 1132 { 1133 piece = piece.substring(1).trim(); 1134 1135 operationStack.pop(); 1136 currentOperation = operationStack.peek(); 1137 } 1138 1139 if (piece.charAt(0) == '<' 1140 || piece.charAt(0) == '>') 1141 { 1142 piece = piece.substring(1); 1143 } 1144 1145 if (piece.charAt(0) == '-' 1146 || Character.isDigit(piece.charAt(0))) 1147 { 1148 String[] locationRanges = piece.split(","); 1149 for (String locationRange : locationRanges) 1150 { 1151 String[] locationPieces = locationRange.trim().split("\\.\\."); 1152 1153 for (String locationPiece : locationPieces) 1154 { 1155 if (locationPiece.contains(".")) 1156 { 1157 mEvaluatable = false; 1158 return; 1159 } 1160 } 1161 1162 SeqLocation location = new SeqLocation(); 1163 1164 String startString = locationPieces[0]; 1165 if (startString.charAt(0) == '<' 1166 || startString.charAt(0) == '>') 1167 { 1168 startString = startString.substring(1); 1169 } 1170 1171 location.setStart(Integer.parseInt(startString)); 1172 if (1 == locationPieces.length) 1173 { 1174 location.setEnd(Integer.parseInt(startString)); 1175 } 1176 else if (2 == locationPieces.length) 1177 { 1178 String endString = locationPieces[1]; 1179 if (endString.charAt(0) == '<' 1180 || endString.charAt(0) == '>') 1181 { 1182 endString = endString.substring(1); 1183 } 1184 1185 location.setEnd(Integer.parseInt(endString)); 1186 } 1187 else if (locationPieces.length > 2) 1188 { 1189 throw new SeqIOException("Trouble parsing location string " + StringUtil.singleQuote(mStringValue) + "!"); 1190 } 1191 1192 if (null == currentOperation) 1193 { 1194 currentOperation = new SeqOperation(location); 1195 mSeqOperation = currentOperation; 1196 } else 1197 { 1198 currentOperation.addLocation(location); 1199 } 1200 } 1201 } 1202 else 1203 { 1204 SeqOperationType seqOperationType = SeqOperationType.valueOf(piece); 1205 1206 currentOperation = new SeqOperation(seqOperationType); 1207 if (null == mSeqOperation) 1208 { 1209 mSeqOperation = currentOperation; 1210 } 1211 else 1212 { 1213 operationStack.peek().addSubOperation(currentOperation); 1214 } 1215 1216 operationStack.push(currentOperation); 1217 } 1218 } 1219 } 1220 1221 mEvaluatable = true; 1222 } 1223 1224 mParsed = true; 1225 } 1226 1227 //########################################################################### 1228 // INNER CLASS 1229 //########################################################################### 1230 1231 private class SeqOperation implements Cloneable 1232 { 1233 private SeqOperationType mType; 1234 private List<SeqOperation> mSubOperations; 1235 private List<SeqLocation> mLocations; 1236 1237 //------------------------------------------------------------------------ 1238 public SeqOperation(SeqLocation inLocation) 1239 { 1240 addLocation(inLocation); 1241 } 1242 1243 //------------------------------------------------------------------------ 1244 public SeqOperation(SeqOperationType inType) 1245 { 1246 mType = inType; 1247 } 1248 1249 1250 //-------------------------------------------------------------------------- 1251 @Override 1252 public SeqOperation clone() 1253 { 1254 SeqOperation copy; 1255 try 1256 { 1257 copy = (SeqOperation) super.clone(); 1258 } 1259 catch (CloneNotSupportedException e) 1260 { 1261 throw new RuntimeException("Coding problem! CloneNotSupportedException should not be possible when cloning a " 1262 + this.getClass().getSimpleName() + " object!", e); 1263 } 1264 1265 if (mSubOperations != null) 1266 { 1267 copy.mSubOperations = new ArrayList<>(mSubOperations.size()); 1268 for (SeqOperation op : mSubOperations) 1269 { 1270 copy.mSubOperations.add(op.clone()); 1271 } 1272 } 1273 1274 if (mLocations != null) 1275 { 1276 copy.mLocations = new ArrayList<>(mLocations.size()); 1277 for (SeqLocation loc : mLocations) 1278 { 1279 copy.mLocations.add(loc.clone()); 1280 } 1281 } 1282 1283 return copy; 1284 } 1285 1286 //------------------------------------------------------------------------ 1287 @Override 1288 public String toString() 1289 { 1290 StringBuilder stringValue = new StringBuilder(); 1291 if (mType != null) 1292 { 1293 stringValue.append(mType.name()); 1294 stringValue.append("("); 1295 1296 if (CollectionUtil.hasValues(mSubOperations)) 1297 { 1298 for (SeqOperation operation : mSubOperations) 1299 { 1300 stringValue.append(operation.toString()); 1301 } 1302 } 1303 } 1304 1305 if (CollectionUtil.hasValues(mLocations)) 1306 { 1307 StringBuilderPlus locBuffer = new StringBuilderPlus().setDelimiter(","); 1308 for (SeqLocation loc : mLocations) 1309 { 1310 // TODO: SeqLocation should have it's own toString(). 1311 if (1 == loc.length()) 1312 { 1313 locBuffer.delimitedAppend(loc.getStart()); 1314 } 1315 else 1316 { 1317 locBuffer.delimitedAppend(loc.getStart() + ".." + loc.getEnd()); 1318 } 1319 } 1320 1321 stringValue.append(locBuffer); 1322 } 1323 1324 if (mType != null) 1325 { 1326 stringValue.append(")"); 1327 } 1328 1329 1330 return stringValue.toString(); 1331 } 1332 1333 //------------------------------------------------------------------------ 1334 public long length() 1335 { 1336 long length = 0L; 1337 if (CollectionUtil.hasValues(mLocations)) 1338 { 1339 for (SeqLocation seqLocation : mLocations) 1340 { 1341 length += seqLocation.length(); 1342 } 1343 } 1344 else if (CollectionUtil.hasValues(mSubOperations)) 1345 { 1346 for (SeqOperation subOperation : mSubOperations) 1347 { 1348 length += subOperation.length(); 1349 } 1350 } 1351 1352 return length; 1353 } 1354 1355 //------------------------------------------------------------------------ 1356 public void addLocation(SeqLocation inValue) 1357 { 1358 if (null == mLocations) 1359 { 1360 mLocations = new ArrayList<>(5); 1361 } 1362 1363 mLocations.add(inValue); 1364 } 1365 1366 //------------------------------------------------------------------------ 1367 public void addSubOperation(SeqOperation inValue) 1368 { 1369 if (null == mSubOperations) 1370 { 1371 mSubOperations = new ArrayList<>(5); 1372 } 1373 1374 mSubOperations.add(inValue); 1375 } 1376 1377 //--------------------------------------------------------------------------- 1378 public Range<Integer> toIntRange() 1379 { 1380 Range<Integer> intRange = null; 1381 if (CollectionUtil.hasValues(mLocations)) 1382 { 1383 intRange = new Range<>(mLocations.get(0).getStart(), mLocations.get(mLocations.size() - 1).getEnd()); 1384 } 1385 else if (CollectionUtil.hasValues(mSubOperations)) 1386 { 1387 Range<Integer> startRange = mSubOperations.get(0).toIntRange(); 1388 Range<Integer> endRange = mSubOperations.get(mSubOperations.size() - 1).toIntRange(); 1389 if (startRange != null 1390 && endRange != null) 1391 { 1392 intRange = new Range<>(startRange.getStart(), endRange.getEnd()); 1393 } 1394 } 1395 1396 if (intRange != null 1397 && intRange.getStart() > intRange.getEnd()) 1398 { 1399 // Flip the start and end values 1400 int start = intRange.getStart(); 1401 intRange.setStart(intRange.getEnd()); 1402 intRange.setEnd(start); 1403 } 1404 1405 return intRange; 1406 } 1407 1408 //------------------------------------------------------------------------ 1409 public String eval(String inSequence) 1410 { 1411 String resultSeq; 1412 1413 try 1414 { 1415 List<String> subseqs = extractLocations(inSequence); 1416 if (!CollectionUtil.hasValues(subseqs)) 1417 { 1418 throw new SeqIOException("Problem evaluating location " + StringUtil.singleQuote(this.toString()) + "!"); 1419 } 1420 1421 if (mType != null) 1422 { 1423 switch (mType) 1424 { 1425 case complement: // Really means reverse complement 1426 if (subseqs.size() != 1) 1427 { 1428 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1429 } 1430 1431 NucleicAcid seqObj = new NucleicAcid().setSequence(subseqs.get(0)); 1432 resultSeq = seqObj.getReverseComplementSequence(); 1433 break; 1434 1435 case join: 1436 resultSeq = StringUtil.join(subseqs, ""); 1437 break; 1438 1439 default: 1440 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1441 } 1442 } 1443 else 1444 { 1445 if (1 == subseqs.size()) 1446 { 1447 // Simple seq location 1448 resultSeq = subseqs.get(0); 1449 } 1450 else 1451 { 1452 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1453 } 1454 } 1455 } 1456 catch (SeqIOException e) 1457 { 1458 throw e; // These already have a good description 1459 } 1460 catch (Exception e) 1461 { 1462 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!", e); 1463 } 1464 1465 return resultSeq; 1466 } 1467 1468 //------------------------------------------------------------------------ 1469 public String eval(NucleicAcid inNucleicAcid) 1470 { 1471 String resultSeq; 1472 1473 try 1474 { 1475 List<String> subseqs = extractLocations(inNucleicAcid); 1476 if (! CollectionUtil.hasValues(subseqs)) 1477 { 1478 throw new SeqIOException("Problem evaluating location " + StringUtil.singleQuote(this.toString()) + "!"); 1479 } 1480 1481 if (mType != null) 1482 { 1483 switch (mType) 1484 { 1485 case complement: // Really means reverse complement 1486 if (subseqs.size() != 1) 1487 { 1488 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1489 } 1490 1491 NucleicAcid seqObj = new NucleicAcid().setSequence(subseqs.get(0)); 1492 resultSeq = seqObj.getReverseComplementSequence(); 1493 break; 1494 1495 case join: 1496 resultSeq = StringUtil.join(subseqs, ""); 1497 break; 1498 1499 default: 1500 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1501 } 1502 } 1503 else 1504 { 1505 if (1 == subseqs.size()) 1506 { 1507 // Simple seq location 1508 resultSeq = subseqs.get(0); 1509 } 1510 else 1511 { 1512 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1513 } 1514 } 1515 } 1516 catch (SeqIOException e) 1517 { 1518 throw e; // These already have a good description 1519 } 1520 catch (Exception e) 1521 { 1522 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1523 } 1524 1525 return resultSeq; 1526 } 1527 1528 //-------------------------------------------------------------------------- 1529 public List<SeqLocation> getIntrons() 1530 { 1531 List<SeqLocation> introns = null; 1532 1533 if (mType != null) 1534 { 1535 switch (mType) 1536 { 1537 case complement: // Really means reverse complement 1538 1539 if (mSubOperations != null) 1540 { 1541 for (SeqOperation operation : mSubOperations) 1542 { 1543 if (null == introns) 1544 { 1545 introns = new ArrayList<>(10); 1546 } 1547 1548 List<SeqLocation> opIntrons = operation.getIntrons(); 1549 if (CollectionUtil.hasValues(opIntrons)) 1550 { 1551 introns.addAll(opIntrons); 1552 } 1553 } 1554 } 1555 1556 break; 1557 1558 case join: 1559 if (mSubOperations != null) 1560 { 1561 for (SeqOperation operation : mSubOperations) 1562 { 1563 List<SeqLocation> opIntrons = operation.getIntrons(); 1564 if (CollectionUtil.hasValues(opIntrons)) 1565 { 1566 if (null == introns) 1567 { 1568 introns = new ArrayList<>(mSeqOperation.mLocations.size() - 1); 1569 } 1570 1571 introns.addAll(opIntrons); 1572 } 1573 } 1574 } 1575 else if (CollectionUtil.hasValues(mLocations) 1576 && mLocations.size() > 1) 1577 { 1578 if (null == introns) 1579 { 1580 introns = new ArrayList<>(mLocations.size() - 1); 1581 } 1582 1583 for (int i = 0; i < mLocations.size() - 1; i++) 1584 { 1585 introns.add(new SeqLocation(mLocations.get(i).getEnd() + 1, mLocations.get(i + 1).getStart() - 1)); 1586 } 1587 } 1588 break; 1589 1590 default: 1591 throw new SeqIOException("Trouble evaluating location string " + StringUtil.singleQuote(mStringValue) + "!"); 1592 } 1593 } 1594 1595 return introns; 1596 } 1597 1598 //------------------------------------------------------------------------ 1599 private List<String> extractLocations(String inSequence) 1600 { 1601 List<String> subseqs = null; 1602 1603 if (CollectionUtil.hasValues(mLocations)) 1604 { 1605 subseqs = new ArrayList<>(mLocations.size()); 1606 for (SeqLocation location : mLocations) 1607 { 1608 subseqs.add(inSequence.substring(location.getStart() - 1, location.getEnd())); 1609 } 1610 } 1611 else if (CollectionUtil.hasValues(mSubOperations)) 1612 { 1613 subseqs = new ArrayList<>(25); 1614 for (SeqOperation subOperation : mSubOperations) 1615 { 1616 subseqs.add(subOperation.eval(inSequence)); 1617 } 1618 } 1619 1620 return subseqs; 1621 } 1622 1623 //------------------------------------------------------------------------ 1624 private List<String> extractLocations(NucleicAcid inNucleicAcid) 1625 { 1626 List<String> subseqs = null; 1627 1628 if (CollectionUtil.hasValues(mLocations)) 1629 { 1630 subseqs = new ArrayList<>(mLocations.size()); 1631 for (SeqLocation location : mLocations) 1632 { 1633 subseqs.add(inNucleicAcid.getSubSequence(location)); 1634 } 1635 } 1636 else if (CollectionUtil.hasValues(mSubOperations)) 1637 { 1638 subseqs = new ArrayList<>(25); 1639 for (SeqOperation subOperation : mSubOperations) 1640 { 1641 subseqs.add(subOperation.eval(inNucleicAcid)); 1642 } 1643 } 1644 1645 return subseqs; 1646 } 1647 1648 } 1649}