Pages

Monday, December 5, 2011

Replace a string

Static Replacement
Replacing one static String with another can be done in various ways : 
public final class ReplaceSubstring {

  /**
  * Java 1.5 and later: literal replacement using String.replace, which
  * accepts CharSequence arguments and gives no character a special meaning.
  *
  * @param aInput is the text to search
  * @param aOldPattern is the literal text to look for
  * @param aNewPattern is the literal text substituted for each occurrence
  */
  public static String replace15(
    String aInput, String aOldPattern, String aNewPattern
  ){
    final String replaced = aInput.replace(aOldPattern, aNewPattern);
    return replaced;
  }

  /**
  * Java 1.4: replacement using String.replaceAll, which is driven by
  * regular expressions and therefore needs more care from the caller.
  *
  * @param aInput is the text to search
  * @param aOldPattern is interpreted as a regular expression
  * @param aNewPattern is the replacement text, in which '$' is special
  */
  public static String replace14(
    String aInput, String aOldPattern, String aNewPattern
  ){
    /*
    * Caution: replaceAll compiles aOldPattern as a regular expression.
    * Any character in aOldPattern with a special meaning in a regex
    * must be escaped by the caller beforehand, and that step is
    * easily forgotten.
    *
    * The replacement text has special characters of its own: '$'
    * introduces a back reference to a matching group.
    *
    * When the same pattern is applied many times, compiling it once
    * with Pattern and Matcher is the preferred alternative :
    *   final Pattern pattern = Pattern.compile( aOldPattern );
    *   final Matcher matcher = pattern.matcher( aInput );
    *   return matcher.replaceAll( aNewPattern );
    */
    final String replaced = aInput.replaceAll(aOldPattern, aNewPattern);
    return replaced;
  }

  /**
  * If Java 1.4 is unavailable, this hand-written scan may be used instead.
  *
  * @param aInput is the original String which may contain substring aOldPattern
  * @param aOldPattern is the non-empty substring which is to be replaced
  * @param aNewPattern is the replacement for aOldPattern
  * @throws IllegalArgumentException if aOldPattern is the empty String
  */
  public static String replaceOld(
    final String aInput,
    final String aOldPattern,
    final String aNewPattern
  ){
     final int oldLength = aOldPattern.length();
     if ( oldLength == 0 ) {
        throw new IllegalArgumentException("Old pattern must have content.");
     }

     final StringBuffer out = new StringBuffer();
     //chunkStart marks where the next untouched piece of aInput begins;
     //matchAt marks where the next occurrence of aOldPattern begins
     int chunkStart = 0;
     int matchAt = aInput.indexOf(aOldPattern, chunkStart);
     while (matchAt != -1) {
       //copy the text lying before the match, then the replacement
       out.append( aInput.substring(chunkStart, matchAt) );
       out.append( aNewPattern );

       //resume the search immediately after the text just replaced
       chunkStart = matchAt + oldLength;
       matchAt = aInput.indexOf(aOldPattern, chunkStart);
     }
     //whatever follows the last match is copied unchanged
     out.append( aInput.substring(chunkStart) );
     return out.toString();
  }

  /** Example: update an ip address appearing in a link.  */
  public static void main (String[] aArguments) {
    final String link = "http://45.23.102.12:8080/index.html";
    final String oldIp = "45.23.102.12";
    //the regex form escapes each '.', which otherwise matches any character
    final String oldIpRegex = "45\\.23\\.102\\.12";
    final String newIp = "99.104.106.95";

    log("Old link : " + link);
    log("New link with Java 1.5 replace: " + replace15(link, oldIp, newIp));
    log("New link with Java 1.4 replaceAll: " + replace14(link, oldIpRegex, newIp));
    log("New link with oldest style: " + replaceOld(link, oldIp, newIp));
  }

  /** Print a single line to standard output. */
  private static void log(String aMessage){
    System.out.println(aMessage);
  }
}


Example run of this class :
Old link : http://45.23.102.12:8080/index.html
New link with Java 1.5 replace: http://99.104.106.95:8080/index.html
New link with Java 1.4 replaceAll: http://99.104.106.95:8080/index.html
New link with oldest style: http://99.104.106.95:8080/index.html
Dynamic Replacement
If the replacement string is not fixed, and needs to be created dynamically, then another approach is required. In the following example, strings of the form "href=Topic182.cjp" are replaced with a corresponding string "href=#182".
The number 182 is taken only as an example. It is in fact extracted dynamically, and referenced in the replacement string using the back reference "$1", where 1 is the index of the matching group for these digits. 
import java.util.regex.*;

public final class ReplaceSubstringDynamically {

  /** Example: rewrite two document links as fragment links and print both forms. */
  public static void main (String[] aArguments) {
    String htmlText = "<a href=\"Topic27.cjp\">xyz</a> blah <a href=Topic8.cjp>abc</a>";
    System.out.println("Old HTML text : " + htmlText);
    System.out.println("New HTML text : " + replaceLinks(htmlText));
  }

  /**
  * Replace the document links in a snippet of HTML with corresponding
  * fragment links, which start with the # sign, and refer to labelled
  * locations within a single document.
  *
  * @param aHtmlTextWithLinks is the HTML snippet to scan
  * @return the snippet with every match of fLINK replaced by fFRAGMENT
  */
  private static String replaceLinks(String aHtmlTextWithLinks){
    Matcher matcher = fLINK.matcher(aHtmlTextWithLinks);
    return matcher.replaceAll(fFRAGMENT);
  }

  /**
  * Matches href=TopicNNN.cjp, with an optional single or double quote on
  * either side of the target. The only capturing group of this regex is
  * the run of one to three digits, ((?:\\d){1,3}), which is group 1.
  *
  * Compiled once and reused, rather than recompiled on every call.
  */
  private static final Pattern fLINK = Pattern.compile(
    "href=(?:\"|\')?Topic((?:\\d){1,3})\\.cjp(?:\"|\')?"
  );

  /**
  * The "$1" refers to matching group 1 of fLINK (the digits).
  */
  private static final String fFRAGMENT = "href=#$1";
}


Example run of this class :
Old HTML text : <a href="Topic27.cjp">xyz</a> blah <a href=Topic8.cjp>abc</a>
New HTML text : <a href=#27>xyz</a> blah <a href=#8>abc</a>
Here is a second example, where the replacement string is computed without using back references. 
import java.util.Locale;
import java.util.regex.*;

public final class ReplaceSubstringAppendReplacement {

  /** Example: print a sentence before and after its 'a' words are upper-cased. */
  public static void main (String[] aArguments) {
    String text = "Apples and oranges are better for all.";
    System.out.println("Old text : " + text);
    System.out.println("New text : " + getEditedText(text));
  }

  /**
  * Replace all words starting with the letter 'a' or 'A' with
  * their uppercase forms.
  *
  * @param aText is the text to edit
  * @return aText with every match of fINITIAL_A replaced by its upper-case form
  */
  private static String getEditedText(String aText){
    StringBuffer result = new StringBuffer();
    Matcher matcher = fINITIAL_A.matcher(aText);
    while ( matcher.find() ) {
      //appendReplacement copies the text skipped since the previous match,
      //and then the replacement computed for the current match
      matcher.appendReplacement(result, getReplacement(matcher));
    }
    //copy whatever follows the last match
    matcher.appendTail(result);
    return result.toString();
  }

  /**
  * A word beginning with 'a' or 'A', found either at the start of the
  * input or just after a whitespace character. That leading whitespace
  * character, when present, is part of the match.
  */
  private static final Pattern fINITIAL_A = Pattern.compile(
    "(?:\\s|^)a(?:\\w)*",
    Pattern.CASE_INSENSITIVE
  );

  /**
  * Build the replacement text for the current match of aMatcher.
  *
  * Locale.ROOT keeps the result independent of the default locale; under
  * a Turkish default locale, for example, 'i' would otherwise become a
  * dotted capital I. The result is passed through Matcher.quoteReplacement
  * since appendReplacement treats '$' and '\' in its replacement argument
  * as special characters.
  */
  private static String getReplacement(Matcher aMatcher){
    String upperCased = aMatcher.group(0).toUpperCase(Locale.ROOT);
    return Matcher.quoteReplacement(upperCased);
  }

}


Example run of this class :
Old text : Apples and oranges are better for all.
New text : APPLES AND oranges ARE better for ALL.
Warning
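The methods :
String.replaceAll(String, String)
String.replaceFirst(String, String)
Matcher.replaceAll(String)
Matcher.replaceFirst(String)
Matcher.appendReplacement(StringBuffer, String)
treat '$' and '\' in the replacement text as special characters. If the replacement text can contain arbitrary text, then these characters will usually be escaped using Matcher.quoteReplacement(String). 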

No comments:

Post a Comment