From 0bdf7869bf1de3549de62e49901ebcea8578f5d3 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Mon, 8 Oct 2001 21:03:34 +0000 Subject: [PATCH] URLDecoder.java: Remerge with Classpath * java/net/URLDecoder.java: Remerge with Classpath * java/net/URLEncoder.java: Merge with Classpath From-SVN: r46098 --- libjava/ChangeLog | 5 + libjava/java/net/URLDecoder.java | 121 +++++++++++++++++---- libjava/java/net/URLEncoder.java | 178 ++++++++++++++++++++++--------- 3 files changed, 233 insertions(+), 71 deletions(-) diff --git a/libjava/ChangeLog b/libjava/ChangeLog index af67c16b8a4..ff7bbf64988 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,8 @@ +2001-10-07 Mark Wielaard + + * java/net/URLDecoder.java: Remerge with Classpath + * java/net/URLEncoder.java: Merge with Classpath + 2001-10-08 Tom Tromey Fix for PR libgcj/4481: diff --git a/libjava/java/net/URLDecoder.java b/libjava/java/net/URLDecoder.java index ae7da78e120..9c51bc03b5c 100644 --- a/libjava/java/net/URLDecoder.java +++ b/libjava/java/net/URLDecoder.java @@ -28,56 +28,131 @@ package java.net; import java.io.UnsupportedEncodingException; -/** - * This utility class contains one static method that converts a + /** + * This utility class contains static methods that convert a * string encoded in the x-www-form-urlencoded format to the original - * text. The x-www-form-urlencoded format - * replaces certain disallowed characters with - * encoded equivalents. All upper case and lower case letters in the - * US alphabet remain as is, the space character (' ') is replaced with - * '+' sign, and all other characters are converted to a "%XX" format - * where XX is the hexadecimal representation of that character. Note - * that since unicode characters are 16 bits, and this method encodes only - * 8 bits of information, the lower 8 bits of the character are used. + * text. The x-www-form-urlencoded format replaces certain disallowed + * characters with encoded equivalents. 
All upper case and lower case + * letters in the US alphabet remain as is, the space character (' ') + * is replaced with '+' sign, and all other characters are converted to a + * "%XX" format where XX is the hexadecimal representation of that character + * in a given character encoding (default is "UTF-8"). *

* This method is very useful for decoding strings sent to CGI scripts * - * Written using on-line Java Platform 1.2 API Specification. + * Written using on-line Java Platform 1.2/1.4 API Specification. * Status: Believed complete and correct. * * @since 1.2 * * @author Warren Levy * @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments) - * @date April 22, 1999. + * @author Mark Wielaard (mark@klomp.org) */ public class URLDecoder { -/** + /** * This method translates the passed in string from x-www-form-urlencoded - * format and returns it. + * format using the default encoding "UTF-8" to decode the hex encoded + * unsafe characters. * - * @param source The String to convert + * @param s the String to convert * - * @return The converted String + * @return the converted String */ public static String decode(String s) { + try + { + return decode(s, "UTF-8"); + } + catch (UnsupportedEncodingException uee) + { + // Should never happen since UTF-8 encoding should always be supported + return s; + } + } + + /** + * This method translates the passed in string from x-www-form-urlencoded + * format using the given character encoding to decode the hex encoded + * unsafe characters. + *

+ * This implementation will decode the string even if it contains + * unsafe characters (characters that should have been encoded) or if the + * two characters following a % do not represent a hex encoded byte. + * In those cases the unsafe character or the % character will be added + * verbatim to the decoded result. + * + * @param s the String to convert + * @param encoding the character encoding to use to decode the hex encoded + * unsafe characters + * + * @return the converted String + * + * @since 1.4 + */ + public static String decode(String s, String encoding) + throws UnsupportedEncodingException + { + StringBuffer result = new StringBuffer(); + + // First convert all '+' characters to spaces. String str = s.replace('+', ' '); - String result = ""; + + // Then go through the whole string looking for byte encoded characters int i; int start = 0; + byte[] bytes = null; + int length = str.length(); while ((i = str.indexOf('%', start)) >= 0) { - result = result + str.substring(start, i) + - (char) Integer.parseInt(str.substring(i + 1, i + 3), 16); - start = i + 3; + // Add all non-encoded characters to the result buffer + result.append(str.substring(start, i)); + start = i; + + // Get all consecutive encoded bytes + while ((i+2 < length) && (str.charAt(i) == '%')) + i += 3; + + // Decode all these bytes + if ((bytes == null) || (bytes.length < ((i-start)/3))) + bytes = new byte[((i-start)/3)]; + + int index = 0; + try + { + while (start < i) + { + String sub = str.substring(start + 1, start + 3); + bytes[index] = (byte)Integer.parseInt(sub, 16); + index++; + start += 3; + } + } + catch (NumberFormatException nfe) + { + // One of the hex encoded strings was bad + } + + // Add the bytes as characters according to the given encoding + result.append(new String(bytes, 0, index, encoding)); + + // Make sure we skip to just after a % sign + // There might not have been enough encoded characters after the % + // or the hex chars were not actually hex chars 
(NumberFormatException) + if (start < length && s.charAt(start) == '%') + { + result.append('%'); + start++; + } } + // Add any characters left if (start < str.length()) - result = result + str.substring(start); + result.append(str.substring(start)); - return result; + return result.toString(); } -} // class URLDecoder +} // class URLDecoder diff --git a/libjava/java/net/URLEncoder.java b/libjava/java/net/URLEncoder.java index 6590dcf8feb..f39b30070d4 100644 --- a/libjava/java/net/URLEncoder.java +++ b/libjava/java/net/URLEncoder.java @@ -1,71 +1,153 @@ -// URLEncoder.java - Provides a method for encoding strings according to -// application/x-www-form-urlencoded MIME type. +/* URLEncoder.java -- Class to convert strings to a properly encoded URL + Copyright (C) 1998, 1999, 2001 Free Software Foundation, Inc. -/* Copyright (C) 1999 Free Software Foundation +This file is part of GNU Classpath. - This file is part of libgcj. +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. -This software is copyrighted work licensed under the terms of the -Libgcj License. Please consult the file "LIBGCJ_LICENSE" for -details. */ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +As a special exception, if you link this library with other files to +produce an executable, this library does not by itself cause the +resulting executable to be covered by the GNU General Public License. 
+This exception does not however invalidate any other reasons why the +executable file might be covered by the GNU General Public License. */ package java.net; + import java.io.UnsupportedEncodingException; /** - * @author Warren Levy - * @date April 22, 1999. - */ - -/** - * Written using on-line Java Platform 1.2 API Specification, as well + * Written using on-line Java Platform 1.2/1.4 API Specification, as well * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998). * Status: Believed complete and correct. */ + /** + * This utility class contains static methods that convert a + * string into a fully encoded URL string in x-www-form-urlencoded + * format. This format replaces certain disallowed characters with + * encoded equivalents. All upper case and lower case letters in the + * US alphabet remain as is, the space character (' ') is replaced with + * '+' sign, and all other characters are converted to a "%XX" format + * where XX is the hexadecimal representation of that character in a + * certain encoding (by default "UTF-8"). + *

+ * This method is very useful for encoding strings to be sent to CGI scripts + * + * @author Aaron M. Renn (arenn@urbanophile.com) + * @author Warren Levy + * @author Mark Wielaard (mark@klomp.org) + */ public class URLEncoder { - // This method, per the JCL, is conservative in that it encodes - // some "allowable" characters as % triplets. + /** + * This method translates the passed in string into x-www-form-urlencoded + * format using the standard "UTF-8" character encoding to hex-encode the + * unsafe characters. + * + * @param s The String to convert + * + * @return The converted String + */ public static String encode(String s) { - // Get the bytes in ISO-Latin-1 (i.e. 8859_1) per the JCL. - // Even though it is the default in most cases, it's specified here - // just in case System.getProperty("file.encoding") is not "8859_1". - String result = ""; try { - byte[] buf = s.getBytes("8859_1"); - int start = 0; - for (int i = 0; i < buf.length; i++) - // For efficiency, check the byte in order of most likely - // possibility so as to minimize the number of comparisons. - // Hence, exclude all the alphanumeric & allowed special chars first. - if ((buf[i] >= 'a' && buf[i] <= 'z') || - (buf[i] >= 'A' && buf[i] <= 'Z') || - (buf[i] >= '0' && buf[i] <= '9') || - buf[i] == '-' || buf[i] == '_' || buf[i] == '.' || buf[i] == '*') - ; // This is the most likely case so exclude first for efficiency. - else if (buf[i] == ' ') - buf[i] = (byte) '+'; // Replace space char with plus symbol. - else - { - result = result + new String(buf, start, i - start, "8859_1") + - "%" + Integer.toHexString(((int) buf[i]) & 0xFF); - start = i + 1; - } - - // Append remainder of allowable chars from the string, if any. 
- if (start < buf.length) - result = result + - new String(buf, start, buf.length - start, "8859_1"); + return encode(s, "UTF-8"); } - catch (UnsupportedEncodingException ex) + catch (UnsupportedEncodingException uee) { - // This should never happen as "8859_1" is the default encoding. + // Should never happen since UTF-8 should always be supported return s; } - - return result; } -} + + /** + * This method translates the passed in string into x-www-form-urlencoded + * format using the character encoding to hex-encode the unsafe characters. + * + * @param s The String to convert + * @param encoding The encoding to use for unsafe characters + * + * @return The converted String + * + * @since 1.4 + */ + public static String encode(String s, String encoding) + throws UnsupportedEncodingException + { + StringBuffer result = new StringBuffer(); + int length = s.length(); + int start = 0; + int i = 0; + + while (true) + { + while ( i < length && isSafe(s.charAt(i)) ) + i++; + + // Safe character can just be added + result.append(s.substring(start, i)); + + // Are we done? + if (i >= length) + return result.toString(); + else if (s.charAt(i) == ' ') + { + result.append('+'); // Replace space char with plus symbol. + i++; + } + else + { + // Get all unsafe characters + start = i; + char c; + while ( i < length && (c = s.charAt(i)) != ' ' && !isSafe(c) ) + i++; + + // Convert them to %XY encoded strings + String unsafe = s.substring(start,i); + byte bytes[] = unsafe.getBytes(encoding); + for (int j = 0; j < bytes.length; j++) + { + result.append('%'); + result.append(Integer.toHexString(((int) bytes[j]) & 0xFF)); + } + } + start = i; + } + } + + /** + * Private static method that returns true if the given char is either + * an uppercase or lowercase letter from 'a' till 'z', or a digit from + * '0' till '9', or one of the characters '-', '_', '.' or '*'. Such + * 'safe' characters don't have to be url encoded. 
+ */ + private static boolean isSafe(char c) + { + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == '*'); + } + + /** + * Private constructor that does nothing. Included to avoid a default + * public constructor being created by the compiler. + */ + private URLEncoder() { } + +} // class URLEncoder