// Default word BreakIterator. /* Copyright (C) 1999 Cygnus Solutions This file is part of libgcj. This software is copyrighted work licensed under the terms of the Libgcj License. Please consult the file "LIBGCJ_LICENSE" for details. */ package gnu.gcj.text; import java.text.BreakIterator; import java.text.CharacterIterator; /** * @author Tom Tromey <tromey@cygnus.com> * @date March 22, 1999 * Written using The Unicode Standard, Version 2.0. */ public class LineBreakIterator extends BaseBreakIterator { public Object clone () { return new LineBreakIterator (this); } public LineBreakIterator () { iter = null; } private LineBreakIterator (LineBreakIterator other) { iter = (CharacterIterator) other.iter.clone(); } // Some methods to tell us different properties of characters. private final boolean isNb (char c) { return (c == 0x00a0 // NO-BREAK SPACE || c == 0x2011 // NON-BREAKING HYPHEN || c == 0xfeff); // ZERO WITH NO-BREAK SPACE } private final boolean isClose (int type) { return (type == Character.END_PUNCTUATION // Unicode book says "comma, period, ...", which I take to // mean "Po" class. || type == Character.OTHER_PUNCTUATION); } private final boolean isIdeo (char c) { return (c >= 0x3040 && c <= 0x309f // Hiragana || c >= 0x30a0 && c <= 0x30ff // Katakana || c >= 0x4e00 && c <= 0x9fff // Han || c >= 0x3100 && c <= 0x312f); // Bopomofo } public int next () { int end = iter.getEndIndex(); if (iter.getIndex() == end) return DONE; while (iter.getIndex() < end) { char c = iter.current(); int type = Character.getType(c); char n = iter.next(); if (n == CharacterIterator.DONE || type == Character.PARAGRAPH_SEPARATOR || type == Character.LINE_SEPARATOR) break; // Handle two cases where we must scan for non-spacing marks. int start = iter.getIndex(); if (type == Character.SPACE_SEPARATOR || type == Character.START_PUNCTUATION || isIdeo (c)) { while (n != CharacterIterator.DONE && Character.getType(n) == Character.NON_SPACING_MARK) n = iter.next(); if (n == CharacterIterator.DONE) break; if (type == Character.SPACE_SEPARATOR) { int nt = Character.getType(n); if (nt != Character.NON_SPACING_MARK && nt != Character.SPACE_SEPARATOR && ! isNb (n)) break; } else if (type == Character.START_PUNCTUATION) { if (isIdeo (n)) { // Open punctuation followed by non spacing marks // and then ideograph does not have a break in // it. So skip all this. start = iter.getIndex(); } } else { // Ideograph preceded this character. if (isClose (Character.getType(n))) break; } } iter.setIndex(start); } return iter.getIndex(); } public int previous () { int start = iter.getBeginIndex(); if (iter.getIndex() == start) return DONE; while (iter.getIndex() >= start) { char c = iter.previous(); if (c == CharacterIterator.DONE) break; int type = Character.getType(c); char n = iter.previous(); if (n == CharacterIterator.DONE) break; iter.next(); int nt = Character.getType(n); // Break after paragraph separators. if (nt == Character.PARAGRAPH_SEPARATOR || nt == Character.LINE_SEPARATOR) break; // Skip non-spacing marks. int init = iter.getIndex(); while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK) { n = iter.previous(); nt = Character.getType(n); } if (nt == Character.SPACE_SEPARATOR && type != Character.SPACE_SEPARATOR && type != Character.NON_SPACING_MARK && ! isNb (c)) break; if (! isClose (type) && isIdeo (n)) break; if (isIdeo (c) && nt != Character.START_PUNCTUATION) break; iter.setIndex(init); } return iter.getIndex(); } }