001    /*
002    // $Id: ParseRegion.java 482 2012-01-05 23:27:27Z jhyde $
003    //
004    // Licensed to Julian Hyde under one or more contributor license
005    // agreements. See the NOTICE file distributed with this work for
006    // additional information regarding copyright ownership.
007    //
008    // Julian Hyde licenses this file to you under the Apache License,
009    // Version 2.0 (the "License"); you may not use this file except in
010    // compliance with the License. You may obtain a copy of the License at:
011    //
012    // http://www.apache.org/licenses/LICENSE-2.0
013    //
014    // Unless required by applicable law or agreed to in writing, software
015    // distributed under the License is distributed on an "AS IS" BASIS,
016    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017    // See the License for the specific language governing permissions and
018    // limitations under the License.
019    */
020    package org.olap4j.mdx;
021    
/**
 * Region of parser source code.
 *
 * <p>The main purpose of a ParseRegion is to give detailed locations in
 * error messages and warnings from the parsing and validation process.
 *
 * <p>A region has a start and end line number and column number. A region is
 * a point if the start and end positions are the same.
 *
 * <p>The line and column number are one-based, because that is what end-users
 * understand.
 *
 * <p>A region's end-points are inclusive. For example, in the code
 *
 * <blockquote><pre>SELECT FROM [Sales]</pre></blockquote>
 *
 * the <code>SELECT</code> token has region [1:1, 1:6].
 *
 * <p>Regions are immutable.
 *
 * @version $Id: ParseRegion.java 482 2012-01-05 23:27:27Z jhyde $
 * @author jhyde
 */
public class ParseRegion {
    private final int startLine;
    private final int startColumn;
    private final int endLine;
    private final int endColumn;

    /**
     * Creates a ParseRegion.
     *
     * <p>All lines and columns are 1-based and inclusive. For example, the
     * token "select" in "select from [Sales]" has a region [1:1, 1:6].
     *
     * <p>The end must not precede the start; this is checked only by
     * assertions, so it is enforced only when assertions are enabled.
     *
     * @param startLine Line of the beginning of the region
     * @param startColumn Column of the beginning of the region
     * @param endLine Line of the end of the region
     * @param endColumn Column of the end of the region
     */
    public ParseRegion(
        int startLine,
        int startColumn,
        int endLine,
        int endColumn)
    {
        assert endLine >= startLine;
        assert endLine > startLine || endColumn >= startColumn;
        this.startLine = startLine;
        this.startColumn = startColumn;
        this.endLine = endLine;
        this.endColumn = endColumn;
    }

    /**
     * Creates a ParseRegion which is a single point.
     *
     * <p>All lines and columns are 1-based.
     *
     * @param line Line of the beginning and end of the region
     * @param column Column of the beginning and end of the region
     */
    public ParseRegion(
        int line,
        int column)
    {
        this(line, column, line, column);
    }

    /**
     * Returns the starting line number (1-based).
     *
     * @return 1-based starting line number
     */
    public int getStartLine() {
        return startLine;
    }

    /**
     * Returns the starting column number (1-based).
     *
     * @return 1-based starting column number
     */
    public int getStartColumn() {
        return startColumn;
    }

    /**
     * Returns the ending line number (1-based).
     *
     * @return 1-based ending line number
     */
    public int getEndLine() {
        return endLine;
    }

    /**
     * Returns the ending column number (1-based, inclusive).
     *
     * @return 1-based ending column number
     */
    public int getEndColumn() {
        return endColumn;
    }

    /**
     * Returns a string representation of this ParseRegion.
     *
     * <p>Regions are of the form
     * <code>[startLine:startColumn, endLine:endColumn]</code>, or
     * <code>[startLine:startColumn]</code> for point regions.
     *
     * @return string representation of this ParseRegion
     */
    @Override
    public String toString() {
        return "[" + startLine + ":" + startColumn
            + ((isPoint())
                ? ""
                : ", " + endLine + ":" + endColumn)
            + "]";
    }

    /**
     * Returns whether this region has the same start and end point.
     *
     * @return whether this region has the same start and end point
     */
    public boolean isPoint() {
        return endLine == startLine && endColumn == startColumn;
    }

    /**
     * Returns a hash code derived from the four coordinates, consistent
     * with {@link #equals(Object)}.
     *
     * @return hash code of this region
     */
    @Override
    public int hashCode() {
        return startLine
            ^ (startColumn << 2)
            ^ (endLine << 4)
            ^ (endColumn << 8);
    }

    /**
     * Returns whether the other object is a ParseRegion with the same start
     * and end coordinates.
     *
     * @param obj Object to compare with
     * @return whether the objects are equal
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ParseRegion)) {
            return false;
        }
        final ParseRegion that = (ParseRegion) obj;
        return this.startLine == that.startLine
            && this.startColumn == that.startColumn
            && this.endLine == that.endLine
            && this.endColumn == that.endColumn;
    }

    /**
     * Combines this region with a collection of regions to create a
     * region which spans from the earliest start point to the latest end
     * point of this region and all of the given regions.
     *
     * <p>Null elements of the collection are ignored.
     *
     * @param regions Collection of source code regions
     * @return region which represents the span of this region and the given
     * regions
     */
    public ParseRegion plusAll(Iterable<ParseRegion> regions)
    {
        return sum(
            regions,
            getStartLine(),
            getStartColumn(),
            getEndLine(),
            getEndColumn());
    }

    /**
     * Combines a collection of regions to create a region which spans from
     * the earliest start point to the latest end point.
     *
     * <p>Null elements of the collection are ignored. The collection should
     * contain at least one non-null region: otherwise the result is built
     * from the initial sentinel values and is not a meaningful region (and
     * the constructor's assertions fail when assertions are enabled).
     *
     * @param nodes Collection of regions
     * @return region which represents the span of the given regions
     */
    public static ParseRegion sum(
        Iterable<ParseRegion> nodes)
    {
        return sum(nodes, Integer.MAX_VALUE, Integer.MAX_VALUE, -1, -1);
    }

    /**
     * Widens an initial span so that it covers every non-null region in a
     * collection.
     *
     * @param regions Collection of regions; null elements are skipped
     * @param startLine Line of the start of the initial span
     * @param startColumn Column of the start of the initial span
     * @param endLine Line of the end of the initial span
     * @param endColumn Column of the end of the initial span
     * @return region covering the initial span and all given regions
     */
    private static ParseRegion sum(
        Iterable<ParseRegion> regions,
        int startLine,
        int startColumn,
        int endLine,
        int endColumn)
    {
        for (ParseRegion region : regions) {
            if (region == null) {
                continue;
            }

            // Move the start back if this region starts earlier.
            final int candidateStartLine = region.getStartLine();
            final int candidateStartColumn = region.getStartColumn();
            if ((candidateStartLine < startLine)
                || ((candidateStartLine == startLine)
                    && (candidateStartColumn < startColumn)))
            {
                startLine = candidateStartLine;
                startColumn = candidateStartColumn;
            }

            // Move the end forward if this region ends later.
            final int candidateEndLine = region.getEndLine();
            final int candidateEndColumn = region.getEndColumn();
            if ((candidateEndLine > endLine)
                || ((candidateEndLine == endLine)
                    && (candidateEndColumn > endColumn)))
            {
                endLine = candidateEndLine;
                endColumn = candidateEndColumn;
            }
        }
        return new ParseRegion(startLine, startColumn, endLine, endColumn);
    }

    /**
     * Looks for one or two carets in an MDX string, and if present, removes
     * them and converts them into a region of the resulting string.
     *
     * <p>A single caret marks a point: the character which follows it. Two
     * carets enclose a region: it runs from the character after the first
     * caret to the character before the second caret, inclusive. Only the
     * first two carets are interpreted; any further carets are left in the
     * source.
     *
     * <p>Examples:
     *
     * <ul>
     * <li>findPos("xxx^yyy") yields source "xxxyyy" and region [1:4]
     * <li>findPos("xxxyyy") yields source "xxxyyy" and a null region
     * <li>findPos("xxx^yy^y") yields source "xxxyyy" and region [1:4, 1:5]
     * <li>findPos("ab^c\nde^f") yields source "abc\ndef" and region
     * [1:3, 2:2]
     * </ul>
     *
     * @param code Source code
     * @return object containing source code annotated with region
     */
    public static RegionAndSource findPos(String code)
    {
        final int firstCaret = code.indexOf('^');
        if (firstCaret < 0) {
            return new RegionAndSource(code, null);
        }
        final int secondCaret = code.indexOf('^', firstCaret + 1);
        if (secondCaret < 0) {
            final String codeSansCaret =
                code.substring(0, firstCaret)
                + code.substring(firstCaret + 1);
            final int [] point = indexToLineCol(codeSansCaret, firstCaret);
            return new RegionAndSource(
                codeSansCaret,
                new ParseRegion(point[0], point[1]));
        }

        final String codeSansCarets =
            code.substring(0, firstCaret)
            + code.substring(firstCaret + 1, secondCaret)
            + code.substring(secondCaret + 1);

        // All positions are resolved against the caret-free string, so
        // that a caret on an earlier line cannot skew the column of a
        // position on a later line.
        final int [] start = indexToLineCol(codeSansCarets, firstCaret);

        // Subtract 1 because removing the first caret shifts the rest of
        // the string left, and 1 more because the end is inclusive: the
        // last character of the region is the one before the second caret.
        final int [] end = indexToLineCol(codeSansCarets, secondCaret - 2);
        return new RegionAndSource(
            codeSansCarets,
            new ParseRegion(start[0], start[1], end[0], end[1]));
    }

    /**
     * Returns the (1-based) line and column corresponding to a particular
     * (0-based) offset in a string.
     *
     * <p>Lines are terminated by "\r\n", "\r" or "\n". The characters of a
     * line terminator belong to the line which they terminate.
     *
     * <p>Converse of {@link #lineColToIndex(String, int, int)} for strings
     * whose lines are terminated by "\n" or "\r\n".
     *
     * @param code Source code
     * @param i Offset within source code
     * @return 2-element array containing line and column
     */
    private static int [] indexToLineCol(String code, int i) {
        int line = 1;
        int lineStart = 0;
        while (true) {
            final int next = nextLineStart(code, lineStart);
            if ((next < 0) || (next > i)) {
                // No further line begins at or before the offset, so the
                // offset lies on the current line.
                return new int[] { line, i - lineStart + 1 };
            }
            lineStart = next;
            ++line;
        }
    }

    /**
     * Returns the offset of the first character of the line which follows
     * the line containing a given offset.
     *
     * @param code Source code
     * @param from Offset at which to start looking for a line terminator
     * @return Offset just past the next "\r\n", "\r" or "\n", or -1 if there
     * is no line terminator at or after <code>from</code>
     */
    private static int nextLineStart(String code, int from) {
        final int length = code.length();
        for (int k = from; k < length; k++) {
            final char c = code.charAt(k);
            if (c == '\n') {
                return k + 1;
            }
            if (c == '\r') {
                // "\r\n" is a single terminator; a lone "\r" is one too.
                final boolean crlf =
                    (k + 1 < length) && (code.charAt(k + 1) == '\n');
                return crlf ? k + 2 : k + 1;
            }
        }
        return -1;
    }

    /**
     * Finds the position (0-based) in a string which corresponds to a given
     * line and column (1-based).
     *
     * <p>Only "\n" is treated as the end of a line. This also works for
     * "\r\n" line endings, because the "\r" is simply counted as the last
     * column of its line. If the string has fewer lines than
     * <code>line</code>, the search wraps around to the start of the string
     * and the result is not meaningful.
     *
     * <p>Converse of {@link #indexToLineCol(String, int)} for strings whose
     * lines are terminated by "\n" or "\r\n".
     *
     * @param code Source code
     * @param line Line number
     * @param column Column number
     * @return Offset within source code
     */
    private static int lineColToIndex(String code, int line, int column)
    {
        int lineStart = 0;
        for (int current = 1; current < line; current++) {
            // Skip to the character after the next "\n".
            lineStart = code.indexOf('\n', lineStart) + 1;
        }
        return lineStart + (column - 1);
    }

    /**
     * Generates a string of the source code annotated with caret symbols ("^")
     * at the beginning and end of the region.
     *
     * <p>For example, for the region <code>(1, 9, 1, 11)</code> and source
     * <code>"values (foo)"</code>,
     * yields the string <code>"values (^foo^)"</code>.
     *
     * <p>A point region is annotated with a single caret, placed before the
     * character at that point.
     *
     * @param source Source code
     * @return Source code annotated with position
     */
    public String annotate(String source) {
        return addCarets(source, startLine, startColumn, endLine, endColumn);
    }

    /**
     * Converts a string to a string with one or two carets in it. For example,
     * <code>addCarets("values (foo)", 1, 9, 1, 11)</code> yields "values
     * (^foo^)".
     *
     * <p>The first caret is inserted before the first character of the
     * region. Unless the region is a point, a second caret is inserted after
     * the last character of the region, or appended to the string if the
     * region extends to or beyond the end of the string.
     *
     * @param sql Source code
     * @param line Line number
     * @param col Column number
     * @param endLine Line number of end of region
     * @param endCol Column number of end of region
     * @return String annotated with region
     */
    private static String addCarets(
        String sql,
        int line,
        int col,
        int endLine,
        int endCol)
    {
        final int start = lineColToIndex(sql, line, col);
        final String head = sql.substring(0, start) + "^";
        if ((col == endCol) && (line == endLine)) {
            return head + sql.substring(start);
        }

        // Both cut points are computed in the original string, so that the
        // first caret cannot shift the second one when the region spans
        // several lines. The end is inclusive, hence "endCol + 1".
        int end = lineColToIndex(sql, endLine, endCol + 1);
        end = Math.max(start, Math.min(end, sql.length()));
        return head
            + sql.substring(start, end)
            + "^"
            + sql.substring(end);
    }

    /**
     * Combination of a region within an MDX statement with the source text
     * of the whole MDX statement.
     *
     * <p>Useful for reporting errors. For example, the error in the statement
     *
     * <blockquote>
     * <pre>
     * SELECT {<b><i>[Measures].[Units In Stock]</i></b>} ON COLUMNS
     * FROM [Sales]
     * </pre>
     * </blockquote>
     *
     * has source
     * "SELECT {[Measures].[Units In Stock]} ON COLUMNS\nFROM [Sales]" and
     * region [1:9, 1:35].
     */
    public static class RegionAndSource {
        /** Source MDX code, without caret markers. */
        public final String source;

        /** Region within the source; may be null if no region was given. */
        public final ParseRegion region;

        /**
         * Creates a RegionAndSource.
         *
         * @param source Source MDX code
         * @param region Coordinates of region within MDX code
         */
        public RegionAndSource(String source, ParseRegion region) {
            this.source = source;
            this.region = region;
        }
    }
}
442    
443    // End ParseRegion.java