git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@26 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
parent
574e550311
commit
9e78c8fc3d
11 changed files with 932 additions and 34 deletions
221
source/java/org/openoffice/da/comp/writer4latex/DeTeXtive.java
Normal file
221
source/java/org/openoffice/da/comp/writer4latex/DeTeXtive.java
Normal file
|
@ -0,0 +1,221 @@
|
|||
/************************************************************************
|
||||
*
|
||||
* DeTeXtive.java
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software Foundation.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
* MA 02111-1307 USA
|
||||
*
|
||||
* Copyright: 2002-2009 by Henrik Just
|
||||
*
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Version 1.2 (2009-06-19)
|
||||
*
|
||||
*/
|
||||
|
||||
package org.openoffice.da.comp.writer4latex;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.Mouth;
|
||||
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.Token;
|
||||
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.TokenType;
|
||||
|
||||
/** This class analyzes a stream and detects if it is a TeX stream.
|
||||
* Currently it is able to identify LaTeX and XeLaTeX (ConTeXt and plain TeX may be
|
||||
* added later).
|
||||
*/
|
||||
public class DeTeXtive {
|
||||
private Mouth mouth;
|
||||
private Token token;
|
||||
|
||||
private HashSet<String> packages;
|
||||
|
||||
/** Construct a new DeTeXtive
|
||||
*/
|
||||
public DeTeXtive() {
|
||||
}
|
||||
|
||||
/** Detect the format of a given stream
|
||||
*
|
||||
* @param is the input stream
|
||||
* @return a string representing the detected format; null if the format is unknown.
|
||||
* Currently the values "LaTeX", "XeLaTeX" are supported.
|
||||
* @throws IOException if we fail to read the stream
|
||||
*/
|
||||
public String deTeXt(InputStream is) throws IOException {
|
||||
// It makes no harm to assume that the stream uses ISO Latin1 - we only consider ASCII characters
|
||||
mouth = new Mouth(new InputStreamReader(is,"ISO8859_1"));
|
||||
token = mouth.getTokenObject();
|
||||
|
||||
packages = new HashSet<String>();
|
||||
|
||||
mouth.getToken();
|
||||
|
||||
if (parseHeader() && parsePreamble()) {
|
||||
if (packages.contains("xunicode")) {
|
||||
return "XeLaTeX";
|
||||
}
|
||||
else {
|
||||
return "LaTeX";
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown format
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
// The parser!
|
||||
|
||||
// Parse a LaTeX header such as \documentclass[a4paper]{article}
|
||||
// Return true in case of success
|
||||
private boolean parseHeader() throws IOException {
|
||||
skipBlanks();
|
||||
if (token.isCS("documentclass") || token.isCS("documentstyle")) {
|
||||
// The first non-blank token is \documentclass or \documentstyle => could be a LaTeX document
|
||||
System.out.println("** Found "+token.toString());
|
||||
mouth.getToken();
|
||||
skipSpaces();
|
||||
// Skip options, if any
|
||||
if (token.is('[',TokenType.OTHER)) {
|
||||
skipOptional();
|
||||
skipSpaces();
|
||||
}
|
||||
if (token.getType()==TokenType.BEGIN_GROUP) {
|
||||
// Get class name
|
||||
String sClassName = parseArgumentAsString();
|
||||
System.out.println("** Found the class name "+sClassName);
|
||||
// Accept any class name of one or more characters
|
||||
if (sClassName.length()>0) { return true; }
|
||||
}
|
||||
}
|
||||
System.out.println("** Doesn't look like LaTeX; failed to get class name");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse a LaTeX preamble
|
||||
// Return true in case of success (that is, \begin{document} was found)
|
||||
private boolean parsePreamble() throws IOException {
|
||||
while (token.getType()!=TokenType.ENDINPUT) {
|
||||
if (token.isCS("usepackage")) {
|
||||
// We collect the names of all used packages, but discard their options
|
||||
// (Recall that this is only relevant for LaTeX 2e)
|
||||
mouth.getToken();
|
||||
skipSpaces();
|
||||
if (token.is('[',TokenType.OTHER)) {
|
||||
skipOptional();
|
||||
skipSpaces();
|
||||
}
|
||||
String sName = parseArgumentAsString();
|
||||
System.out.println("** Found package "+sName);
|
||||
packages.add(sName);
|
||||
}
|
||||
else if (token.getType()==TokenType.BEGIN_GROUP) {
|
||||
// We ignore anything inside a group
|
||||
skipGroup();
|
||||
}
|
||||
else if (token.isCS("begin")) {
|
||||
// This would usually indicate the end of the preamble
|
||||
mouth.getToken();
|
||||
skipSpaces();
|
||||
if ("document".equals(parseArgumentAsString())) {
|
||||
System.out.println("Found \\begin{document}");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Any other content in the preamble is simply ignored
|
||||
mouth.getToken();
|
||||
}
|
||||
}
|
||||
System.out.println("** Doesn't look like LaTeX; failed to find \\begin{document}");
|
||||
return false;
|
||||
}
|
||||
|
||||
private void skipBlanks() throws IOException {
|
||||
while (token.getType()==TokenType.SPACE || token.isCS("par")) {
|
||||
mouth.getToken();
|
||||
}
|
||||
}
|
||||
|
||||
private void skipSpaces() throws IOException {
|
||||
// Actually, we will never get two space tokens in a row
|
||||
while (token.getType()==TokenType.SPACE) {
|
||||
mouth.getToken();
|
||||
}
|
||||
}
|
||||
|
||||
private void skipOptional() throws IOException {
|
||||
assert token.is('[', TokenType.OTHER);
|
||||
|
||||
mouth.getToken(); // skip the [
|
||||
while (!token.is(']',TokenType.OTHER) && token.getType()!=TokenType.ENDINPUT) {
|
||||
if (token.getType()==TokenType.BEGIN_GROUP) {
|
||||
skipGroup();
|
||||
}
|
||||
else {
|
||||
mouth.getToken(); // skip this token
|
||||
}
|
||||
}
|
||||
mouth.getToken(); // skip the ]
|
||||
}
|
||||
|
||||
private void skipGroup() throws IOException {
|
||||
assert token.getType()==TokenType.BEGIN_GROUP;
|
||||
|
||||
mouth.getToken(); // skip the {
|
||||
while (token.getType()!=TokenType.END_GROUP && token.getType()!=TokenType.ENDINPUT) {
|
||||
if (token.getType()==TokenType.BEGIN_GROUP) {
|
||||
skipGroup();
|
||||
}
|
||||
else {
|
||||
mouth.getToken(); // skip this token
|
||||
}
|
||||
}
|
||||
mouth.getToken(); // skip the }
|
||||
}
|
||||
|
||||
private String parseArgumentAsString() throws IOException {
|
||||
if (token.getType()==TokenType.BEGIN_GROUP) {
|
||||
// Argument is contained in a group
|
||||
mouth.getToken(); // skip the {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
while (token.getType()!=TokenType.END_GROUP && token.getType()!=TokenType.ENDINPUT) {
|
||||
if (token.getType()!=TokenType.COMMAND_SEQUENCE) {
|
||||
// should not include cs, ignore if it happens
|
||||
sb.append(token.getChar());
|
||||
}
|
||||
mouth.getToken();
|
||||
}
|
||||
mouth.getToken(); // skip the }
|
||||
return sb.toString();
|
||||
}
|
||||
else {
|
||||
// Argument is a single token
|
||||
String s = "";
|
||||
if (token.getType()!=TokenType.COMMAND_SEQUENCE) {
|
||||
// should not include cs, ignore if it happens
|
||||
s = token.getString();
|
||||
}
|
||||
mouth.getToken();
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue