w2phtml/source/java/org/openoffice/da/comp/writer2latex/DeTeXtive.java
henrikjust 44f4c68801 Change license to GPLv3
git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@272 f0f2a975-2e09-46c8-9428-3b39399b9f3c
2018-03-06 20:06:05 +00:00

220 lines
6.3 KiB
Java

/************************************************************************
*
* DeTeXtive.java
*
* Copyright: 2002-2014 by Henrik Just
*
* This file is part of Writer2LaTeX.
*
* Writer2LaTeX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Writer2LaTeX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Writer2LaTeX. If not, see <http://www.gnu.org/licenses/>.
*
* Version 1.4 (2014-09-24)
*
*/
package org.openoffice.da.comp.writer2latex;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.HashSet;
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.Mouth;
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.Token;
import org.openoffice.da.comp.w2lcommon.tex.tokenizer.TokenType;
/** This class analyzes a stream and detects if it is a TeX stream.
 *  Currently it is able to identify LaTeX and XeLaTeX (ConTeXt and plain TeX may be
 *  added later).
 *
 *  <p>A stream is accepted as LaTeX if its first non-blank token is
 *  <code>\documentclass</code> or <code>\documentstyle</code> followed by a non-empty
 *  class name, and a <code>\begin{document}</code> is found later on. It is reported
 *  as XeLaTeX if, in addition, the package <code>xunicode</code> is loaded in the
 *  preamble.</p>
 *
 *  <p>The parser state is kept in instance fields while a stream is analyzed, so a
 *  single instance should not be used from several threads at the same time.</p>
 */
public class DeTeXtive {

    // Tokenizer reading the stream currently being analyzed
    private Mouth mouth;

    // The current token. It is fetched once from the mouth and never reassigned here;
    // the code relies on it reflecting the latest call of mouth.getToken()
    private Token token;

    // Names of all packages loaded in the preamble (one entry per package)
    private HashSet<String> packages;

    /** Construct a new DeTeXtive
     */
    public DeTeXtive() {
    }

    /** Detect the format of a given stream
     *
     *  @param is the input stream (it is read, but not closed, by this method)
     *  @return a string representing the detected format; null if the format is unknown.
     *  Currently the values "LaTeX", "XeLaTeX" are supported.
     *  @throws IOException if we fail to read the stream
     */
    public String deTeXt(InputStream is) throws IOException {
        // It does no harm to assume that the stream uses ISO Latin1 - we only consider ASCII characters
        mouth = new Mouth(new InputStreamReader(is,"ISO8859_1"));
        token = mouth.getTokenObject();
        packages = new HashSet<String>();

        // Load the first token before the parser starts inspecting it
        mouth.getToken();

        if (parseHeader() && parsePreamble()) {
            if (packages.contains("xunicode")) {
                return "XeLaTeX";
            }
            else {
                return "LaTeX";
            }
        }

        // Unknown format
        return null;
    }

    // The parser!

    // Parse a LaTeX header such as \documentclass[a4paper]{article}
    // Return true in case of success
    private boolean parseHeader() throws IOException {
        skipBlanks();
        if (token.isCS("documentclass") || token.isCS("documentstyle")) {
            // The first non-blank token is \documentclass or \documentstyle => could be a LaTeX document
            mouth.getToken();
            skipSpaces();
            // Skip options, if any
            if (token.is('[',TokenType.OTHER)) {
                skipOptional();
                skipSpaces();
            }
            if (token.getType()==TokenType.BEGIN_GROUP) {
                // Get class name
                String sClassName = parseArgumentAsString();
                // Accept any class name of one or more characters
                if (sClassName.length()>0) { return true; }
            }
        }
        // Doesn't look like LaTeX; failed to get class name
        return false;
    }

    // Parse a LaTeX preamble
    // Return true in case of success (that is, \begin{document} was found)
    private boolean parsePreamble() throws IOException {
        while (token.getType()!=TokenType.ENDINPUT) {
            if (token.isCS("usepackage")) {
                // We collect the names of all used packages, but discard their options
                // (Recall that this is only relevant for LaTeX 2e)
                mouth.getToken();
                skipSpaces();
                if (token.is('[',TokenType.OTHER)) {
                    skipOptional();
                    skipSpaces();
                }
                // The argument may name several packages separated by commas
                addPackageNames(parseArgumentAsString());
            }
            else if (token.getType()==TokenType.BEGIN_GROUP) {
                // We ignore anything inside a group
                skipGroup();
            }
            else if (token.isCS("begin")) {
                // This would usually indicate the end of the preamble
                mouth.getToken();
                skipSpaces();
                if ("document".equals(parseArgumentAsString())) {
                    return true;
                }
                // Any other environment is ignored and the scan continues
            }
            else {
                // Any other content in the preamble is simply ignored
                mouth.getToken();
            }
        }
        // Doesn't look like LaTeX; failed to find \begin{document}
        return false;
    }

    // Register the packages named in the argument of a usepackage command.
    // The argument is a comma separated list (e.g. "fontspec, xunicode"); each name is
    // stored separately, without surrounding white space, and empty entries are dropped.
    private void addPackageNames(String sNames) {
        if (sNames==null) { return; }
        for (String sRawName : sNames.split(",")) {
            String sName = sRawName.trim();
            if (sName.length()>0) {
                packages.add(sName);
            }
        }
    }

    // Skip any sequence of space tokens and \par tokens
    private void skipBlanks() throws IOException {
        while (token.getType()==TokenType.SPACE || token.isCS("par")) {
            mouth.getToken();
        }
    }

    // Skip space tokens
    private void skipSpaces() throws IOException {
        // Actually, we will never get two space tokens in a row
        while (token.getType()==TokenType.SPACE) {
            mouth.getToken();
        }
    }

    // Skip an optional argument [...]; the current token must be the opening [
    // Groups inside the argument are skipped as a whole, so a ] inside braces does not
    // terminate the argument. The scan also stops at the end of the input.
    private void skipOptional() throws IOException {
        assert token.is('[', TokenType.OTHER);

        mouth.getToken(); // skip the [
        while (!token.is(']',TokenType.OTHER) && token.getType()!=TokenType.ENDINPUT) {
            if (token.getType()==TokenType.BEGIN_GROUP) {
                skipGroup();
            }
            else {
                mouth.getToken(); // skip this token
            }
        }
        mouth.getToken(); // skip the ]
    }

    // Skip a group {...} including any nested groups; the current token must be the
    // opening {. The scan also stops at the end of the input.
    private void skipGroup() throws IOException {
        assert token.getType()==TokenType.BEGIN_GROUP;

        mouth.getToken(); // skip the {
        while (token.getType()!=TokenType.END_GROUP && token.getType()!=TokenType.ENDINPUT) {
            if (token.getType()==TokenType.BEGIN_GROUP) {
                skipGroup();
            }
            else {
                mouth.getToken(); // skip this token
            }
        }
        mouth.getToken(); // skip the }
    }

    // Read a mandatory argument and return its content as a string.
    // The argument is either a group {...} (the characters up to the first closing
    // brace are collected) or a single token. Command sequences are left out of the result.
    private String parseArgumentAsString() throws IOException {
        if (token.getType()==TokenType.BEGIN_GROUP) {
            // Argument is contained in a group
            mouth.getToken(); // skip the {
            StringBuilder sb = new StringBuilder();
            while (token.getType()!=TokenType.END_GROUP && token.getType()!=TokenType.ENDINPUT) {
                if (token.getType()!=TokenType.COMMAND_SEQUENCE) {
                    // should not include cs, ignore if it happens
                    sb.append(token.getChar());
                }
                mouth.getToken();
            }
            mouth.getToken(); // skip the }
            return sb.toString();
        }
        else {
            // Argument is a single token
            String s = "";
            if (token.getType()!=TokenType.COMMAND_SEQUENCE) {
                // should not include cs, ignore if it happens
                s = token.getString();
            }
            mouth.getToken();
            return s;
        }
    }

}