// HtmlAssetTranslator.java

/*
 * Copyright 2011 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.zxing.client.j2se;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.regex.Pattern;

/**
 * <p>A utility which auto-translates the English-language text in a directory of HTML documents using
 * Google Translate.</p>
 *
 * <p>Pass the Android client assets/ directory as the first argument, and the language(s) to translate
 * to as the second argument, written as a comma-separated list. Specify "all" as the language to
 * translate into every language that already has an {@code html-xx} directory. Each argument after
 * this is the name of a file to translate; if the first one is "all", all files will be translated.</p>
 *
 * <p>Usage: {@code HtmlAssetTranslator android/assets/ (all|lang1[,lang2 ...]) (all|file1.html[ file2.html ...])}</p>
 *
 * <p>{@code android/assets/ es all} will translate .html files in subdirectory html-en to
 * directory html-es, for example. Note that only text nodes in the HTML document are translated.
 * Any text that is a child of a node with {@code class="notranslate"} will not be translated. It will
 * also add a note at the end of the translated page that indicates it was automatically translated.</p>
 *
 * @author Sean Owen
 * @deprecated without replacement since 3.4.2
 */
@Deprecated
public final class HtmlAssetTranslator {

  private static final Pattern COMMA = Pattern.compile(",");
  /** Matches the XML declaration that is swapped for an HTML DOCTYPE when a file is written. */
  private static final Pattern XML_DECLARATION = Pattern.compile("<\\?xml[^>]+>");
  /** Prefix shared by all per-language asset directories, e.g. {@code html-es}. */
  private static final String HTML_DIR_PREFIX = "html-";
  /** Name of the directory holding the English source documents. */
  private static final String ENGLISH_DIR_NAME = HTML_DIR_PREFIX + "en";

  private HtmlAssetTranslator() {}

  /**
   * Command-line entry point. Prints a usage message to standard error and returns when fewer
   * than three arguments are supplied.
   *
   * @param args assets directory, language list (or "all"), then file names (or "all")
   * @throws IOException if reading, translating or writing any document fails
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 3) {
      System.err.println("Usage: HtmlAssetTranslator android/assets/ " +
                         "(all|lang1[,lang2 ...]) (all|file1.html[ file2.html ...])");
      return;
    }
    Path assetsDir = Paths.get(args[0]);
    Collection<String> languagesToTranslate = parseLanguagesToTranslate(assetsDir, args[1]);
    List<String> restOfArgs = Arrays.asList(args).subList(2, args.length);
    Collection<String> fileNamesToTranslate = parseFileNamesToTranslate(assetsDir, restOfArgs);
    for (String language : languagesToTranslate) {
      translateOneLanguage(assetsDir, language, fileNamesToTranslate);
    }
  }

  /**
   * Resolves the language argument into a collection of language codes.
   *
   * @param assetsDir directory that contains the {@code html-xx} subdirectories
   * @param languageArg either "all" or a comma-separated list of language codes
   * @return for "all", the suffix of every real (non-symlink) {@code html-*} subdirectory other
   *  than {@code html-en}; otherwise the comma-separated pieces of {@code languageArg}
   * @throws IOException if the assets directory cannot be listed
   */
  private static Collection<String> parseLanguagesToTranslate(Path assetsDir,
                                                              String languageArg) throws IOException {
    if (!"all".equals(languageArg)) {
      return Arrays.asList(COMMA.split(languageArg));
    }
    Collection<String> languages = new ArrayList<>();
    DirectoryStream.Filter<Path> fileFilter = entry -> {
      String fileName = entry.getFileName().toString();
      return Files.isDirectory(entry) && !Files.isSymbolicLink(entry) &&
          fileName.startsWith(HTML_DIR_PREFIX) && !ENGLISH_DIR_NAME.equals(fileName);
    };
    try (DirectoryStream<Path> dirs = Files.newDirectoryStream(assetsDir, fileFilter)) {
      for (Path languageDir : dirs) {
        // Strip the "html-" prefix to leave just the language code
        languages.add(languageDir.getFileName().toString().substring(HTML_DIR_PREFIX.length()));
      }
    }
    return languages;
  }

  /**
   * Resolves the trailing command-line arguments into the file names to translate.
   *
   * @param assetsDir directory that contains the {@code html-en} subdirectory
   * @param restOfArgs non-empty list of the arguments following the language argument
   * @return the names of all {@code *.html} files in {@code html-en} if the first argument is
   *  "all"; otherwise {@code restOfArgs} itself
   * @throws IOException if the English directory cannot be listed
   */
  private static Collection<String> parseFileNamesToTranslate(Path assetsDir,
                                                              List<String> restOfArgs) throws IOException {
    if (!"all".equals(restOfArgs.get(0))) {
      return restOfArgs;
    }
    Collection<String> fileNamesToTranslate = new ArrayList<>();
    Path htmlEnAssetDir = assetsDir.resolve(ENGLISH_DIR_NAME);
    try (DirectoryStream<Path> files = Files.newDirectoryStream(htmlEnAssetDir, "*.html")) {
      for (Path file : files) {
        fileNamesToTranslate.add(file.getFileName().toString());
      }
    }
    return fileNamesToTranslate;
  }

  /**
   * Translates the selected English documents into one language, creating the target
   * {@code html-<language>} directory if needed.
   *
   * @param assetsDir directory that contains the {@code html-xx} subdirectories
   * @param language language code to translate into
   * @param filesToTranslate names of the files to translate; an empty collection selects every
   *  {@code .html} file in {@code html-en}
   * @throws IOException if a directory cannot be created or listed, or a file fails to translate
   */
  private static void translateOneLanguage(Path assetsDir,
                                           String language,
                                           final Collection<String> filesToTranslate) throws IOException {
    Path targetHtmlDir = assetsDir.resolve(HTML_DIR_PREFIX + language);
    Files.createDirectories(targetHtmlDir);
    Path englishHtmlDir = assetsDir.resolve(ENGLISH_DIR_NAME);

    // Translated once per language and reused as the footer note of every file
    String translationTextTranslated =
        StringsResourceTranslator.translateString("Translated by Google Translate.", language);

    DirectoryStream.Filter<Path> filter = entry -> {
      String name = entry.getFileName().toString();
      return name.endsWith(".html") && (filesToTranslate.isEmpty() || filesToTranslate.contains(name));
    };
    try (DirectoryStream<Path> files = Files.newDirectoryStream(englishHtmlDir, filter)) {
      for (Path sourceFile : files) {
        translateOneFile(language, targetHtmlDir, sourceFile, translationTextTranslated);
      }
    }
  }

  /**
   * Translates a single document and writes the result, under the same file name, into the
   * target directory.
   *
   * @param language language code to translate into
   * @param targetHtmlDir directory that receives the translated file
   * @param sourceFile English source document; must parse as XML and contain a {@code <body>}
   * @param translationTextTranslated already-translated note appended as a final paragraph
   * @throws IOException if the source cannot be read or parsed, has no {@code <body>} element,
   *  or the destination cannot be written
   */
  private static void translateOneFile(String language,
                                       Path targetHtmlDir,
                                       Path sourceFile,
                                       String translationTextTranslated) throws IOException {

    Path destFile = targetHtmlDir.resolve(sourceFile.getFileName());

    Document document = parseDocument(sourceFile);
    Element rootElement = document.getDocumentElement();
    rootElement.normalize();

    // Look the body up before translating anything, so a document without one is rejected
    // with a clear error instead of failing with a NullPointerException after the work is done.
    Node body = rootElement.getElementsByTagName("body").item(0);
    if (body == null) {
      throw new IOException("No <body> element found in " + sourceFile);
    }

    translateTextNodes(rootElement, language);

    Node translateText = document.createTextNode(translationTextTranslated);
    Node paragraph = document.createElement("p");
    paragraph.appendChild(translateText);
    body.appendChild(paragraph);

    Files.write(destFile, Collections.singleton(serialize(document)), StandardCharsets.UTF_8);
  }

  /**
   * Parses a file into a DOM document, with the secure-processing feature enabled.
   *
   * @param sourceFile file to parse
   * @return the parsed document
   * @throws IOException if the file cannot be read, or is not well-formed (wrapping the
   *  {@link SAXException})
   * @throws IllegalStateException if the parser cannot be configured
   */
  private static Document parseDocument(Path sourceFile) throws IOException {
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
      DocumentBuilder builder = factory.newDocumentBuilder();
      return builder.parse(sourceFile.toFile());
    } catch (ParserConfigurationException pce) {
      throw new IllegalStateException(pce);
    } catch (SAXException sae) {
      throw new IOException(sae);
    }
  }

  /**
   * Walks the tree breadth-first from {@code root} and replaces the content of every non-blank
   * text node that is reached with its translation, padded with one space on each side.
   * Children of a node are only visited when {@link #shouldTranslate(Node)} accepts that node.
   *
   * @param root node at which the traversal starts
   * @param language language code to translate into
   * @throws IOException if translating a piece of text fails
   */
  private static void translateTextNodes(Node root, String language) throws IOException {
    Queue<Node> nodes = new LinkedList<>();
    nodes.add(root);

    while (!nodes.isEmpty()) {
      Node node = nodes.poll();
      if (shouldTranslate(node)) {
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
          nodes.add(children.item(i));
        }
      }
      if (node.getNodeType() == Node.TEXT_NODE) {
        String text = node.getTextContent();
        if (!text.trim().isEmpty()) {
          text = StringsResourceTranslator.translateString(text, language);
          node.setTextContent(' ' + text + ' ');
        }
      }
    }
  }

  /**
   * Serializes the document to a string via the DOM Load and Save API, replacing the XML
   * declaration with an HTML DOCTYPE.
   *
   * @param document document to serialize
   * @return the serialized markup
   * @throws IllegalStateException if the DOM implementation registry cannot be created
   */
  private static String serialize(Document document) {
    DOMImplementationRegistry registry;
    try {
      registry = DOMImplementationRegistry.newInstance();
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
      throw new IllegalStateException(e);
    }

    DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
    LSSerializer writer = impl.createLSSerializer();
    String fileAsString = writer.writeToString(document);
    // Replace default XML header with HTML DOCTYPE
    return XML_DECLARATION.matcher(fileAsString).replaceAll("<!DOCTYPE HTML>");
  }

  /**
   * Decides whether the children of a node should be visited for translation.
   *
   * @param node node to examine
   * @return {@code false} if the node's {@code class} attribute contains "notranslate", if it is
   *  a {@code script} node, or if its text content contains no letter; {@code true} otherwise
   */
  private static boolean shouldTranslate(Node node) {
    // Ignore "notranslate" nodes
    NamedNodeMap attributes = node.getAttributes();
    if (attributes != null) {
      Node classAttribute = attributes.getNamedItem("class");
      if (classAttribute != null) {
        String classValue = classAttribute.getTextContent();
        if (classValue != null && classValue.contains("notranslate")) {
          return false;
        }
      }
    }
    if ("script".equalsIgnoreCase(node.getNodeName())) {
      return false;
    }
    // Ignore non-text snippets. Letters are tested per code point, not per char, so that
    // letters outside the Basic Multilingual Plane (encoded as surrogate pairs) are recognized.
    String textContent = node.getTextContent();
    return textContent != null && textContent.codePoints().anyMatch(Character::isLetter);
  }

}