--- /dev/null
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+
+JAVA_PKG_IUSE="doc source"
+
+inherit java-pkg-2 java-pkg-simple
+
+DESCRIPTION="HTML parser written in Java that can be used as a tool, library or Ant task"
+HOMEPAGE="http://htmlcleaner.sourceforge.net/"
+SRC_URI="mirror://sourceforge/htmlcleaner/files/${P}-src.zip"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64 ~x86"
+IUSE="test"
+
+CDEPEND="dev-java/jdom:2"
+
+DEPEND="${CDEPEND}
+ >=virtual/jdk-1.5
+ app-arch/unzip
+ dev-java/ant-core:0
+ test? ( dev-java/junit:4 )"
+
+RDEPEND="${CDEPEND}
+ >=virtual/jre-1.5"
+
+JAVA_SRC_DIR="src/main/java"
+JAVA_GENTOO_CLASSPATH="jdom-2"
+
+src_prepare() {
+ # Don't require default.xml to be in the current directory: rewrite the
+ # hard-coded "default.xml" literal in the Java source to the absolute
+ # path of the copy that src_install places under JAVA_PKG_SHAREPATH.
+ #
+ # The substituted string is a run-time path, so it has to include
+ # EPREFIX (empty on non-Prefix systems) to match the location the file
+ # is actually installed to; insinto applies the prefix on its own.
+ sed -i "s:\"default\.xml\":\"${EPREFIX}${JAVA_PKG_SHAREPATH}/default.xml\":g" \
+ src/main/java/org/htmlcleaner/ConfigFileTagProvider.java || die
+}
+
+src_configure() {
+ # Look up the ant-core jars as a build-only dependency and expose them
+ # through JAVA_CLASSPATH_EXTRA for the compile step.
+ local ant_core_jars
+ ant_core_jars="$(java-pkg_getjars --build-only ant-core)"
+
+ JAVA_CLASSPATH_EXTRA="${ant_core_jars}"
+}
+
+src_install() {
+ # Delegate the main install to the java-pkg-simple phase function, then
+ # register the package as an Ant task.
+ java-pkg-simple_src_install
+ java-pkg_register-ant-task
+
+ # Command-line launcher named after the package, entering at
+ # org.${PN}.CommandLine.
+ java-pkg_dolauncher "${PN}" --main "org.${PN}.CommandLine"
+
+ # Ship the bundled example configuration as default.xml; src_prepare
+ # points ConfigFileTagProvider.java at this location.
+ insinto "${JAVA_PKG_SHAREPATH}"
+ newins example.xml default.xml
+}
+
+src_test() {
+ local test_src="src/test/java"
+ # Test classpath: the test tree itself (classes are compiled in place),
+ # the package jar, JUnit 4 and the regular package classpath.
+ local test_cp="${test_src}:${PN}.jar:$(java-pkg_getjars junit-4,${JAVA_GENTOO_CLASSPATH})"
+
+ # Collect the *Test.java files, skipping Abstract* ones, and turn each
+ # path into a class name: drop the source-root prefix, drop ".java",
+ # then replace the remaining slashes with dots.
+ local test_classes=$(find "${test_src}" -name "*Test.java" ! -name "Abstract*")
+ test_classes="${test_classes//${test_src}\//}"
+ test_classes="${test_classes//.java/}"
+ test_classes="${test_classes//\//.}"
+
+ # Compile every test source next to its .java file, then run the
+ # collected classes under JUnit 4.
+ ejavac -classpath "${test_cp}" -d "${test_src}" $(find "${test_src}" -name "*.java")
+ ejunit4 -classpath "${test_cp}" ${test_classes}
+}
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <herd>java</herd>
+ <longdescription>HtmlCleaner is an open source HTML parser written in Java. HTML found on the Web is usually dirty, ill-formed and unsuitable for further processing. For any serious consumption of such documents, it is necessary to first clean up the mess and bring some order to the tags, attributes and ordinary text. For any given HTML document, HtmlCleaner reorders individual elements and produces well-formed XML. By default, it follows rules similar to those that most web browsers use to create the Document Object Model. However, you can provide custom tag and rule sets for tag filtering and balancing.</longdescription>
+</pkgmetadata>