This fixes up PDF.cc to handle acroread in Acrobat 4, which has a bug with the -pairs option. It turns out that even without the -pairs option, acroread 4 is still prone to segmentation violations when generating PostScript, so acroread 3 is a better choice anyway. Apply this patch AFTER applying the htdig-3.1.2-bugfixes.patch. --- htdig-3.1.2/htdig/PDF.cc.orig Tue Mar 23 17:17:33 1999 +++ htdig-3.1.2/htdig/PDF.cc Fri Aug 13 16:05:16 1999 @@ -104,13 +104,22 @@ PDF::parse(Retriever &retriever, URL &ur acroread = "acroread"; // Check for existance of acroread program! (if not, return) - //struct stat stat_buf; - // Check that it exists, and is a regular file. - //if ((stat(acroread, &stat_buf) == -1) || !S_ISREG(stat_buf.st_mode)) - // { - // printf("PDF::parse: cannot find acroread\n"); - // return; - // } + struct stat stat_buf; + static int notfound = 0; + if (notfound) // we only need to complain once + return; + String arg0 = acroread; + char *endarg = strchr(arg0.get(), ' '); + if (endarg) + *endarg = '\0'; + // If first arg is a path, check that it exists, and is a regular file. + if (strchr(arg0.get(), '/') && + ((stat(arg0.get(), &stat_buf) == -1) || !S_ISREG(stat_buf.st_mode))) + { + printf("PDF::parse: cannot find pdf parser %s\n", arg0.get()); + notfound = 1; + return; + } // Write the pdf contents in a temp file to give it to acroread @@ -140,9 +149,19 @@ PDF::parse(Retriever &retriever, URL &ur // Use acroread as a filter to convert to PostScript. - // Now generalized to allow xpdf as a parser (works with most recent xpdf) + // Now generalized to allow xpdf as a parser, or other compatible parsers + // (It was claimed it works with most recent xpdf, but it doesn't!) // acroread << " -toPostScript " << pdfName << " " << tmpdir << " 2>&1"; - acroread << " " << pdfName << " " << psName << " 2>&1"; + String dest = psName; + if (strstr(acroread.get(), "acroread")) + { + // special-case tests only for acroread (what else you gonna use?) + if (!strstr(acroread.get(), "-toPostScript")) + acroread << " -toPostScript "; // add missing option + if (!strstr(acroread.get(), "-pairs")) // don't use -pairs with 4.0 + dest = tmpdir; + } + acroread << " " << pdfName << " " << dest << " 2>&1"; if (system(acroread)) { --- htdig-3.1.2/htcommon/defaults.cc.orig Thu Mar 25 11:49:40 1999 +++ htdig-3.1.2/htcommon/defaults.cc Fri Aug 13 16:05:16 1999 @@ -21,7 +21,7 @@ ConfigDefaults defaults[] = {"database_dir", DATABASE_DIR}, {"bin_dir", BIN_DIR}, {"image_url_prefix", IMAGE_URL_PREFIX}, - {"pdf_parser", PDF_PARSER " -toPostScript -pairs"}, + {"pdf_parser", PDF_PARSER " -toPostScript"}, {"version", VERSION}, // --- htdig-3.1.2/htdoc/attrs.html.orig Fri Aug 6 14:00:28 1999 +++ htdig-3.1.2/htdoc/attrs.html Tue Aug 17 10:55:45 1999 @@ -4271,7 +4271,7 @@ default:
The default value of this attribute is determined at compile time, to include the path to the acroread @@ -4301,7 +4320,7 @@ example: