今天发现个好东西PHPWord
今年搞了一个小调查,传回来100来份WORD文档,不想一题题去复制统计,所以就想搞个小程序去自动收集
首先我想用C#的,后面发现它操作Word需要用到一个叫Microsoft Office 14.0 Object Library的东东,这个一般在安装完整版的word里面会有的。
偏偏我安装的是精简版,没这个东西,于是想想还是转到php吧。
然后一搜索,发现github有个开源的PHPOffice/PHPWord,顿时感觉真香,赶紧下一个来用
我的思路是PHP打开WORD读取里面文字,然后选出括号内的选项,处理输出
中间参考了一下网上的代码
<?php
/**
 * Tallies multiple-choice answers from a folder of Word (.docx) survey replies.
 *
 * Every file in the survey folder is loaded with PHPWord and flattened to
 * text; each bracketed answer such as "(a)" then becomes one upper-cased cell
 * in that file's row of the HTML table written to the output.
 */

use PhpOffice\PhpWord\Autoloader;
use PhpOffice\PhpWord\Element\Image;
use PhpOffice\PhpWord\Element\Text;
use PhpOffice\PhpWord\Element\TextRun;
use PhpOffice\PhpWord\IOFactory;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Settings;
use PhpOffice\PhpWord\Style\Font as FontStyle;
use PhpOffice\PhpWord\Style\Paragraph as ParagraphStyle;

// Pull in the library entry point and the hand-written class loader.
require_once __DIR__ . '/PHPWord/src/PhpWord/PhpWord.php';
require_once __DIR__ . '/PHPWord/src/PhpWord/Autoloader.php';

// Root directory the class loader resolves PhpOffice\PhpWord\* classes against.
// Built with DIRECTORY_SEPARATOR so the script is not tied to Windows paths.
define(
    'PHPWORD_BASE_PATH',
    __DIR__ . DIRECTORY_SEPARATOR . 'PHPWord' . DIRECTORY_SEPARATOR . 'src' . DIRECTORY_SEPARATOR . 'PhpWord'
);

// The bundled loader is the *global* class "Autoloader"; the leading backslash
// keeps the call from resolving to the imported PhpOffice\PhpWord\Autoloader.
\Autoloader::Register();

date_default_timezone_set('UTC');
error_reporting(E_ALL);
define('CLI', PHP_SAPI === 'cli');
define('EOL', CLI ? PHP_EOL : '<br />');
define('SCRIPT_FILENAME', basename($_SERVER['SCRIPT_FILENAME'], '.php'));
define('IS_INDEX', SCRIPT_FILENAME == 'index');
Settings::loadConfig();

/**
 * Lists the entries of a directory, without the "." and ".." pseudo-entries.
 *
 * @param string $path Directory to list.
 *
 * @return array|false Entry names (files and sub-directories), or false when
 *                     $path is not a directory or cannot be read.
 */
function getDirContent($path)
{
    if (!is_dir($path)) {
        return false;
    }

    $entries = scandir($path);
    if ($entries === false) {
        return false;
    }

    return array_values(array_diff($entries, array('.', '..')));
}

/**
 * Escapes a UTF-8 string for use in HTML text or attribute values.
 *
 * @param string $value Raw UTF-8 text.
 *
 * @return string Escaped text; invalid byte sequences are substituted rather
 *                than turning the whole result into an empty string.
 */
function escapeHtml($value)
{
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Converts UTF-8 markup to the requested output encoding.
 *
 * @param string $utf8     UTF-8 input.
 * @param string $encoding Target encoding name understood by mbstring.
 *
 * @return string The input unchanged for UTF-8, otherwise the converted text.
 */
function convertFromUtf8($utf8, $encoding)
{
    if (strcasecmp($encoding, 'UTF-8') === 0) {
        return $utf8;
    }

    return mb_convert_encoding($utf8, $encoding, 'UTF-8');
}

/**
 * Builds the opening <p> tag for a section-level element.
 *
 * @param object $element Element taken from a section's element list.
 *
 * @return string A styled <p ...> tag when the element carries a paragraph
 *                style object, a plain <p> otherwise.
 */
function openParagraphTag($element)
{
    // Not every element type exposes getParagraphStyle(), and the getter may
    // hand back something other than a style object, so check both.
    $style = method_exists($element, 'getParagraphStyle') ? $element->getParagraphStyle() : null;

    if ($style instanceof ParagraphStyle) {
        return '<p style="text-align:' . $style->getAlignment() . ';text-indent:20px;">';
    }

    return '<p>';
}

/**
 * Renders one text element as a <span> carrying its basic font styling.
 *
 * @param Text   $text     Text element to render.
 * @param string $encoding Output encoding of the returned markup.
 *
 * @return string The <span> markup in $encoding.
 */
function renderTextSpan(Text $text, $encoding)
{
    $css = '';
    $font = $text->getFontStyle();

    // Only a font style object can be queried for name, size and weight.
    if ($font instanceof FontStyle) {
        $family = $font->getName();
        $size = $font->getSize();

        if ($family) {
            $css .= 'font-family:' . escapeHtml($family) . ';';
        }
        if ($size) {
            // PHPWord font sizes are typographic points, not pixels.
            $css .= "font-size:{$size}pt;";
        }
        if ($font->isBold()) {
            $css .= 'font-weight:bold;';
        }
    }

    $span = sprintf('<span style="%s">%s</span>', $css, escapeHtml($text->getText()));

    return convertFromUtf8($span, $encoding);
}

/**
 * Saves an embedded image under images/ and returns an <img> tag pointing at it.
 *
 * @param Image $image Image element to export.
 *
 * @return string The <img> markup, or '' when the image could not be written.
 */
function renderImageTag(Image $image)
{
    $directory = 'images';
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        return '';
    }

    $imageSrc = $directory . '/' . md5($image->getSource()) . '.' . $image->getImageExtension();
    $imageData = $image->getImageStringData(true); // base64-encoded image data

    if (!is_string($imageData) || file_put_contents($imageSrc, base64_decode($imageData)) === false) {
        return '';
    }

    return '<img src="' . $imageSrc . '" style="width:100%;height:auto">';
}

/**
 * Converts a Word document into simple HTML: one <p> per section element,
 * with text rendered as styled <span>s and images exported to images/.
 *
 * @param string $source   Path of the document to read.
 * @param string $encoding Encoding of the returned markup. Defaults to 'GBK';
 *                         pass 'UTF-8' to keep the text exactly as PHPWord
 *                         delivers it.
 *
 * @return string HTML markup in $encoding.
 */
function readWordToHtml($source, $encoding = 'GBK')
{
    $phpWord = IOFactory::load($source);
    $html = '';

    foreach ($phpWord->getSections() as $section) {
        foreach ($section->getElements() as $element) {
            $html .= openParagraphTag($element);

            if ($element instanceof TextRun) {
                foreach ($element->getElements() as $child) {
                    if ($child instanceof Text) {
                        $html .= renderTextSpan($child, $encoding);
                    } elseif ($child instanceof Image) {
                        $html .= renderImageTag($child);
                    }
                }
            } elseif ($element instanceof Text) {
                // A paragraph can also be a bare text element with no run around it.
                $html .= renderTextSpan($element, $encoding);
            }

            $html .= '</p>';
        }
    }

    return $html;
}

/**
 * Extracts the bracketed multiple-choice answers from UTF-8 HTML.
 *
 * @param string $html UTF-8 markup, e.g. readWordToHtml($file, 'UTF-8').
 *
 * @return array Upper-cased bracket contents (letters A-F), in document order.
 */
function extractChoiceAnswers($html)
{
    $text = preg_replace('@<(.*?)>@is', '', $html);
    $text = str_replace(' ', '', (string) $text);

    // Respondents often type full-width brackets; fold them to ASCII so one
    // pattern matches both. In UTF-8 a byte-wise replace cannot split a character.
    $text = str_replace(array('（', '）'), array('(', ')'), $text);

    // "m": treat the text as multi-line so a match is found on whatever line it sits.
    if (!preg_match_all('/[\(]([ABCDEFabcdef ]*)[\)]/m', $text, $matches)) {
        return array();
    }

    return array_map('strtoupper', $matches[1]);
}

$rdir = 'wenjianjia1'; // folder holding the returned survey documents
$dir1 = getDirContent($rdir);
if ($dir1 === false) {
    // A missing folder produces an empty table instead of an error from count().
    $dir1 = array();
}

echo "<table border=1>";
foreach ($dir1 as $name) {
    $source = $rdir . DIRECTORY_SEPARATOR . $name;
    if (!is_file($source)) {
        continue; // sub-directories cannot be opened as documents
    }

    try {
        $resut = '';
        foreach (extractChoiceAnswers(readWordToHtml($source, 'UTF-8')) as $answer) {
            $resut .= "<td>" . $answer . "</td>";
        }
    } catch (\Exception $e) {
        // One unreadable file should not abort the whole tally.
        $resut = "<td>" . escapeHtml($e->getMessage()) . "</td>";
    }

    // Escape byte-wise so the name survives whether the filesystem returns
    // UTF-8 or a legacy code page.
    $safeName = strtr($name, array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'));

    echo "<tr>" . "<td>" . $safeName . "</td>" . $resut . "</tr>";
}
echo "</table>";
另外我还发现原程序使用了composer的安装方式,我从没使用过这种方式,只好上网COPY了一个Autoloader.php,根据自己的要求改了一下,放到phpword程序目录里面:
</pre>
<?php
/**
 * PHPWord
 *
 * Copyright (c) 2011 PHPWord
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * @category PHPWord
 * @package PHPWord
 * @copyright Copyright (c) 010 PHPWord
 * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
 * @version Beta 0.6.3, 08.07.2011
 */

/**
 * Minimal class loader for PHPWord, used in place of Composer's autoloader.
 *
 * Maps a class name under the PhpOffice\PhpWord namespace to a file below the
 * directory named by the PHPWORD_BASE_PATH constant, which the including
 * script must define before the first class is loaded.
 */
class Autoloader
{
    /**
     * Namespace prefix this loader is responsible for (case-sensitive).
     */
    const NAMESPACE_PREFIX = 'PhpOffice\\PhpWord\\';

    /**
     * Registers Load() on the SPL autoload stack.
     *
     * @return bool Result of spl_autoload_register().
     */
    public static function Register()
    {
        return spl_autoload_register(array('Autoloader', 'Load'));
    }

    /**
     * Requires the file that declares the given PhpOffice\PhpWord class.
     *
     * @param string $strObjectName Fully-qualified class name.
     *
     * @return false|null False when the class is already declared, is outside
     *                    the PhpOffice\PhpWord namespace, PHPWORD_BASE_PATH is
     *                    undefined, or the file is missing or unreadable;
     *                    null once the file has been required.
     */
    public static function Load($strObjectName)
    {
        $strObjectName = ltrim($strObjectName, '\\');
        $prefixLength = strlen(self::NAMESPACE_PREFIX);

        // Pass false so this check never re-enters the autoload stack.
        if (class_exists($strObjectName, false)
            || strncmp($strObjectName, self::NAMESPACE_PREFIX, $prefixLength) !== 0
            || !defined('PHPWORD_BASE_PATH')
        ) {
            return false;
        }

        // Namespace separators become directory separators. The base path is
        // normalised as well, so a Windows-style definition of the constant
        // still resolves on systems that use "/".
        $strObjectFilePath = str_replace(
            '\\',
            DIRECTORY_SEPARATOR,
            rtrim(PHPWORD_BASE_PATH, '/\\') . '\\' . substr($strObjectName, $prefixLength) . '.php'
        );

        if (!is_file($strObjectFilePath) || !is_readable($strObjectFilePath)) {
            return false;
        }

        require $strObjectFilePath;
    }
}
?>
<pre>
上一篇: 用过的声控光控开关