Browse Source

github-7 - Form check box extraction with XWPFWordExtractor

pull/79/head
antony-liu 10 years ago
parent
commit
62b49d0517
  1. 16
      ooxml/OpenXmlFormats/Wordprocessing/BaseTypes.cs
  2. 485
      ooxml/OpenXmlFormats/Wordprocessing/FormField.cs
  3. 23
      ooxml/XWPF/Usermodel/XWPFRun.cs
  4. 16
      testcases/ooxml/XSSF/UserModel/TestXSSFDataValidation.cs
  5. 16
      testcases/ooxml/XWPF/Extractor/TestXWPFWordExtractor.cs

16
ooxml/OpenXmlFormats/Wordprocessing/BaseTypes.cs

@ -530,6 +530,22 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.valField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_MacroName"/> from an XML node by reading its <c>w:val</c> attribute.
/// </summary>
/// <param name="node">Node to read; may be null.</param>
/// <param name="namespaceManager">Accepted for signature parity with the other Parse methods; not used here.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_MacroName Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_MacroName macro = new CT_MacroName();
    // Handling of a missing attribute is delegated to XmlHelper.ReadString.
    XmlAttribute valAttribute = node.Attributes["w:val"];
    macro.val = XmlHelper.ReadString(valAttribute);
    return macro;
}
/// <summary>
/// Writes this macro name as an empty element <c>&lt;w:{nodeName} w:val="..."/&gt;</c>.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    string openingTag = string.Format("<w:{0}", nodeName);
    sw.Write(openingTag);

    // NOTE(review): the trailing 'true' flag is defined by XmlHelper.WriteAttribute — confirm its meaning there.
    XmlHelper.WriteAttribute(sw, "w:val", this.val, true);

    sw.Write("/>");
}
}
/// <summary>

485
ooxml/OpenXmlFormats/Wordprocessing/FormField.cs

@ -28,6 +28,22 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.valField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFTextType"/> from an XML node, converting the
/// <c>w:val</c> attribute text into an <see cref="ST_FFTextType"/> member.
/// </summary>
/// <param name="node">Node to read; may be null.</param>
/// <param name="namespaceManager">Not used by this method.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFTextType Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFTextType textType = new CT_FFTextType();
    // Enum.Parse throws if the text is not a member name of ST_FFTextType.
    string rawValue = XmlHelper.ReadString(node.Attributes["w:val"]);
    textType.valField = (ST_FFTextType)Enum.Parse(typeof(ST_FFTextType), rawValue);
    return textType;
}
/// <summary>
/// Writes this text type as an empty element <c>&lt;w:{nodeName} w:val="..."/&gt;</c>,
/// using the enum member name as the attribute value.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    string openingTag = string.Format("<w:{0}", nodeName);
    sw.Write(openingTag);

    string enumText = this.valField.ToString();
    XmlHelper.WriteAttribute(sw, "w:val", enumText);

    sw.Write("/>");
}
}
@ -77,6 +93,24 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.valField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFName"/> from an XML node by reading its <c>w:val</c> attribute.
/// </summary>
/// <param name="node">Node to read; may be null.</param>
/// <param name="namespaceManager">Not used by this method.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFName Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFName fieldName = new CT_FFName();
    XmlAttribute valAttribute = node.Attributes["w:val"];
    fieldName.val = XmlHelper.ReadString(valAttribute);
    return fieldName;
}
/// <summary>
/// Writes this form-field name as an empty element <c>&lt;w:{nodeName} w:val="..."/&gt;</c>.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    string openingTag = string.Format("<w:{0}", nodeName);
    sw.Write(openingTag);

    // NOTE(review): the trailing 'true' flag is defined by XmlHelper.WriteAttribute — confirm its meaning there.
    XmlHelper.WriteAttribute(sw, "w:val", this.val, true);

    sw.Write("/>");
}
}
@ -99,6 +133,24 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
private bool dirtyFieldSpecified;
// Backing store for the ffData property.
private CT_FFData ffDataField;

/// <summary>
/// Gets or sets the form-field data object stored on this instance.
/// </summary>
public CT_FFData ffData
{
    get
    {
        return ffDataField;
    }
    set
    {
        ffDataField = value;
    }
}
// Backing store for the fldData property.
private CT_Text fldDataField;

/// <summary>
/// Gets or sets the field data text object stored on this instance.
/// </summary>
public CT_Text fldData
{
    get
    {
        return fldDataField;
    }
    set
    {
        fldDataField = value;
    }
}
// Backing store for the numberingChange property.
private CT_TrackChangeNumbering numberingChangeField;

/// <summary>
/// Gets or sets the numbering-change tracking object stored on this instance.
/// </summary>
public CT_TrackChangeNumbering numberingChange
{
    get
    {
        return numberingChangeField;
    }
    set
    {
        numberingChangeField = value;
    }
}
[XmlElement("ffData", typeof(CT_FFData), Order = 0)]
[XmlElement("fldData", typeof(CT_Text), Order = 0)]
[XmlElement("numberingChange", typeof(CT_TrackChangeNumbering), Order = 0)]
@ -124,7 +176,21 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
ctObj.fldLock = (ST_OnOff)Enum.Parse(typeof(ST_OnOff), node.Attributes["w:fldLock"].Value);
if (node.Attributes["w:dirty"] != null)
ctObj.dirty = (ST_OnOff)Enum.Parse(typeof(ST_OnOff), node.Attributes["w:dirty"].Value);
foreach (XmlNode childNode in node.ChildNodes)
{
if (childNode.LocalName == "ffData")
{
ctObj.ffDataField = CT_FFData.Parse(childNode, namespaceManager);
}
else if (childNode.LocalName == "fldData")
{
ctObj.fldDataField = CT_Text.Parse(childNode, namespaceManager);
}
else if (childNode.LocalName == "numberingChange")
{
ctObj.numberingChangeField = CT_TrackChangeNumbering.Parse(childNode, namespaceManager);
}
}
return ctObj;
}
@ -137,6 +203,17 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
XmlHelper.WriteAttribute(sw, "w:fldLock", this.fldLock.ToString());
XmlHelper.WriteAttribute(sw, "w:dirty", this.dirty.ToString());
sw.Write(">");
if (this.ffDataField != null)
{
this.ffDataField.Write(sw, "ffData");
}
if (this.fldDataField != null)
this.fldDataField.Write(sw, "fldData");
if (this.numberingChangeField != null)
{
this.numberingChangeField.Write(sw, "numberingChange");
}
sw.Write(string.Format("</w:{0}>", nodeName));
}
@ -214,14 +291,14 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
public class CT_FFData
{
private object[] itemsField;
private List<object> itemsField;
private ItemsChoiceType14[] itemsElementNameField;
private List<FFDataItemsType> itemsElementNameField;
public CT_FFData()
{
this.itemsElementNameField = new ItemsChoiceType14[0];
this.itemsField = new object[0];
this.itemsElementNameField = new List<FFDataItemsType>();
this.itemsField = new List<object>();
}
[XmlElement("calcOnExit", typeof(CT_OnOff), Order = 0)]
@ -239,26 +316,233 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
{
get
{
return this.itemsField;
return this.itemsField.ToArray();
}
set
{
this.itemsField = value;
this.itemsField.Clear();
this.itemsField.AddRange(value);
}
}
[XmlElement("ItemsElementName", Order = 1)]
[XmlIgnore]
public ItemsChoiceType14[] ItemsElementName
public FFDataItemsType[] ItemsElementName
{
get
{
return this.itemsElementNameField;
return this.itemsElementNameField.ToArray();
}
set
{
this.itemsElementNameField = value;
this.itemsElementNameField.Clear();
this.itemsElementNameField.AddRange(value);
}
}
/// <summary>
/// Builds a <see cref="CT_FFData"/> from an <c>ffData</c> XML node. Each recognised
/// child element is parsed into its typed object and appended, in document order,
/// together with the <see cref="FFDataItemsType"/> tag that identifies it.
/// </summary>
/// <param name="node">Node whose children are read; may be null.</param>
/// <param name="namespaceManager">Passed through to the child Parse methods.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
internal static CT_FFData Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
        return null;

    CT_FFData ctObj = new CT_FFData();
    foreach (XmlNode childNode in node.ChildNodes)
    {
        switch (childNode.LocalName)
        {
            case "name":
                ctObj.AddNewObject(CT_FFName.Parse(childNode, namespaceManager), FFDataItemsType.name);
                break;
            case "enabled":
                // Must be tagged 'enabled' (not 'name'): Write dispatches on the tag and
                // would otherwise treat this CT_OnOff as a CT_FFName.
                ctObj.AddNewObject(CT_OnOff.Parse(childNode, namespaceManager), FFDataItemsType.enabled);
                break;
            case "calcOnExit":
                ctObj.AddNewObject(CT_OnOff.Parse(childNode, namespaceManager), FFDataItemsType.calcOnExit);
                break;
            case "checkBox":
                ctObj.AddNewObject(CT_FFCheckBox.Parse(childNode, namespaceManager), FFDataItemsType.checkBox);
                break;
            case "ddList":
                ctObj.AddNewObject(CT_FFDDList.Parse(childNode, namespaceManager), FFDataItemsType.ddList);
                break;
            case "entryMacro":
                ctObj.AddNewObject(CT_MacroName.Parse(childNode, namespaceManager), FFDataItemsType.entryMacro);
                break;
            case "exitMacro":
                ctObj.AddNewObject(CT_MacroName.Parse(childNode, namespaceManager), FFDataItemsType.exitMacro);
                break;
            case "helpText":
                ctObj.AddNewObject(CT_FFHelpText.Parse(childNode, namespaceManager), FFDataItemsType.helpText);
                break;
            case "statusText":
                ctObj.AddNewObject(CT_FFStatusText.Parse(childNode, namespaceManager), FFDataItemsType.statusText);
                break;
            case "textInput":
                ctObj.AddNewObject(CT_FFTextInput.Parse(childNode, namespaceManager), FFDataItemsType.textInput);
                break;
            default:
                // TODO: "tabIndex" is not handled yet; it and any other unrecognised
                // children are skipped and therefore not round-tripped by Write.
                break;
        }
    }
    return ctObj;
}
/// <summary>
/// Writes this form-field data as <c>&lt;w:{nodeName}&gt;...&lt;/w:{nodeName}&gt;</c>,
/// emitting every stored item in list order under the element name given by its
/// <see cref="FFDataItemsType"/> tag.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
/// <exception cref="InvalidCastException">
/// A stored item does not have the type implied by its tag.
/// </exception>
internal void Write(StreamWriter sw, string nodeName)
{
    // The start tag must be closed before children are written.
    sw.Write(string.Format("<w:{0}>", nodeName));
    for (int i = 0; i < this.itemsElementNameField.Count; i++)
    {
        object item = this.itemsField[i];
        // Direct casts surface a tag/type mismatch as InvalidCastException
        // instead of a null dereference from a failed 'as' cast.
        switch (this.itemsElementNameField[i])
        {
            case FFDataItemsType.name:
                ((CT_FFName)item).Write(sw, "name");
                break;
            case FFDataItemsType.enabled:
                ((CT_OnOff)item).Write(sw, "enabled");
                break;
            case FFDataItemsType.calcOnExit:
                ((CT_OnOff)item).Write(sw, "calcOnExit");
                break;
            case FFDataItemsType.ddList:
                ((CT_FFDDList)item).Write(sw, "ddList");
                break;
            case FFDataItemsType.checkBox:
                ((CT_FFCheckBox)item).Write(sw, "checkBox");
                break;
            case FFDataItemsType.entryMacro:
                ((CT_MacroName)item).Write(sw, "entryMacro");
                break;
            case FFDataItemsType.exitMacro:
                ((CT_MacroName)item).Write(sw, "exitMacro");
                break;
            case FFDataItemsType.helpText:
                ((CT_FFHelpText)item).Write(sw, "helpText");
                break;
            case FFDataItemsType.statusText:
                ((CT_FFStatusText)item).Write(sw, "statusText");
                break;
            case FFDataItemsType.textInput:
                ((CT_FFTextInput)item).Write(sw, "textInput");
                break;
        }
    }
    // The end tag needs the same w: prefix as the start tag to be well-formed.
    sw.Write(string.Format("</w:{0}>", nodeName));
}
/// <summary>
/// Appends an already-built item and its type tag to the two parallel lists.
/// </summary>
/// <param name="obj">Item to store.</param>
/// <param name="type">Tag recorded at the same index as <paramref name="obj"/>.</param>
private void AddNewObject(object obj, FFDataItemsType type)
{
    // Both lists are updated under one lock so their indexes stay aligned.
    lock (this)
    {
        itemsElementNameField.Add(type);
        itemsField.Add(obj);
    }
}
#region Generic methods for object operation
/// <summary>
/// Collects, in list order, every stored item whose tag equals <paramref name="type"/>.
/// </summary>
/// <typeparam name="T">Type the matching items are converted to with <c>as</c>.</typeparam>
/// <param name="type">Tag to match.</param>
/// <returns>
/// A new list with one entry per matching tag; an entry is null when the stored
/// item is not a <typeparamref name="T"/>.
/// </returns>
private List<T> GetObjectList<T>(FFDataItemsType type) where T : class
{
    lock (this)
    {
        List<T> matches = new List<T>();
        int index = 0;
        while (index < itemsElementNameField.Count)
        {
            if (itemsElementNameField[index] == type)
            {
                T candidate = itemsField[index] as T;
                matches.Add(candidate);
            }
            index++;
        }
        return matches;
    }
}
/// <summary>
/// Counts how many stored items carry the given tag.
/// </summary>
/// <param name="type">Tag to count.</param>
/// <returns>The number of entries in the tag list equal to <paramref name="type"/>.</returns>
private int SizeOfObjectArray(FFDataItemsType type)
{
    lock (this)
    {
        int matches = 0;
        foreach (FFDataItemsType tag in itemsElementNameField)
        {
            if (tag == type)
            {
                matches++;
            }
        }
        return matches;
    }
}
/// <summary>
/// Returns the <paramref name="p"/>-th stored item (zero-based) among those tagged
/// with <paramref name="type"/>.
/// </summary>
/// <typeparam name="T">Type the item is converted to with <c>as</c>.</typeparam>
/// <param name="p">Zero-based position among items of the given tag.</param>
/// <param name="type">Tag to match.</param>
/// <returns>
/// The item, or null when no such position exists or the item is not a <typeparamref name="T"/>.
/// </returns>
private T GetObjectArray<T>(int p, FFDataItemsType type) where T : class
{
    lock (this)
    {
        int slot = GetObjectIndex(type, p);
        bool inRange = slot >= 0 && slot < this.itemsField.Count;
        if (!inRange)
        {
            return null;
        }
        return itemsField[slot] as T;
    }
}
/// <summary>
/// Creates a new <typeparamref name="T"/> and inserts it, with its tag, in front of the
/// <paramref name="p"/>-th existing item (zero-based) tagged with <paramref name="type"/>.
/// When no such item exists the new object is appended at the end.
/// </summary>
/// <typeparam name="T">Type of the object to create.</typeparam>
/// <param name="type">Tag recorded for the new object.</param>
/// <param name="p">Zero-based position among items of the given tag.</param>
/// <returns>The newly created object.</returns>
private T InsertNewObject<T>(FFDataItemsType type, int p) where T : class, new()
{
    T t = new T();
    lock (this)
    {
        int pos = GetObjectIndex(type, p);
        if (pos < 0)
        {
            // GetObjectIndex reports "not found" as -1; List.Insert would throw
            // ArgumentOutOfRangeException for it, so append instead.
            this.itemsElementNameField.Add(type);
            this.itemsField.Add(t);
        }
        else
        {
            this.itemsElementNameField.Insert(pos, type);
            this.itemsField.Insert(pos, t);
        }
    }
    return t;
}
/// <summary>
/// Creates a new <typeparamref name="T"/> and appends it, with its tag, to the two parallel lists.
/// </summary>
/// <typeparam name="T">Type of the object to create.</typeparam>
/// <param name="type">Tag recorded at the same index as the new object.</param>
/// <returns>The newly created object.</returns>
private T AddNewObject<T>(FFDataItemsType type) where T : class, new()
{
    T created = new T();
    // Both lists are updated under one lock so their indexes stay aligned.
    lock (this)
    {
        itemsElementNameField.Add(type);
        itemsField.Add(created);
    }
    return created;
}
/// <summary>
/// Replaces the <paramref name="p"/>-th stored item (zero-based) tagged with
/// <paramref name="type"/> by <paramref name="obj"/>. Does nothing when no such
/// position exists.
/// </summary>
/// <typeparam name="T">Type the existing item must have for the replacement to be allowed.</typeparam>
/// <param name="type">Tag to match.</param>
/// <param name="p">Zero-based position among items of the given tag.</param>
/// <param name="obj">Replacement item.</param>
/// <exception cref="InvalidOperationException">
/// The item currently stored at that position is not a <typeparamref name="T"/>.
/// </exception>
private void SetObjectArray<T>(FFDataItemsType type, int p, T obj) where T : class
{
    lock (this)
    {
        int pos = GetObjectIndex(type, p);
        if (pos < 0 || pos >= this.itemsField.Count)
            return;

        object current = this.itemsField[pos];
        if (current is T)
        {
            this.itemsField[pos] = obj;
            return;
        }

        // A null slot has no runtime type; report it as such instead of
        // dereferencing it while building the message.
        string currentTypeName = current == null ? "null" : current.GetType().Name;
        // InvalidOperationException derives from Exception, so existing
        // catch (Exception) handlers keep working.
        throw new InvalidOperationException(string.Format(
            @"object types are different, itemsField[{0}] is {1}, and parameter obj is {2}",
            pos, currentTypeName, typeof(T).Name));
    }
}
/// <summary>
/// Translates "the <paramref name="p"/>-th item tagged with <paramref name="type"/>"
/// (zero-based) into an index into the parallel item lists.
/// </summary>
/// <param name="type">Tag to match.</param>
/// <param name="p">Zero-based position among items of the given tag.</param>
/// <returns>The list index of that item, or -1 when there is no such item.</returns>
private int GetObjectIndex(FFDataItemsType type, int p)
{
    int seen = 0;
    for (int i = 0; i < itemsElementNameField.Count; i++)
    {
        if (itemsElementNameField[i] != type)
        {
            continue;
        }
        if (seen == p)
        {
            return i;
        }
        seen++;
    }
    return -1;
}
/// <summary>
/// Removes the <paramref name="p"/>-th stored item (zero-based) tagged with
/// <paramref name="type"/>, together with its tag. Does nothing when no such
/// position exists.
/// </summary>
/// <param name="type">Tag to match.</param>
/// <param name="p">Zero-based position among items of the given tag.</param>
private void RemoveObject(FFDataItemsType type, int p)
{
    lock (this)
    {
        int slot = GetObjectIndex(type, p);
        bool inRange = slot >= 0 && slot < this.itemsField.Count;
        if (inRange)
        {
            // Remove from both lists so their indexes stay aligned.
            itemsElementNameField.RemoveAt(slot);
            itemsField.RemoveAt(slot);
        }
    }
}
#endregion
/// <summary>
/// Returns the stored items tagged as check boxes, in list order.
/// </summary>
/// <returns>A new list built by <c>GetObjectList</c> for <see cref="FFDataItemsType.checkBox"/>.</returns>
public List<CT_FFCheckBox> GetCheckBoxList()
{
    List<CT_FFCheckBox> checkBoxes = GetObjectList<CT_FFCheckBox>(FFDataItemsType.checkBox);
    return checkBoxes;
}
}
@ -321,6 +605,52 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.checkedField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFCheckBox"/> from an XML node by parsing its
/// <c>checked</c>, <c>default</c>, <c>size</c> and <c>sizeAuto</c> children.
/// </summary>
/// <param name="node">Node whose children are read; may be null.</param>
/// <param name="namespaceManager">Passed through to the child Parse methods.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFCheckBox Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFCheckBox box = new CT_FFCheckBox();
    foreach (XmlNode child in node.ChildNodes)
    {
        switch (child.LocalName)
        {
            case "checked":
                box.checkedField = CT_OnOff.Parse(child, namespaceManager);
                break;
            case "default":
                box.defaultField = CT_OnOff.Parse(child, namespaceManager);
                break;
            // size and sizeAuto share one slot; whichever appears last wins.
            case "size":
                box.itemField = CT_HpsMeasure.Parse(child, namespaceManager);
                break;
            case "sizeAuto":
                box.itemField = CT_OnOff.Parse(child, namespaceManager);
                break;
        }
    }
    return box;
}
/// <summary>
/// Writes this check box as <c>&lt;w:{nodeName}&gt;...&lt;/w:{nodeName}&gt;</c> with its
/// size (or sizeAuto), default and checked children, when present.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    sw.Write(string.Format("<w:{0}>", nodeName));

    // Child names are passed WITHOUT the "w:" prefix: CT_FFData.Write passes bare
    // names ("enabled", "calcOnExit") to CT_OnOff.Write, so a prefixed name here
    // would come out doubled.
    // NOTE(review): CT_HpsMeasure.Write is assumed to follow the same convention — confirm.
    //
    // The size choice is written first, then default, then checked, following the
    // CT_FFCheckBox sequence in the WordprocessingML schema.
    if (this.itemField != null)
    {
        if (this.itemField is CT_OnOff)
            ((CT_OnOff)this.itemField).Write(sw, "sizeAuto");
        else
            ((CT_HpsMeasure)this.itemField).Write(sw, "size");
    }
    if (this.defaultField != null)
        this.defaultField.Write(sw, "default");
    if (this.checkedField != null)
        this.checkedField.Write(sw, "checked");

    sw.Write(string.Format("</w:{0}>", nodeName));
}
}
@ -382,6 +712,45 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.listEntryField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFDDList"/> from an XML node by parsing its
/// <c>result</c>, <c>default</c> and <c>listEntry</c> children.
/// </summary>
/// <param name="node">Node whose children are read; may be null.</param>
/// <param name="namespaceManager">Passed through to the child Parse methods.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFDDList Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFDDList dropDown = new CT_FFDDList();
    foreach (XmlNode child in node.ChildNodes)
    {
        switch (child.LocalName)
        {
            case "result":
                dropDown.resultField = CT_DecimalNumber.Parse(child, namespaceManager);
                break;
            case "default":
                dropDown.defaultField = CT_DecimalNumber.Parse(child, namespaceManager);
                break;
            case "listEntry":
                // NOTE(review): relies on listEntryField being initialised by the constructor — confirm.
                dropDown.listEntryField.Add(CT_String.Parse(child, namespaceManager));
                break;
        }
    }
    return dropDown;
}
/// <summary>
/// Writes this drop-down list as <c>&lt;w:{nodeName}&gt;...&lt;/w:{nodeName}&gt;</c> with its
/// result, default and listEntry children, when present.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    sw.Write(string.Format("<w:{0}>", nodeName));

    // Child names are passed WITHOUT the "w:" prefix, matching how the other Write
    // methods in this file are called (they add the prefix themselves).
    // NOTE(review): CT_DecimalNumber.Write and CT_String.Write are assumed to follow
    // the same convention — confirm.
    //
    // Order follows the CT_FFDDList sequence in the WordprocessingML schema:
    // result, default, then the list entries.
    if (this.resultField != null)
        this.resultField.Write(sw, "result");
    if (this.defaultField != null)
        this.defaultField.Write(sw, "default");
    if (this.listEntry != null)
    {
        foreach (CT_String str in this.listEntry)
        {
            str.Write(sw, "listEntry");
        }
    }

    sw.Write(string.Format("</w:{0}>", nodeName));
}
}
@ -436,6 +805,31 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.valField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFHelpText"/> from an XML node, reading the optional
/// <c>w:type</c> attribute and the <c>w:val</c> attribute.
/// </summary>
/// <param name="node">Node to read; may be null.</param>
/// <param name="namespaceManager">Not used by this method.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFHelpText Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFHelpText helpText = new CT_FFHelpText();

    // The type is recorded only when the attribute is actually present.
    XmlAttribute typeAttribute = node.Attributes["w:type"];
    if (typeAttribute != null)
    {
        helpText.typeFieldSpecified = true;
        helpText.typeField = (ST_InfoTextType)Enum.Parse(typeof(ST_InfoTextType), typeAttribute.Value);
    }

    helpText.valField = XmlHelper.ReadString(node.Attributes["w:val"]);
    return helpText;
}
/// <summary>
/// Writes this help text as an empty element <c>&lt;w:{nodeName} .../&gt;</c> carrying
/// <c>w:val</c> and, when a type was specified, <c>w:type</c>.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    string openingTag = string.Format("<w:{0}", nodeName);
    sw.Write(openingTag);

    XmlHelper.WriteAttribute(sw, "w:val", this.valField);
    if (this.typeFieldSpecified)
    {
        string typeText = this.typeField.ToString();
        XmlHelper.WriteAttribute(sw, "w:type", typeText);
    }

    sw.Write("/>");
}
}
@ -503,6 +897,31 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.valField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFStatusText"/> from an XML node, reading the optional
/// <c>w:type</c> attribute and the <c>w:val</c> attribute.
/// </summary>
/// <param name="node">Node to read; may be null.</param>
/// <param name="namespaceManager">Not used by this method.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFStatusText Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFStatusText statusText = new CT_FFStatusText();

    // The type is recorded only when the attribute is actually present.
    XmlAttribute typeAttribute = node.Attributes["w:type"];
    if (typeAttribute != null)
    {
        statusText.typeFieldSpecified = true;
        statusText.typeField = (ST_InfoTextType)Enum.Parse(typeof(ST_InfoTextType), typeAttribute.Value);
    }

    statusText.valField = XmlHelper.ReadString(node.Attributes["w:val"]);
    return statusText;
}
/// <summary>
/// Writes this status text as an empty element <c>&lt;w:{nodeName} .../&gt;</c> carrying
/// <c>w:val</c> and, when a type was specified, <c>w:type</c>.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    string openingTag = string.Format("<w:{0}", nodeName);
    sw.Write(openingTag);

    XmlHelper.WriteAttribute(sw, "w:val", this.valField);
    if (this.typeFieldSpecified)
    {
        string typeText = this.typeField.ToString();
        XmlHelper.WriteAttribute(sw, "w:type", typeText);
    }

    sw.Write("/>");
}
}
@ -580,12 +999,56 @@ namespace NPOI.OpenXmlFormats.Wordprocessing
this.formatField = value;
}
}
/// <summary>
/// Builds a <see cref="CT_FFTextInput"/> from an XML node by parsing its
/// <c>type</c>, <c>default</c>, <c>format</c> and <c>maxLength</c> children.
/// </summary>
/// <param name="node">Node whose children are read; may be null.</param>
/// <param name="namespaceManager">Passed through to the child Parse methods.</param>
/// <returns>The populated object, or null when <paramref name="node"/> is null.</returns>
public static CT_FFTextInput Parse(XmlNode node, XmlNamespaceManager namespaceManager)
{
    if (node == null)
    {
        return null;
    }

    CT_FFTextInput textInput = new CT_FFTextInput();
    foreach (XmlNode child in node.ChildNodes)
    {
        switch (child.LocalName)
        {
            case "type":
                textInput.typeField = CT_FFTextType.Parse(child, namespaceManager);
                break;
            case "default":
                textInput.defaultField = CT_String.Parse(child, namespaceManager);
                break;
            case "format":
                textInput.formatField = CT_String.Parse(child, namespaceManager);
                break;
            case "maxLength":
                textInput.maxLengthField = CT_DecimalNumber.Parse(child, namespaceManager);
                break;
        }
    }
    return textInput;
}
/// <summary>
/// Writes this text input as <c>&lt;w:{nodeName}&gt;...&lt;/w:{nodeName}&gt;</c> with its
/// type, default, maxLength and format children, when present.
/// </summary>
/// <param name="sw">Writer receiving the markup.</param>
/// <param name="nodeName">Local element name; the <c>w:</c> prefix is added here.</param>
internal void Write(StreamWriter sw, string nodeName)
{
    // The start tag must be closed before children are written.
    sw.Write(string.Format("<w:{0}>", nodeName));

    // Child names are passed WITHOUT the "w:" prefix: CT_FFTextType.Write adds the
    // prefix itself, so "w:type" would come out as "<w:w:type".
    // NOTE(review): CT_String.Write and CT_DecimalNumber.Write are assumed to follow
    // the same convention — confirm.
    //
    // Order follows the CT_FFTextInput sequence in the WordprocessingML schema:
    // type, default, maxLength, format.
    if (this.typeField != null)
        this.typeField.Write(sw, "type");
    if (this.defaultField != null)
        this.defaultField.Write(sw, "default");
    if (this.maxLengthField != null)
        this.maxLengthField.Write(sw, "maxLength");
    if (this.formatField != null)
        this.formatField.Write(sw, "format");

    // The end tag needs the same w: prefix as the start tag to be well-formed.
    sw.Write(string.Format("</w:{0}>", nodeName));
}
}
[Serializable]
[XmlType(Namespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main", IncludeInSchema = false)]
public enum ItemsChoiceType14
public enum FFDataItemsType
{

23
ooxml/XWPF/Usermodel/XWPFRun.cs

@ -1000,7 +1000,28 @@ namespace NPOI.XWPF.UserModel
text.Append(((CT_Text)o).Value);
}
}
// Complex type evaluation (currently only for extraction of check boxes).
// A "begin" field character that carries form-field data contributes one marker
// per check box: "|X|" when its default state is on, "|_|" otherwise.
if (o is CT_FldChar)
{
    CT_FldChar ctfldChar = (CT_FldChar)o;
    if (ctfldChar.fldCharType == ST_FldCharType.begin && ctfldChar.ffData != null)
    {
        foreach (CT_FFCheckBox checkBox in ctfldChar.ffData.GetCheckBoxList())
        {
            // The default child is only set when the XML contains it, so it may be
            // null; a missing default is reported as unchecked rather than throwing.
            // NOTE(review): only w:default is consulted here, not w:checked — confirm
            // that is the intended source of the box state.
            text.Append(checkBox.@default != null && checkBox.@default.val == true
                ? "|X|"
                : "|_|");
        }
    }
}
if (o is CT_PTab)
{
text.Append("\t");

16
testcases/ooxml/XSSF/UserModel/TestXSSFDataValidation.cs

@ -37,7 +37,7 @@ namespace NPOI.XSSF.UserModel
{
XSSFWorkbook workbook = XSSFTestDataSamples.OpenSampleWorkbook("DataValidations-49244.xlsx");
ISheet sheet = workbook.GetSheetAt(0);
List<XSSFDataValidation> dataValidations = ((XSSFSheet)sheet).GetDataValidations();
List<IDataValidation> dataValidations = ((XSSFSheet)sheet).GetDataValidations();
/**
* For each validation type, there are two cells with the same validation. This Tests
@ -60,13 +60,13 @@ namespace NPOI.XSSF.UserModel
int[] validationTypes = new int[] { ValidationType.INTEGER, ValidationType.DECIMAL, ValidationType.TEXT_LENGTH };
int[] SingleOperandOperatorTypes = new int[]{
OperatorType.LESS_THAN,OperatorType.LESS_OR_EQUAL,
OperatorType.GREATER_THAN,OperatorType.GREATER_OR_EQUAL,
OperatorType.EQUAL,OperatorType.NOT_EQUAL
};
OperatorType.LESS_THAN,OperatorType.LESS_OR_EQUAL,
OperatorType.GREATER_THAN,OperatorType.GREATER_OR_EQUAL,
OperatorType.EQUAL,OperatorType.NOT_EQUAL
};
int[] doubleOperandOperatorTypes = new int[]{
OperatorType.BETWEEN,OperatorType.NOT_BETWEEN
};
OperatorType.BETWEEN,OperatorType.NOT_BETWEEN
};
decimal value = (decimal)10, value2 = (decimal)20;
double dvalue = (double)10.001, dvalue2 = (double)19.999;
@ -254,7 +254,7 @@ namespace NPOI.XSSF.UserModel
XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.CreateSheet() as XSSFSheet;
List<XSSFDataValidation> lst = sheet.GetDataValidations(); //<-- works
List<IDataValidation> lst = sheet.GetDataValidations(); //<-- works
Assert.AreEqual(0, lst.Count);
//create the cell that will have the validation applied

16
testcases/ooxml/XWPF/Extractor/TestXWPFWordExtractor.cs

@ -415,5 +415,21 @@ namespace NPOI.XWPF.Extractor
string text = extractor.Text;
extractor.Close();
}
/// <summary>
/// Opens the "checkboxes.docx" sample and checks that the extracted text renders
/// each form check box as "|X|" or "|_|" at the expected positions.
/// </summary>
[Test]
public void TestCheckboxes()
{
    string expectedText =
        "This is a small test for checkboxes \nunchecked: |_| \n" +
        "Or checked: |X|\n\n\n\n\n" +
        "Test a checkbox within a textbox: |_| -> |X|\n\n\n" +
        "In Table:\n|_|\t|X|\n\n\n" +
        "In Sequence:\n|X||_||X|\n";

    XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("checkboxes.docx");
    Console.WriteLine(doc);

    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    Assert.AreEqual(expectedText, extractor.Text);
    extractor.Close();
}
}
}
Loading…
Cancel
Save