File Format Store
The File Format Store web service provides methods for creating, retrieving and updating file format definitions that
identify the structure of files uploaded via the Upload Manager web service. The web
service definition is available at
https://webservices.data-8.co.uk/batch/v1.0/fileformatstore.asmx.
In order to cleanse a file, the system must know the format of the file (e.g. CSV or tab-delimited) and where different
types of information are stored in the file. This information is provided as a
FileFormat data structure, which can be reused for multiple data cleansing jobs
or defined on a one-off basis.
Each file format definition must be given a unique name to allow it to be referenced when using it to start a new job.
- FileFormat: Defines a file format, including the underlying file type and the column layout.
- FormatColumn: Defines a column within a file format.
- KnownColumnTypes: Enumerates the types of data that a column can contain.
- FileFormatType: Enumerates the available underlying file types for a file format.
- DataType: Enumerates the available categories of data that a file can contain.
The following code illustrates how to configure the file format definition for a CSV file containing columns
for title, forename, surname, six address lines, postcode and two telephone numbers.
/// <summary>
/// Name of the file format definition. It is used both to look up an existing
/// definition and as the Name of the definition that is created when none is found.
/// </summary>
private const string FILE_FORMAT_NAME = "NameAddressTelNo";
/// <summary>
/// Ensures that the file format definition named by FILE_FORMAT_NAME is available in the
/// File Format Store, submitting a new definition when the lookup returns null.
/// </summary>
private void ConfigureFileFormat()
{
    FileFormatStore store = new FileFormatStore();

    // Nothing to do when the lookup already returns a definition.
    if (store.GetFormatDetails(Username, Password, FILE_FORMAT_NAME) != null)
    {
        return;
    }

    // No definition was returned - describe a CSV file without a header line.
    FileFormat definition = new FileFormat();
    definition.Name = FILE_FORMAT_NAME;
    definition.Type = FileFormatType.Csv;
    definition.HeaderLine = false;
    definition.DataType = DataType.Residential;

    // Twelve columns: title, forename, surname, six address lines, postcode
    // and two telephone numbers.
    definition.Columns = new FormatColumn[]
    {
        BuildFormatColumn("Title", KnownColumnTypes.Title),
        BuildFormatColumn("Forename", KnownColumnTypes.FirstName),
        BuildFormatColumn("Surname", KnownColumnTypes.Surname),
        BuildFormatColumn("Add1", KnownColumnTypes.Address1),
        BuildFormatColumn("Add2", KnownColumnTypes.Address2),
        BuildFormatColumn("Add3", KnownColumnTypes.Address3),
        BuildFormatColumn("Add4", KnownColumnTypes.Address4),
        BuildFormatColumn("Add5", KnownColumnTypes.Address5),
        BuildFormatColumn("Add6", KnownColumnTypes.Address6),
        BuildFormatColumn("Postcode", KnownColumnTypes.Postcode),
        BuildFormatColumn("TelNo1", KnownColumnTypes.Telephone),
        BuildFormatColumn("TelNo2", KnownColumnTypes.Telephone)
    };

    store.UpdateFormat(Username, Password, definition);
}

/// <summary>
/// Creates a column definition with the given name and column type.
/// </summary>
/// <param name="name">Value assigned to the column's Name.</param>
/// <param name="columnType">Value assigned to the column's ColumnType.</param>
/// <returns>The newly created column definition.</returns>
private static FormatColumn BuildFormatColumn(string name, KnownColumnTypes columnType)
{
    FormatColumn column = new FormatColumn();
    column.Name = name;
    column.ColumnType = columnType;
    return column;
}