<?xml version="1.0" encoding="UTF-8"?>
<job>
  <name>Pentaho MapReduce - wordcount</name>
    <description/>
    <extended_description/>
    <job_version/>
    <job_status>0</job_status>
  <directory>&#47;</directory>
  <created_user>-</created_user>
  <created_date>2010&#47;07&#47;19 21:35:45.843</created_date>
  <modified_user>-</modified_user>
  <modified_date>2010&#47;07&#47;19 21:35:45.843</modified_date>
    <parameters>
    </parameters>
    <slaveservers>
    </slaveservers>
<job-log-table><connection/>
<schema/>
<table/>
<size_limit_lines/>
<interval/>
<timeout_days/>
<field><id>ID_JOB</id><enabled>Y</enabled><name>ID_JOB</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>JOBNAME</id><enabled>Y</enabled><name>JOBNAME</name></field><field><id>STATUS</id><enabled>Y</enabled><name>STATUS</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>STARTDATE</id><enabled>Y</enabled><name>STARTDATE</name></field><field><id>ENDDATE</id><enabled>Y</enabled><name>ENDDATE</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>DEPDATE</id><enabled>Y</enabled><name>DEPDATE</name></field><field><id>REPLAYDATE</id><enabled>Y</enabled><name>REPLAYDATE</name></field><field><id>LOG_FIELD</id><enabled>Y</enabled><name>LOG_FIELD</name></field></job-log-table>
<jobentry-log-table><connection/>
<schema/>
<table/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>JOBNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>JOBENTRYNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>RESULT</id><enabled>Y</enabled><name>RESULT</name></field><field><id>NR_RESULT_ROWS</id><enabled>Y</enabled><name>NR_RESULT_ROWS</name></field><field><id>NR_RESULT_FILES</id><enabled>Y</enabled><name>NR_RESULT_FILES</name></field><field><id>LOG_FIELD</id><enabled>N</enabled><name>LOG_FIELD</name></field><field><id>COPY_NR</id><enabled>N</enabled><name>COPY_NR</name></field></jobentry-log-table>
<channel-log-table><connection/>
<schema/>
<table/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>LOGGING_OBJECT_TYPE</id><enabled>Y</enabled><name>LOGGING_OBJECT_TYPE</name></field><field><id>OBJECT_NAME</id><enabled>Y</enabled><name>OBJECT_NAME</name></field><field><id>OBJECT_COPY</id><enabled>Y</enabled><name>OBJECT_COPY</name></field><field><id>REPOSITORY_DIRECTORY</id><enabled>Y</enabled><name>REPOSITORY_DIRECTORY</name></field><field><id>FILENAME</id><enabled>Y</enabled><name>FILENAME</name></field><field><id>OBJECT_ID</id><enabled>Y</enabled><name>OBJECT_ID</name></field><field><id>OBJECT_REVISION</id><enabled>Y</enabled><name>OBJECT_REVISION</name></field><field><id>PARENT_CHANNEL_ID</id><enabled>Y</enabled><name>PARENT_CHANNEL_ID</name></field><field><id>ROOT_CHANNEL_ID</id><enabled>Y</enabled><name>ROOT_CHANNEL_ID</name></field></channel-log-table>
   <pass_batchid>N</pass_batchid>
   <shared_objects_file/>
  <entries>
    <entry>
      <name>Pentaho MapReduce (Wordcount)</name>
      <description/>
      <type>HadoopTransJobExecutorPlugin</type>
      <hadoop_job_name>Pentaho MapReduce (Wordcount)</hadoop_job_name>
      <map_trans_repo_dir/>
      <map_trans_repo_file/>
      <map_trans_repo_reference/>
      <map_trans>${Internal.Job.Filename.Directory}&#47;wordcount-mapper.ktr</map_trans>
      <combiner_trans_repo_dir/>
      <combiner_trans_repo_file/>
      <combiner_trans_repo_reference/>
      <combiner_trans/>
      <reduce_trans_repo_dir/>
      <reduce_trans_repo_file/>
      <reduce_trans_repo_reference/>
      <reduce_trans>${Internal.Job.Filename.Directory}&#47;wordcount-reducer.ktr</reduce_trans>
      <reduce_single_threaded>Y</reduce_single_threaded>
      <map_input_step_name>Hadoop Input</map_input_step_name>
      <map_output_step_name>Hadoop Output</map_output_step_name>
      <combiner_input_step_name/>
      <combiner_output_step_name/>
      <reduce_input_step_name>Hadoop Input</reduce_input_step_name>
      <reduce_output_step_name>Hadoop Output</reduce_output_step_name>
      <blocking>Y</blocking>
      <logging_interval>5</logging_interval>
      <input_path>&#47;wordcount&#47;input</input_path>
      <input_format_class>org.apache.hadoop.mapred.TextInputFormat</input_format_class>
      <output_path>&#47;wordcount&#47;output</output_path>
      <clean_output_path>Y</clean_output_path>
      <suppress_output_map_key>N</suppress_output_map_key>
      <suppress_output_map_value>N</suppress_output_map_value>
      <suppress_output_key>N</suppress_output_key>
      <suppress_output_value>N</suppress_output_value>
      <output_format_class>org.apache.hadoop.mapred.TextOutputFormat</output_format_class>
      <hdfs_hostname>hadoop-server</hdfs_hostname>
      <hdfs_port>8020</hdfs_port>
      <job_tracker_hostname>hadoop-server</job_tracker_hostname>
      <job_tracker_port>8021</job_tracker_port>
      <num_map_tasks>2</num_map_tasks>
      <num_reduce_tasks>1</num_reduce_tasks>
      <working_dir>&#47;var&#47;tmp</working_dir>
      <user_defined_list>
      </user_defined_list>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>299</xloc>
      <yloc>158</yloc>
      </entry>
    <entry>
      <name>START</name>
      <description/>
      <type>SPECIAL</type>
      <start>Y</start>
      <dummy>N</dummy>
      <repeat>N</repeat>
      <schedulerType>0</schedulerType>
      <intervalSeconds>0</intervalSeconds>
      <intervalMinutes>60</intervalMinutes>
      <hour>12</hour>
      <minutes>0</minutes>
      <weekDay>1</weekDay>
      <DayOfMonth>1</DayOfMonth>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>19</xloc>
      <yloc>158</yloc>
      </entry>
    <entry>
      <name>Failure</name>
      <description/>
      <type>ABORT</type>
      <message/>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>440</xloc>
      <yloc>292</yloc>
      </entry>
    <entry>
      <name>Success</name>
      <description/>
      <type>SUCCESS</type>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>439</xloc>
      <yloc>158</yloc>
      </entry>
    <entry>
      <name>Copy Files to HDFS</name>
      <description/>
      <type>HadoopCopyFilesPlugin</type>
      <copy_empty_folders>Y</copy_empty_folders>
      <arg_from_previous>N</arg_from_previous>
      <overwrite_files>N</overwrite_files>
      <include_subfolders>N</include_subfolders>
      <remove_source_files>N</remove_source_files>
      <add_result_filesname>N</add_result_filesname>
      <destination_is_a_file>N</destination_is_a_file>
      <create_destination_folder>Y</create_destination_folder>
      <fields>
        <field>
          <source_filefolder>.&#47;</source_filefolder>
          <destination_filefolder>hdfs:&#47;&#47;hadoop-server:8020&#47;wordcount&#47;input</destination_filefolder>
          <wildcard>.*README.*</wildcard>
        </field>
      </fields>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>159</xloc>
      <yloc>158</yloc>
      </entry>
  </entries>
  <hops>
    <hop>
      <from>Pentaho MapReduce (Wordcount)</from>
      <to>Success</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>Pentaho MapReduce (Wordcount)</from>
      <to>Failure</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>N</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>Copy Files to HDFS</from>
      <to>Pentaho MapReduce (Wordcount)</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>Y</unconditional>
    </hop>
    <hop>
      <from>START</from>
      <to>Copy Files to HDFS</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>Y</unconditional>
    </hop>
  </hops>
  <notepads>
    <notepad>
      <note>SETUP INSTRUCTIONS:
1. Create an input directory in HDFS and place text file(s) in the input directory that you want to use to test the wordcount example
2. Update the &apos;Pentaho MapReduce&apos; step (Job Setup and Cluster tabs) to configure the correct paths and server names including:
    - Input Path - the path in HDFS from which to read files for counting
    - Output Path - where the processed count of words will be placed
    - HDFS Hostname
    - Job Tracker Hostname</note>
      <xloc>20</xloc>
      <yloc>40</yloc>
      <width>443</width>
      <heigth>73</heigth>
      <fontname>Microsoft Sans Serif</fontname>
      <fontsize>8</fontsize>
      <fontbold>N</fontbold>
      <fontitalic>N</fontitalic>
      <fontcolorred>0</fontcolorred>
      <fontcolorgreen>0</fontcolorgreen>
      <fontcolorblue>0</fontcolorblue>
      <backgroundcolorred>255</backgroundcolorred>
      <backgroundcolorgreen>165</backgroundcolorgreen>
      <backgroundcolorblue>0</backgroundcolorblue>
      <bordercolorred>100</bordercolorred>
      <bordercolorgreen>100</bordercolorgreen>
      <bordercolorblue>100</bordercolorblue>
      <drawshadow>Y</drawshadow>
    </notepad>
  </notepads>
</job>
