在kettle里调用java代码出错

在kettle里调用java代码出错

/**
 * Template row handler: reads one row, widens it to the output row size and forwards it.
 *
 * @param smi step metadata (not used by this template)
 * @param sdi step data (not used by this template)
 * @return {@code false} once {@code getRow()} yields {@code null} (after calling
 *         {@code setOutputDone()}), otherwise {@code true} after the row has been forwarded
 * @throws KettleException declared for the row-handling calls made here
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  if (first) {
    // One-time setup hook: the flag is cleared so this branch runs for the first call only.
    first = false;

    /* TODO: Your code here. (Using info fields)

    FieldHelper infoField = get(Fields.Info, "info_field_name");
    RowSet infoStream = findInfoRowSet("info_stream_tag");

    // Drain the info stream completely BEFORE the first getRow() call: getRow() hands back the
    // first row of any input rowset, and the info/input row layouts differ, so mixing them can
    // lead to errors.
    int infoRowCount = 0;
    Object[] infoRow;
    while ((infoRow = getRowFrom(infoStream)) != null) {
      // do something with info data
      infoRowCount++;
    }
    */
  }

  Object[] inputRow = getRow();
  if (inputRow == null) {
    // No row available: signal that this step has finished producing output.
    setOutputDone();
    return false;
  }

  // Always go through createOutputRow() so the Object[] is large enough to hold any new fields
  // this step adds.
  Object[] outputRow = createOutputRow(inputRow, data.outputRowMeta.size());

  /* TODO: Your code here. (See Sample)

  // Read an input field, modify it, and store it in a new output field.
  String foobar = get(Fields.In, "a_fieldname").getString(outputRow);
  foobar += "bar";
  get(Fields.Out, "output_fieldname").setValue(outputRow, foobar);

  */

  // Hand the row to the next step.
  putRow(data.outputRowMeta, outputRow);
  return true;
}

需要在上面 Kettle 控件自带的模板代码中,插入下面这段地址清洗代码:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Splits a Chinese postal address into province / city / country / town / village / other parts
 * using a single regular expression.
 */
public class regex_address {

    /**
     * Address pattern, compiled once. Capture group 1 is the outer alternation and therefore holds
     * either the named {@code province} group or one of the four municipality literals
     * (上海市, 北京市, 天津市, 重庆市); the named {@code province} group alone is {@code null} for
     * those municipalities.
     */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile(
            "((?<province>[^省]+省|.+自治区|.*?行政区)|上海市|北京市|天津市|重庆市)(?<city>[^市]+自治州|.*?地区|.*?行政单位|.+盟|市辖区|.*?市)?(?<country>[^县]+县|.+市|.+区|.+旗|.+海域|.+岛|)?(?<town>[^区]+区|.+镇|.+街|.+街道|.+路)?(?<village>[^村]+村|.+大道)?(?<other>.*)");

    /**
     * Parses an address into its components.
     *
     * @param address the address text; {@code null} yields an empty list
     * @return one insertion-ordered map per regex match with the keys {@code province},
     *         {@code city}, {@code country}, {@code town}, {@code village} and {@code other};
     *         parts that did not match are stored as empty strings. The list is empty when the
     *         address does not match at all.
     */
    public static List<Map<String,String>> regexAddress(String address){
        List<Map<String, String>> table = new ArrayList<Map<String, String>>();
        if (address == null) {
            // Pattern.matcher(null) would throw; treat a missing address as "nothing matched".
            return table;
        }

        Matcher matcher = ADDRESS_PATTERN.matcher(address);
        while (matcher.find()) {
            Map<String, String> row = new LinkedHashMap<String, String>();
            // Group 1 covers both a regular province and a municipality, so no second scan of the
            // whole address is needed (such a scan could pick up a municipality name that merely
            // appears later in the address, e.g. inside a street name).
            row.put("province", trimToEmpty(matcher.group(1)));
            row.put("city", trimToEmpty(matcher.group("city")));
            row.put("country", trimToEmpty(matcher.group("country")));
            row.put("town", trimToEmpty(matcher.group("town")));
            row.put("village", trimToEmpty(matcher.group("village")));
            row.put("other", trimToEmpty(matcher.group("other")));
            table.add(row);
        }
        return table;
    }

    /** Returns the trimmed value, or an empty string when the group did not participate. */
    private static String trimToEmpty(String value) {
        return value == null ? "" : value.trim();
    }
}
// Reads the "地址" (address) input field of the current row.
// NOTE(review): `r` and get(...) are not declared in this fragment; elsewhere in this post `r` is
// the row returned by getRow() inside processRow, so this statement presumably belongs there
// rather than next to a static main method — confirm before use.
String str = get(Fields.In, "地址").getString(r);

/**
 * Prints each address and extracts the components of its first regex match.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // str is a single String, so it has to be wrapped to be iterated as an array.
    String[] addressList = { str };
    for (String address : addressList) {
        System.out.println(address);
        if (address == null) {
            // Nothing to parse for a missing address.
            continue;
        }
        List<Map<String, String>> address1 = regexAddress(address);
        if (address1.isEmpty()) {
            // regexAddress returns an empty list when the address does not match; get(0) would throw.
            continue;
        }
        Map<String, String> parts = address1.get(0);
        String province = parts.get("province");
        String city = parts.get("city");
        String country = parts.get("country");
        String town = parts.get("town");
        String village = parts.get("village");
        String other = parts.get("other");
    }
}

我对java不太熟悉,插入后总是各种报错,求各位开发者提供正确的代码,kettle的输出语句为:get(Fields.Out, "output_fieldname").setValue(r, foobar);
好人一生平安

试试这个

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads one row, parses its "地址" (address) field with {@code regexAddress} and writes the parts
 * to the output fields province, city, country, town, village and other.
 *
 * @param smi step metadata (not used here)
 * @param sdi step data (not used here)
 * @return {@code false} once {@code getRow()} yields {@code null} (after calling
 *         {@code setOutputDone()}), otherwise {@code true} after the row has been forwarded
 * @throws KettleException declared for the row-handling calls made here
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    Object[] r = getRow();

    if (r == null) {
        setOutputDone();
        return false;
    }

    if (first) {
        first = false;
    }

    String address = get(Fields.In, "地址").getString(r);

    // A null address cannot be matched against the pattern; treat it as "no match".
    List<Map<String, String>> addressList = address == null
            ? new ArrayList<Map<String, String>>()
            : regexAddress(address);

    // Size the output row from the output row metadata, not from the number of regex matches, so
    // the Object[] is large enough for every field this step adds.
    Object[] outputRow = createOutputRow(r, data.outputRowMeta.size());

    // Each match overwrites the same output fields, so when there are several matches only the
    // values of the last one are sent on; with no match the new fields are left unset.
    for (Map<String, String> map : addressList) {
        get(Fields.Out, "province").setValue(outputRow, map.get("province"));
        get(Fields.Out, "city").setValue(outputRow, map.get("city"));
        get(Fields.Out, "country").setValue(outputRow, map.get("country"));
        get(Fields.Out, "town").setValue(outputRow, map.get("town"));
        get(Fields.Out, "village").setValue(outputRow, map.get("village"));
        get(Fields.Out, "other").setValue(outputRow, map.get("other"));
    }

    // Send the row on to the next step.
    putRow(data.outputRowMeta, outputRow);
    return true;
}

/**
 * Address pattern, compiled once. Capture group 1 is the outer alternation and therefore holds
 * either the named {@code province} group or one of the four municipality literals
 * (上海市, 北京市, 天津市, 重庆市); the named {@code province} group alone is {@code null} for those
 * municipalities.
 */
private static final Pattern ADDRESS_PATTERN = Pattern.compile(
        "((?<province>[^省]+省|.+自治区|.*?行政区)|上海市|北京市|天津市|重庆市)(?<city>[^市]+自治州|.*?地区|.*?行政单位|.+盟|市辖区|.*?市)?(?<country>[^县]+县|.+市|.+区|.+旗|.+海域|.+岛|)?(?<town>[^区]+区|.+镇|.+街|.+街道|.+路)?(?<village>[^村]+村|.+大道)?(?<other>.*)");

/**
 * Parses an address into its components.
 *
 * @param address the address text; {@code null} yields an empty list
 * @return one insertion-ordered map per regex match with the keys {@code province}, {@code city},
 *         {@code country}, {@code town}, {@code village} and {@code other}; parts that did not
 *         match are stored as empty strings. The list is empty when the address does not match.
 */
public static List<Map<String, String>> regexAddress(String address) {
    List<Map<String, String>> table = new ArrayList<Map<String, String>>();
    if (address == null) {
        // Pattern.matcher(null) would throw; treat a missing address as "nothing matched".
        return table;
    }

    Matcher matcher = ADDRESS_PATTERN.matcher(address);
    while (matcher.find()) {
        Map<String, String> row = new LinkedHashMap<String, String>();
        // Group 1 covers both a regular province and a municipality, so no second scan of the
        // whole address is needed (such a scan could pick up a municipality name that merely
        // appears later in the address, e.g. inside a street name).
        String[] keys = {"city", "country", "town", "village", "other"};
        String province = matcher.group(1);
        row.put("province", province == null ? "" : province.trim());
        for (String key : keys) {
            String value = matcher.group(key);
            row.put(key, value == null ? "" : value.trim());
        }
        table.add(row);
    }
    return table;
}

https://blog.csdn.net/xj627141903/article/details/124838961