- wget http://archive.apache.org/dist/mahout/0.9/mahout-distribution-0.9.tar.gz
- tar -zxvf mahout-distribution-0.9.tar.gz
- # set mahout environment
- export MAHOUT_HOME=/usr/local/src/mahout-distribution-0.9
- export MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
- export PATH=$MAHOUT_HOME/conf:$MAHOUT_HOME/bin:$PATH
- # set hadoop environment
- export HADOOP_HOME=/usr/local/src/hadoop-1.2.1
- export HADOOP_CONF_DIR=$HADOOP_HOME/conf
- export PATH=$PATH:$HADOOP_HOME/bin
- export HADOOP_HOME_WARN_SUPPRESS=not_null
- 数据格式 (data format: userID,itemID,rating):
- 1,100001,5
- 1,100002,3
- 1,100003,4
- 1,100004,3
- 1,100005,3
- 1,100007,4
- 1,100008,1
- 1,100009,5
- 1,1000011,2
- INPUT="/movie_lens.data"
- TMP_DIR="/mahout_temp"
- OUTPUT="/cf_mahout_output"
- MAHOUT_CMD="/usr/local/src/mahout-distribution-0.9/bin/mahout"
- $MAHOUT_CMD itemsimilarity \
- -i $INPUT \
- -o $OUTPUT \
- --maxSimilaritiesPerItem 1000 \
- --threshold 0.0000001 \
- --similarityClassname SIMILARITY_COSINE \
- --tempDir $TMP_DIR