python - Unable to process medium or large files with Spark 1.3.1 when using standalone cluster management
I am using Spark 1.3.1 to process files on a Spark standalone cluster that has one worker attached to it.
I am able to process small files, but when I try to process medium or large files I get the following error:
    Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.
    : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 19.0 failed 4 times, most recent failure: Lost task 0.3 in stage 19.0 (TID 54, 790-gykghs1.ad.mdp.com): java.net.SocketException: Connection reset by peer: socket write error
        at java.net.SocketOutputStream.socketWrite0(Native Method)
        at java.net.SocketOutputStream.socketWrite(Unknown Source)
        at java.net.SocketOutputStream.write(Unknown Source)
        at java.io.BufferedOutputStream.flushBuffer(Unknown Source)
        at java.io.BufferedOutputStream.write(Unknown Source)
        at java.io.DataOutputStream.write(Unknown Source)
        at java.io.FilterOutputStream.write(Unknown Source)
        at org.apache.spark.api.python.PythonRDD$.writeUTF(PythonRDD.scala:591)
        at org.apache.spark.api.python.PythonRDD$.org$apache$spark$api$python$PythonRDD$$write$1(PythonRDD.scala:411)
        at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:421)
        at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:421)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:421)
        at org.apache.spark.api.python.PythonRDD$WriterThread$$anonfun$run$1.apply(PythonRDD.scala:243)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1618)
        at org.apache.spark.api.python.PythonRDD$WriterThread.run(PythonRDD.scala:205)

    Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1204)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1193)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1192)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
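For reference, here is a minimal sketch of the kind of PySpark job I am running. The master URL, input path, and transformations shown are placeholders, not my exact code; the real job reads a text file, applies simple per-record transformations, and returns results to the driver, which is where the PythonRDD writer thread in the trace above comes in.

    from pyspark import SparkConf, SparkContext

    # Hypothetical configuration for a standalone cluster with one worker;
    # the master URL and app name below are placeholders.
    conf = (SparkConf()
            .setMaster("spark://master-host:7077")
            .setAppName("ProcessFiles"))
    sc = SparkContext(conf=conf)

    # Read a text file and run simple per-line transformations
    # (roughly what my job does; the path is illustrative).
    lines = sc.textFile("/data/input/medium_file.txt")
    tokens = lines.flatMap(lambda line: line.split())
    counts = tokens.map(lambda w: (w, 1)).reduceByKey(lambda a, b: a + b)

    # Pulling results back to the driver is the step that fails
    # once the input file grows beyond a small size.
    print(counts.take(10))

Small inputs complete without issue; only medium or large files trigger the SocketException above.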
Any help regarding this would be appreciated.