class Fog::AWS::EMR::Real
Public Class Methods
Initialize connection to EMR
Notes¶ ↑
options parameter must include values for :aws_access_key_id and :aws_secret_access_key in order to create a connection
Examples¶ ↑
emr = EMR.new( :aws_access_key_id => your_aws_access_key_id, :aws_secret_access_key => your_aws_secret_access_key )
Parameters¶ ↑
-
options<~Hash> - config arguments for connection. Defaults to {}.
-
region<~String> - optional region to use, for instance 'eu-west-1' or 'us-east-1'. Defaults to 'us-east-1'.
-
Returns¶ ↑
# File lib/fog/aws/emr.rb, line 61
#
# Initialize connection to EMR.
#
# ==== Parameters
# * options<~Hash> - config arguments for connection. Defaults to {}.
#   * :region - used to build the default host; 'us-east-1' when absent
#   * :host, :path, :port, :scheme - endpoint parts, defaulting to
#     "elasticmapreduce.<region>.amazonaws.com", '/', 443 and 'https'
#   * :persistent - handed to the connection; defaults to false
#   * :connection_options - handed to the connection; defaults to {}
#   * :instrumentor / :instrumentor_name - optional instrumentation object
#     and event-name prefix (prefix defaults to 'fog.aws.emr')
#   * :use_iam_profile - stored on the instance unchanged
#   The same hash is forwarded to setup_credentials.
def initialize(options={})
  # NOTE: the default region is assigned into the hash that was passed in.
  region = (options[:region] ||= 'us-east-1')

  @region             = region
  @use_iam_profile    = options[:use_iam_profile]
  @instrumentor       = options[:instrumentor]
  @instrumentor_name  = options[:instrumentor_name] || 'fog.aws.emr'
  @connection_options = options[:connection_options] || {}

  @scheme     = options[:scheme] || 'https'
  @host       = options[:host] || "elasticmapreduce.#{region}.amazonaws.com"
  @port       = options[:port] || 443
  @path       = options[:path] || '/'
  @persistent = options[:persistent] || false

  endpoint    = "#{@scheme}://#{@host}:#{@port}#{@path}"
  @connection = Fog::XML::Connection.new(endpoint, @persistent, @connection_options)

  setup_credentials(options)
end
Public Instance Methods
adds an instance group to a running cluster docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_AddInstanceGroups.html
Parameters¶ ↑
-
JobFlowId <~String> - Job flow in which to add the instance groups
-
InstanceGroups<~Array> - Instance Groups to add
-
'BidPrice'<~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
-
'InstanceCount'<~Integer> - Target number of instances for the instance group
-
'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
-
'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
-
'MarketType'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster node
-
'Name'<~String> - Friendly name given to the instance group.
-
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>:
-
# File lib/fog/aws/requests/emr/add_instance_groups.rb, line 22
#
# Adds instance groups to a running cluster by issuing an
# 'AddInstanceGroups' request.
#
# ==== Parameters
# * job_flow_id<~String> - sent as 'JobFlowId'
# * options<~Hash> - extra request parameters; merged last, so they
#   override the defaults built here
#   * 'InstanceGroups'<~Array> - expanded through Fog::AWS.indexed_param
#     under the 'InstanceGroups.member.%d' key pattern
#
# The hash passed as +options+ is left untouched; the expansion is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def add_instance_groups(job_flow_id, options={})
  # Work on a copy so the caller's hash keeps its 'InstanceGroups' entry.
  params = options.dup

  if (instance_groups = params.delete('InstanceGroups'))
    params.merge!(Fog::AWS.indexed_param('InstanceGroups.member.%d', [*instance_groups]))
  end

  request({
    'Action'    => 'AddInstanceGroups',
    'JobFlowId' => job_flow_id,
    :parser     => Fog::Parsers::AWS::EMR::AddInstanceGroups.new
  }.merge(params))
end
adds new steps to a running job flow. docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_AddJobFlowSteps.html
Parameters¶ ↑
-
JobFlowId <~String> - A string that uniquely identifies the job flow
-
Steps <~Array> - A list of steps to be executed by the job flow
-
'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
-
'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
-
'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
-
'Jar'<~String> - A path to a JAR file run during the step.
-
'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
-
'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
-
'Key'<~String> - The unique identifier of a key value pair
-
'Value'<~String> - The value part of the identified key
-
-
-
'Name'<~String> - The name of the job flow step
-
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>:
-
# File lib/fog/aws/requests/emr/add_job_flow_steps.rb, line 25
#
# Adds new steps to a running job flow by issuing an 'AddJobFlowSteps'
# request.
#
# ==== Parameters
# * job_flow_id<~String> - sent as 'JobFlowId'
# * options<~Hash> - extra request parameters; merged last, so they
#   override the defaults built here
#   * 'Steps'<~Array> - flattened through Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def add_job_flow_steps(job_flow_id, options={})
  # Work on a copy so the caller's hash keeps its 'Steps' entry.
  params = options.dup

  if (steps = params.delete('Steps'))
    params.merge!(Fog::AWS.serialize_keys('Steps', steps))
  end

  request({
    'Action'    => 'AddJobFlowSteps',
    'JobFlowId' => job_flow_id,
    :parser     => Fog::Parsers::AWS::EMR::AddJobFlowSteps.new
  }.merge(params))
end
returns a list of job flows that match all of the supplied parameters. docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_DescribeJobFlows.html
Parameters¶ ↑
-
CreatedAfter <~DateTime> - Return only job flows created after this date and time
-
CreatedBefore <~DateTime> - Return only job flows created before this date and time
-
JobFlowIds <~String list> - Return only job flows whose job flow ID is contained in this list
-
JobFlowStates <~String list> - RUNNING | WAITING | SHUTTING_DOWN | STARTING Return only job flows whose state is contained in this list
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>:
-
-
JobFlows <~Array> - A list of job flows matching the parameters supplied.
-
AmiVersion <~String> - The version of the AMI used to initialize Amazon EC2 instances in the job flow.
-
'BootstrapActions'<~Array> - A list of the bootstrap actions run by the job flow
-
'BootstrapConfig' <~Array> - A description of the bootstrap action
-
'Name' <~String> - The name of the bootstrap action
-
'ScriptBootstrapAction' <~Array> - The script run by the bootstrap action.
-
'Args' <~String list> - A list of command line arguments to pass to the bootstrap action script.
-
'Path' <~String> - Location of the script to run during a bootstrap action.
-
-
-
-
'ExecutionStatusDetail'<~Array> - Describes the execution status of the job flow
-
'CreationDateTime' <~DateTime> - The creation date and time of the job flow.
-
'EndDateTime' <~DateTime> - The completion date and time of the job flow.
-
'LastStateChangeReason' <~String> - Description of why the job flow last changed state.
-
'ReadyDateTime' <~DateTime> - The date and time when the job flow was ready to start running bootstrap actions.
-
'StartDateTime' <~DateTime> - The start date and time of the job flow.
-
'State' <~String> - COMPLETED | FAILED | TERMINATED | RUNNING | SHUTTING_DOWN | STARTING | WAITING | BOOTSTRAPPING The state of the job flow.
-
-
Instances <~Array> - A specification of the number and type of Amazon EC2 instances on which to run the job flow.
-
'Ec2KeyName'<~String> - Specifies the name of the Amazon EC2 key pair that can be used to ssh to the master node as the user called “hadoop”.
-
'HadoopVersion'<~String> - “0.18” | “0.20” Specifies the Hadoop version for the job flow
-
'InstanceCount'<~Integer> - The number of Amazon EC2 instances used to execute the job flow
-
'InstanceGroups'<~Array> - Configuration for the job flow's instance groups
-
'BidPrice' <~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
-
'CreationDateTime' <~DateTime> - The date/time the instance group was created.
-
'EndDateTime' <~DateTime> - The date/time the instance group was terminated.
-
'InstanceGroupId' <~String> - Unique identifier for the instance group.
-
'InstanceRequestCount'<~Integer> - Target number of instances for the instance group
-
'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
-
'InstanceRunningCount'<~Integer> - Actual count of running instances
-
'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
-
'LastStateChangeReason'<~String> - Details regarding the state of the instance group
-
'Market'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster
-
'Name'<~String> - Friendly name for the instance group
-
'ReadyDateTime'<~DateTime> - The date/time the instance group was available to the cluster
-
'StartDateTime'<~DateTime> - The date/time the instance group was started
-
'State'<~String> - PROVISIONING | STARTING | BOOTSTRAPPING | RUNNING | RESIZING | ARRESTED | SHUTTING_DOWN | TERMINATED | FAILED | ENDED State of instance group
-
-
-
'KeepJobFlowAliveWhenNoSteps' <~Boolean> - Specifies whether the job flow should terminate after completing all steps
-
'MasterInstanceId'<~String> - The Amazon EC2 instance identifier of the master node
-
'MasterInstanceType'<~String> - The EC2 instance type of the master node
-
'MasterPublicDnsName'<~String> - The DNS name of the master node
-
'NormalizedInstanceHours'<~Integer> - An approximation of the cost of the job flow, represented in m1.small/hours.
-
'Placement'<~Array> - Specifies the Availability Zone the job flow will run in
-
'AvailabilityZone' <~String> - The Amazon EC2 Availability Zone for the job flow.
-
-
'SlaveInstanceType'<~String> - The EC2 instance type of the slave nodes
-
'TerminationProtected'<~Boolean> - Specifies whether to lock the job flow to prevent the Amazon EC2 instances from being terminated by API call, user intervention, or in the event of a job flow error
-
-
LogUri <~String> - Specifies the location in Amazon S3 to write the log files of the job flow. If a value is not provided, logs are not created
-
Name <~String> - The name of the job flow
-
Steps <~Array> - A list of steps to be executed by the job flow
-
'ExecutionStatusDetail'<~Array> - Describes the execution status of the job flow
-
'CreationDateTime' <~DateTime> - The creation date and time of the job flow.
-
'EndDateTime' <~DateTime> - The completion date and time of the job flow.
-
'LastStateChangeReason' <~String> - Description of why the job flow last changed state.
-
'ReadyDateTime' <~DateTime> - The date and time when the job flow was ready to start running bootstrap actions.
-
'StartDateTime' <~DateTime> - The start date and time of the job flow.
-
'State' <~String> - COMPLETED | FAILED | TERMINATED | RUNNING | SHUTTING_DOWN | STARTING | WAITING | BOOTSTRAPPING The state of the job flow.
-
-
StepConfig <~Array> - The step configuration
-
'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
-
'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
-
'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
-
'Jar'<~String> - A path to a JAR file run during the step.
-
'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
-
'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
-
'Key'<~String> - The unique identifier of a key value pair
-
'Value'<~String> - The value part of the identified key
-
-
-
'Name'<~String> - The name of the job flow step
-
-
# File lib/fog/aws/requests/emr/describe_job_flows.rb, line 81
#
# Returns the job flows matching the supplied filters by issuing a
# 'DescribeJobFlows' request.
#
# ==== Parameters
# * options<~Hash> - request parameters; merged last, so they override
#   the defaults built here
#   * 'JobFlowIds' - flattened through Fog::AWS.serialize_keys
#   * 'JobFlowStates' - flattened through Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def describe_job_flows(options={})
  # Work on a copy so the caller can reuse the same filter hash.
  params = options.dup

  %w[JobFlowIds JobFlowStates].each do |list_key|
    values = params.delete(list_key)
    params.merge!(Fog::AWS.serialize_keys(list_key, values)) if values
  end

  request({
    'Action' => 'DescribeJobFlows',
    :parser  => Fog::Parsers::AWS::EMR::DescribeJobFlows.new
  }.merge(params))
end
modifies the number of nodes and configuration settings of an instance group. docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_ModifyInstanceGroups.html
Parameters¶ ↑
-
InstanceGroups <~InstanceGroupModifyConfig list> - Instance groups to change
-
InstanceCount <~Integer> - Target size for instance group
-
InstanceGroupId <~String> - Unique ID of the instance group to expand or shrink
-
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>
-
# File lib/fog/aws/requests/emr/modify_instance_groups.rb, line 17
#
# Changes instance group settings by issuing a 'ModifyInstanceGroups'
# request.
#
# ==== Parameters
# * options<~Hash> - request parameters; merged last, so they override
#   the defaults built here
#   * 'InstanceGroups' - flattened through Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def modify_instance_groups(options={})
  # Work on a copy so the caller's hash keeps its 'InstanceGroups' entry.
  params = options.dup

  if (instance_groups = params.delete('InstanceGroups'))
    params.merge!(Fog::AWS.serialize_keys('InstanceGroups', instance_groups))
  end

  request({
    'Action' => 'ModifyInstanceGroups',
    :parser  => Fog::Parsers::AWS::EMR::ModifyInstanceGroups.new
  }.merge(params))
end
# File lib/fog/aws/emr.rb, line 79
#
# Resets the underlying connection object held in @connection.
def reload
  @connection.reset
end
# File lib/fog/aws/requests/emr/run_job_flow.rb, line 69
#
# Starts a job flow whose only step installs Hive, then delegates to
# #run_job_flow.
#
# ==== Parameters
# * name<~String> - job flow name, passed through to #run_job_flow
# * options<~Hash> - passed through to #run_job_flow with two changes:
#   * 'Steps' is replaced by the single 'Setup Hive' step built here
#   * when 'Instances' is present, 'KeepJobFlowAliveWhenNoSteps' is set
#     to true on it
#
# Neither +options+ nor its 'Instances' hash is modified; the adjusted
# values are built as copies.
#
# ==== Returns
# * whatever #run_job_flow returns
def run_hive(name, options={})
  steps = [
    {
      'Name' => 'Setup Hive',
      'HadoopJarStep' => {
        'Jar'  => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
        'Args' => ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script', '--base-path', 's3://us-east-1.elasticmapreduce/libs/hive/', '--install-hive']
      },
      'ActionOnFailure' => 'TERMINATE_JOB_FLOW'
    }
  ]

  # To add a configuration step to the Hive flow, see the step below
  # steps << {
  #   'Name' => 'Install Hive Site Configuration',
  #   'HadoopJarStep' => {
  #     'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
  #     'Args' => ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script', '--base-path', 's3://us-east-1.elasticmapreduce/libs/hive/', '--install-hive-site', '--hive-site=s3://my.bucket/hive/hive-site.xml']},
  #   'ActionOnFailure' => 'TERMINATE_JOB_FLOW'
  # }

  # Hash#merge returns a new hash, so the caller's options stay as given.
  params = options.merge('Steps' => steps)

  instances = params['Instances']
  unless instances.nil?
    # Copy before flagging so the caller's nested hash is not written to.
    params['Instances'] = instances.merge('KeepJobFlowAliveWhenNoSteps' => true)
  end

  run_job_flow(name, params)
end
creates and starts running a new job flow docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_RunJobFlow.html
Parameters¶ ↑
-
AdditionalInfo <~String> - A JSON string for selecting additional features.
-
BootstrapActions <~Array> - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes.
-
'Name'<~String> - The name of the bootstrap action
-
'ScriptBootstrapAction'<~Array> - The script run by the bootstrap action
-
'Args' <~Array> - A list of command line arguments to pass to the bootstrap action script
-
'Path' <~String> - Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system.
-
-
-
Instances <~Array> - A specification of the number and type of Amazon EC2 instances on which to run the job flow.
-
'Ec2KeyName'<~String> - Specifies the name of the Amazon EC2 key pair that can be used to ssh to the master node as the user called “hadoop”.
-
'HadoopVersion'<~String> - “0.18” | “0.20” Specifies the Hadoop version for the job flow
-
'InstanceCount'<~Integer> - The number of Amazon EC2 instances used to execute the job flow
-
'InstanceGroups'<~Array> - Configuration for the job flow's instance groups
-
'BidPrice' <~String> - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD.
-
'InstanceCount'<~Integer> - Target number of instances for the instance group
-
'InstanceRole'<~String> - MASTER | CORE | TASK The role of the instance group in the cluster
-
'InstanceType'<~String> - The Amazon EC2 instance type for all instances in the instance group
-
'MarketType'<~String> - ON_DEMAND | SPOT Market type of the Amazon EC2 instances used to create a cluster node
-
'Name'<~String> - Friendly name given to the instance group.
-
-
'KeepJobFlowAliveWhenNoSteps' <~Boolean> - Specifies whether the job flow should terminate after completing all steps
-
'MasterInstanceType'<~String> - The EC2 instance type of the master node
-
'Placement'<~Array> - Specifies the Availability Zone the job flow will run in
-
'AvailabilityZone' <~String> - The Amazon EC2 Availability Zone for the job flow.
-
-
'SlaveInstanceType'<~String> - The EC2 instance type of the slave nodes
-
'TerminationProtected'<~Boolean> - Specifies whether to lock the job flow to prevent the Amazon EC2 instances from being terminated by API call, user intervention, or in the event of a job flow error
-
-
LogUri <~String> - Specifies the location in Amazon S3 to write the log files of the job flow. If a value is not provided, logs are not created
-
Name <~String> - The name of the job flow
-
Steps <~Array> - A list of steps to be executed by the job flow
-
'ActionOnFailure'<~String> - TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE Specifies the action to take if the job flow step fails
-
'HadoopJarStep'<~Array> - Specifies the JAR file used for the job flow step
-
'Args'<~String list> - A list of command line arguments passed to the JAR file's main function when executed.
-
'Jar'<~String> - A path to a JAR file run during the step.
-
'MainClass'<~String> - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file
-
'Properties'<~Array> - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function
-
'Key'<~String> - The unique identifier of a key value pair
-
'Value'<~String> - The value part of the identified key
-
-
-
'Name'<~String> - The name of the job flow step
-
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>:
-
# File lib/fog/aws/requests/emr/run_job_flow.rb, line 49
#
# Creates and starts a new job flow by issuing a 'RunJobFlow' request.
#
# ==== Parameters
# * name<~String> - sent as 'Name'
# * options<~Hash> - extra request parameters; merged last, so they
#   override the defaults built here
#   * 'BootstrapActions', 'Instances', 'Steps' - each flattened through
#     Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def run_job_flow(name, options={})
  # Work on a copy so the caller's structured entries survive the call.
  params = options.dup

  %w[BootstrapActions Instances Steps].each do |nested_key|
    nested = params.delete(nested_key)
    params.merge!(Fog::AWS.serialize_keys(nested_key, nested)) if nested
  end

  request({
    'Action' => 'RunJobFlow',
    'Name'   => name,
    :parser  => Fog::Parsers::AWS::EMR::RunJobFlow.new
  }.merge(params))
end
locks a job flow so the Amazon EC2 instances in the cluster cannot be terminated by user intervention. docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_SetTerminationProtection.html
Parameters¶ ↑
-
JobFlowIds <~String list> - list of strings that uniquely identify the job flows to protect
-
TerminationProtected <~Boolean> - indicates whether to protect the job flow
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>
-
# File lib/fog/aws/requests/emr/set_termination_protection.rb, line 16
#
# Turns termination protection on or off by issuing a
# 'SetTerminationProtection' request.
#
# ==== Parameters
# * is_protected<~Boolean> - sent as 'TerminationProtected'
# * options<~Hash> - extra request parameters; merged last, so they
#   override the defaults built here
#   * 'JobFlowIds' - flattened through Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def set_termination_protection(is_protected, options={})
  # Work on a copy so the caller's hash keeps its 'JobFlowIds' entry.
  params = options.dup

  if (job_flow_ids = params.delete('JobFlowIds'))
    params.merge!(Fog::AWS.serialize_keys('JobFlowIds', job_flow_ids))
  end

  request({
    'Action'               => 'SetTerminationProtection',
    'TerminationProtected' => is_protected,
    :parser                => Fog::Parsers::AWS::EMR::SetTerminationProtection.new
  }.merge(params))
end
shuts a list of job flows down. docs.amazonwebservices.com/ElasticMapReduce/latest/API/API_TerminateJobFlows.html
Parameters¶ ↑
-
JobFlowIds <~String list> - list of strings that uniquely identify the job flows to shut down
Returns¶ ↑
-
response<~Excon::Response>:
-
body<~Hash>
-
# File lib/fog/aws/requests/emr/terminate_job_flows.rb, line 15
#
# Shuts down job flows by issuing a 'TerminateJobFlows' request.
#
# ==== Parameters
# * options<~Hash> - request parameters; merged last, so they override
#   the defaults built here
#   * 'JobFlowIds' - flattened through Fog::AWS.serialize_keys
#
# The hash passed as +options+ is left untouched; the flattening is done
# on a copy.
#
# ==== Returns
# * whatever #request returns for the assembled parameters
def terminate_job_flows(options={})
  # Work on a copy so the caller's hash keeps its 'JobFlowIds' entry.
  params = options.dup

  if (job_flow_ids = params.delete('JobFlowIds'))
    params.merge!(Fog::AWS.serialize_keys('JobFlowIds', job_flow_ids))
  end

  request({
    'Action' => 'TerminateJobFlows',
    :parser  => Fog::Parsers::AWS::EMR::TerminateJobFlows.new
  }.merge(params))
end
Private Instance Methods
# File lib/fog/aws/emr.rb, line 123
#
# Sends an already-prepared request over @connection as a POST that
# expects a 200 status.
#
# ==== Parameters
# * body - request body
# * headers - request headers
# * idempotent - forwarded as :idempotent
# * parser - forwarded as :parser
#
# ==== Returns
# * the result of @connection.request
def _request(body, headers, idempotent, parser)
  request_args = {
    :body       => body,
    :expects    => 200,
    :headers    => headers,
    :idempotent => idempotent,
    :method     => 'POST',
    :parser     => parser
  }

  @connection.request(request_args)
end
# File lib/fog/aws/emr.rb, line 94
#
# Signs the given parameters and dispatches them through #_request,
# wrapping the call in the instrumentor when one is configured.
#
# ==== Parameters
# * params<~Hash> - request parameters. The :idempotent and :parser
#   entries are removed from this hash before signing and handed to
#   #_request separately.
#
# ==== Returns
# * the result of #_request (via the instrumentor block when @instrumentor
#   is set)
def request(params)
  refresh_credentials_if_expired

  idempotent = params.delete(:idempotent)
  parser     = params.delete(:parser)

  content_type = { 'Content-Type' => 'application/x-www-form-urlencoded' }
  signing_options = {
    :signer            => @signer,
    :aws_session_token => @aws_session_token,
    :method            => 'POST',
    :host              => @host,
    :path              => @path,
    :port              => @port,
    :version           => '2009-03-31' #'2010-07-28'
  }
  body, headers = Fog::AWS.signed_params_v4(params, content_type, signing_options)

  # Without an instrumentor, send the request directly.
  return _request(body, headers, idempotent, parser) unless @instrumentor

  @instrumentor.instrument("#{@instrumentor_name}.request", params) do
    _request(body, headers, idempotent, parser)
  end
end
# File lib/fog/aws/emr.rb, line 85
#
# Stores the credential values from +options+ and builds the request
# signer from them.
#
# ==== Parameters
# * options<~Hash> - read for :aws_access_key_id, :aws_secret_access_key,
#   :aws_session_token and :aws_credentials_expire_at
#
# @region must already be set; it is passed to the signer together with
# the 'elasticmapreduce' service name.
def setup_credentials(options)
  @aws_access_key_id         = options[:aws_access_key_id]
  @aws_secret_access_key     = options[:aws_secret_access_key]
  @aws_session_token         = options[:aws_session_token]
  @aws_credentials_expire_at = options[:aws_credentials_expire_at]

  service = 'elasticmapreduce'
  @signer = Fog::AWS::SignatureV4.new(@aws_access_key_id, @aws_secret_access_key, @region, service)
end